diff --git a/.github/update.log b/.github/update.log index 25a0c8f3fa..c4afaa121a 100644 --- a/.github/update.log +++ b/.github/update.log @@ -614,3 +614,4 @@ Update On Mon Apr 8 20:30:51 CEST 2024 Update On Tue Apr 9 20:25:08 CEST 2024 Update On Wed Apr 10 20:31:24 CEST 2024 Update On Thu Apr 11 20:26:19 CEST 2024 +Update On Fri Apr 12 20:24:06 CEST 2024 diff --git a/clash-meta/adapter/provider/provider.go b/clash-meta/adapter/provider/provider.go index 2715a30972..a40a50ec44 100644 --- a/clash-meta/adapter/provider/provider.go +++ b/clash-meta/adapter/provider/provider.go @@ -124,8 +124,8 @@ func (pp *proxySetProvider) getSubscriptionInfo() { go func() { ctx, cancel := context.WithTimeout(context.Background(), time.Second*90) defer cancel() - resp, err := mihomoHttp.HttpRequest(ctx, pp.Vehicle().(*resource.HTTPVehicle).Url(), - http.MethodGet, http.Header{"User-Agent": {C.UA}}, nil) + resp, err := mihomoHttp.HttpRequestWithProxy(ctx, pp.Vehicle().(*resource.HTTPVehicle).Url(), + http.MethodGet, http.Header{"User-Agent": {C.UA}}, nil, pp.Vehicle().Proxy()) if err != nil { return } @@ -133,8 +133,8 @@ func (pp *proxySetProvider) getSubscriptionInfo() { userInfoStr := strings.TrimSpace(resp.Header.Get("subscription-userinfo")) if userInfoStr == "" { - resp2, err := mihomoHttp.HttpRequest(ctx, pp.Vehicle().(*resource.HTTPVehicle).Url(), - http.MethodGet, http.Header{"User-Agent": {"Quantumultx"}}, nil) + resp2, err := mihomoHttp.HttpRequestWithProxy(ctx, pp.Vehicle().(*resource.HTTPVehicle).Url(), + http.MethodGet, http.Header{"User-Agent": {"Quantumultx"}}, nil, pp.Vehicle().Proxy()) if err != nil { return } diff --git a/clash-meta/component/resolver/system.go b/clash-meta/component/resolver/system.go new file mode 100644 index 0000000000..4ed990a365 --- /dev/null +++ b/clash-meta/component/resolver/system.go @@ -0,0 +1,35 @@ +package resolver + +import "sync" + +var blacklist struct { + Map map[string]struct{} + Mutex sync.Mutex +} + +func AddSystemDnsBlacklist(names ...string) { + blacklist.Mutex.Lock() + defer blacklist.Mutex.Unlock() + for _, name := range names { + blacklist.Map[name] = struct{}{} + } +} + +func RemoveSystemDnsBlacklist(names ...string) { + blacklist.Mutex.Lock() + defer blacklist.Mutex.Unlock() + for _, name := range names { + delete(blacklist.Map, name) + } +} + +func IsSystemDnsBlacklisted(names ...string) bool { + blacklist.Mutex.Lock() + defer blacklist.Mutex.Unlock() + for _, name := range names { + if _, ok := blacklist.Map[name]; ok { + return true + } + } + return false +} diff --git a/clash-meta/component/resource/vehicle.go b/clash-meta/component/resource/vehicle.go index 2d71be9455..b13369d22e 100644 --- a/clash-meta/component/resource/vehicle.go +++ b/clash-meta/component/resource/vehicle.go @@ -28,6 +28,10 @@ func (f *FileVehicle) Read() ([]byte, error) { return os.ReadFile(f.path) } +func (f *FileVehicle) Proxy() string { + return "" +} + func NewFileVehicle(path string) *FileVehicle { return &FileVehicle{path: path} } @@ -51,6 +55,10 @@ func (h *HTTPVehicle) Path() string { return h.path } +func (h *HTTPVehicle) Proxy() string { + return h.proxy +} + func (h *HTTPVehicle) Read() ([]byte, error) { ctx, cancel := context.WithTimeout(context.Background(), time.Second*20) defer cancel() diff --git a/clash-meta/constant/provider/interface.go b/clash-meta/constant/provider/interface.go index f2b6939e81..bb73d1bce2 100644 --- a/clash-meta/constant/provider/interface.go +++ b/clash-meta/constant/provider/interface.go @@ -31,6 +31,7 @@ func (v VehicleType) String() string { type Vehicle interface { Read() ([]byte, error) Path() string + Proxy() string Type() VehicleType } diff --git a/clash-meta/dns/system.go b/clash-meta/dns/system.go index 37607a60bf..dc1006fa56 100644 --- a/clash-meta/dns/system.go +++ b/clash-meta/dns/system.go @@ -8,6 +8,7 @@ import ( "sync" "time" + "github.com/metacubex/mihomo/component/resolver" "github.com/metacubex/mihomo/log" D "github.com/miekg/dns" @@ -39,6 +40,9 @@ func (c *systemClient) getDnsClients() ([]dnsClient, error) { if nameservers, err = dnsReadConfig(); err == nil { log.Debugln("[DNS] system dns update to %s", nameservers) for _, addr := range nameservers { + if resolver.IsSystemDnsBlacklisted(addr) { + continue + } if _, ok := c.dnsClients[addr]; !ok { clients := transform( []NameServer{{ diff --git a/clash-meta/listener/sing_tun/server.go b/clash-meta/listener/sing_tun/server.go index 8fe534df53..7cd9fc7235 100644 --- a/clash-meta/listener/sing_tun/server.go +++ b/clash-meta/listener/sing_tun/server.go @@ -12,6 +12,7 @@ import ( "github.com/metacubex/mihomo/adapter/inbound" "github.com/metacubex/mihomo/component/dialer" "github.com/metacubex/mihomo/component/iface" + "github.com/metacubex/mihomo/component/resolver" C "github.com/metacubex/mihomo/constant" LC "github.com/metacubex/mihomo/listener/config" "github.com/metacubex/mihomo/listener/sing" @@ -39,6 +40,8 @@ type Listener struct { networkUpdateMonitor tun.NetworkUpdateMonitor defaultInterfaceMonitor tun.DefaultInterfaceMonitor packageManager tun.PackageManager + + dnsServerIp []string } func CalculateInterfaceName(name string) (tunName string) { @@ -147,12 +150,16 @@ func New(options LC.Tun, tunnel C.Tunnel, additions ...inbound.Addition) (l *Lis dnsAdds = append(dnsAdds, addrPort) } + + var dnsServerIp []string for _, a := range options.Inet4Address { addrPort := netip.AddrPortFrom(a.Addr().Next(), 53) + dnsServerIp = append(dnsServerIp, a.Addr().Next().String()) dnsAdds = append(dnsAdds, addrPort) } for _, a := range options.Inet6Address { addrPort := netip.AddrPortFrom(a.Addr().Next(), 53) + dnsServerIp = append(dnsServerIp, a.Addr().Next().String()) dnsAdds = append(dnsAdds, addrPort) } @@ -244,6 +251,10 @@ func New(options LC.Tun, tunnel C.Tunnel, additions ...inbound.Addition) (l *Lis return } + l.dnsServerIp = dnsServerIp + // after tun.New sing-tun has set DNS to TUN interface + resolver.AddSystemDnsBlacklist(dnsServerIp...) + stackOptions := tun.StackOptions{ Context: context.TODO(), Tun: tunIf, @@ -336,6 +347,7 @@ func parseRange(uidRanges []ranges.Range[uint32], rangeList []string) ([]ranges. func (l *Listener) Close() error { l.closed = true + resolver.RemoveSystemDnsBlacklist(l.dnsServerIp...) return common.Close( l.tunStack, l.tunIf, diff --git a/clash-nyanpasu/README.md b/clash-nyanpasu/README.md index b0bc1f1e72..cd65476b13 100644 --- a/clash-nyanpasu/README.md +++ b/clash-nyanpasu/README.md @@ -1,5 +1,5 @@

- Clash + Clash
Clash Nyanpasu
diff --git a/clash-nyanpasu/backend/Cargo.lock b/clash-nyanpasu/backend/Cargo.lock index d0e162a4c3..55f2bac4c9 100644 --- a/clash-nyanpasu/backend/Cargo.lock +++ b/clash-nyanpasu/backend/Cargo.lock @@ -403,9 +403,9 @@ checksum = "fbb36e985947064623dbd357f727af08ffd077f93d696782f3c56365fa2e2799" [[package]] name = "async-trait" -version = "0.1.79" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", diff --git a/clash-nyanpasu/backend/tauri/src/main.rs b/clash-nyanpasu/backend/tauri/src/main.rs index a38c0ce775..b6e538c7e8 100644 --- a/clash-nyanpasu/backend/tauri/src/main.rs +++ b/clash-nyanpasu/backend/tauri/src/main.rs @@ -50,7 +50,11 @@ fn main() -> std::io::Result<()> { commands::parse().unwrap(); // Should be in first place in order prevent single instance check block everything - tauri_plugin_deep_link::prepare("moe.elaina.clash.nyanpasu"); + tauri_plugin_deep_link::prepare(if cfg!(feature = "verge-dev") { + "moe.elaina.clash.nyanpasu.dev" + } else { + "moe.elaina.clash.nyanpasu" + }); // 单例检测 let single_instance_result = utils::init::check_singleton(); diff --git a/clash-nyanpasu/backend/tauri/tauri.conf.json b/clash-nyanpasu/backend/tauri/tauri.conf.json index eee588f378..a977f1e888 100644 --- a/clash-nyanpasu/backend/tauri/tauri.conf.json +++ b/clash-nyanpasu/backend/tauri/tauri.conf.json @@ -4,7 +4,7 @@ "version": "1.5.1" }, "build": { - "distDir": "../../dist", + "distDir": "../../frontend/nyanpasu/dist", "devPath": "http://localhost:3000/", "beforeDevCommand": "pnpm run web:dev", "beforeBuildCommand": "pnpm run web:build" diff --git a/clash-nyanpasu/index.html b/clash-nyanpasu/frontend/nyanpasu/index.html similarity index 100% rename from clash-nyanpasu/index.html rename to clash-nyanpasu/frontend/nyanpasu/index.html diff --git a/clash-nyanpasu/frontend/nyanpasu/package.json b/clash-nyanpasu/frontend/nyanpasu/package.json new file mode 100644 index 0000000000..9f6523b477 --- /dev/null +++ b/clash-nyanpasu/frontend/nyanpasu/package.json @@ -0,0 +1,59 @@ +{ + "name": "@nyanpasu/nyanpasu", + "version": "1.5.1", + "license": "GPL-3.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "serve": "vite preview" + }, + "dependencies": { + "@dnd-kit/core": "6.1.0", + "@dnd-kit/sortable": "8.0.0", + "@dnd-kit/utilities": "3.2.2", + "@emotion/react": "11.11.4", + "@emotion/styled": "11.11.5", + "@generouted/react-router": "1.18.8", + "@juggle/resize-observer": "3.4.0", + "@mui/icons-material": "5.15.15", + "@mui/lab": "5.0.0-alpha.170", + "@mui/material": "5.15.15", + "@mui/x-data-grid": "7.1.1", + "@tauri-apps/api": "1.5.3", + "ahooks": "3.7.11", + "axios": "1.6.8", + "dayjs": "1.11.10", + "framer-motion": "11.0.28", + "i18next": "23.11.1", + "monaco-editor": "0.47.0", + "mui-color-input": "2.0.3", + "react": "18.2.0", + "react-dom": "18.2.0", + "react-error-boundary": "4.0.13", + "react-hook-form": "7.51.3", + "react-i18next": "14.1.0", + "react-markdown": "9.0.1", + "react-router-dom": "6.22.3", + "react-transition-group": "4.4.5", + "react-virtuoso": "4.7.8", + "recoil": "0.7.7", + "swr": "2.2.5" + }, + "devDependencies": { + "@types/js-cookie": "3.0.6", + "@types/react": "18.2.77", + "@types/react-dom": "18.2.25", + "@types/react-transition-group": "4.4.10", + "@typescript-eslint/eslint-plugin": "7.6.0", + "@typescript-eslint/parser": "7.6.0", + "@vitejs/plugin-react": "4.2.1", + "sass": "1.74.1", + "shiki": "1.3.0", + "vite": "5.2.8", + "vite-plugin-monaco-editor": "1.1.0", + "vite-plugin-sass-dts": "1.3.17", + "vite-plugin-svgr": "4.2.0", + "vite-tsconfig-paths": "4.3.2" + } +} diff --git a/clash-nyanpasu/postcss.config.js b/clash-nyanpasu/frontend/nyanpasu/postcss.config.js similarity index 100% rename from clash-nyanpasu/postcss.config.js rename to clash-nyanpasu/frontend/nyanpasu/postcss.config.js diff --git a/clash-nyanpasu/src/assets/fonts/Twemoji.Mozilla.ttf b/clash-nyanpasu/frontend/nyanpasu/src/assets/fonts/Twemoji.Mozilla.ttf similarity index 100% rename from clash-nyanpasu/src/assets/fonts/Twemoji.Mozilla.ttf rename to clash-nyanpasu/frontend/nyanpasu/src/assets/fonts/Twemoji.Mozilla.ttf diff --git a/clash-nyanpasu/src/assets/image/christmas-hat.svg b/clash-nyanpasu/frontend/nyanpasu/src/assets/image/christmas-hat.svg similarity index 100% rename from clash-nyanpasu/src/assets/image/christmas-hat.svg rename to clash-nyanpasu/frontend/nyanpasu/src/assets/image/christmas-hat.svg diff --git a/clash-nyanpasu/src/assets/image/logo-blue.svg b/clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo-blue.svg similarity index 100% rename from clash-nyanpasu/src/assets/image/logo-blue.svg rename to clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo-blue.svg diff --git a/clash-nyanpasu/src/assets/image/logo-box.png b/clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo-box.png similarity index 100% rename from clash-nyanpasu/src/assets/image/logo-box.png rename to clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo-box.png diff --git a/clash-nyanpasu/src/assets/image/logo-christmas.png b/clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo-christmas.png similarity index 100% rename from clash-nyanpasu/src/assets/image/logo-christmas.png rename to clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo-christmas.png diff --git a/clash-nyanpasu/src/assets/image/logo-pink.svg b/clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo-pink.svg similarity index 100% rename from clash-nyanpasu/src/assets/image/logo-pink.svg rename to clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo-pink.svg diff --git a/clash-nyanpasu/src/assets/image/logo.ico b/clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo.ico similarity index 100% rename from clash-nyanpasu/src/assets/image/logo.ico rename to clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo.ico diff --git a/clash-nyanpasu/src/assets/image/logo.png b/clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo.png similarity index 100% rename from clash-nyanpasu/src/assets/image/logo.png rename to clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo.png diff --git a/clash-nyanpasu/src/assets/image/logo.svg b/clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo.svg similarity index 100% rename from clash-nyanpasu/src/assets/image/logo.svg rename to clash-nyanpasu/frontend/nyanpasu/src/assets/image/logo.svg diff --git a/clash-nyanpasu/src/assets/styles/fonts.scss b/clash-nyanpasu/frontend/nyanpasu/src/assets/styles/fonts.scss similarity index 100% rename from clash-nyanpasu/src/assets/styles/fonts.scss rename to clash-nyanpasu/frontend/nyanpasu/src/assets/styles/fonts.scss diff --git a/clash-nyanpasu/src/assets/styles/index.scss b/clash-nyanpasu/frontend/nyanpasu/src/assets/styles/index.scss similarity index 100% rename from clash-nyanpasu/src/assets/styles/index.scss rename to clash-nyanpasu/frontend/nyanpasu/src/assets/styles/index.scss diff --git a/clash-nyanpasu/src/assets/styles/layout.scss b/clash-nyanpasu/frontend/nyanpasu/src/assets/styles/layout.scss similarity index 100% rename from clash-nyanpasu/src/assets/styles/layout.scss rename to clash-nyanpasu/frontend/nyanpasu/src/assets/styles/layout.scss diff --git a/clash-nyanpasu/src/assets/styles/page.scss b/clash-nyanpasu/frontend/nyanpasu/src/assets/styles/page.scss similarity index 100% rename from clash-nyanpasu/src/assets/styles/page.scss rename to clash-nyanpasu/frontend/nyanpasu/src/assets/styles/page.scss diff --git a/clash-nyanpasu/src/components/base/base-dialog.module.scss b/clash-nyanpasu/frontend/nyanpasu/src/components/base/base-dialog.module.scss similarity index 100% rename from clash-nyanpasu/src/components/base/base-dialog.module.scss rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/base-dialog.module.scss diff --git a/clash-nyanpasu/src/components/base/base-dialog.module.scss.d.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/base/base-dialog.module.scss.d.ts similarity index 100% rename from clash-nyanpasu/src/components/base/base-dialog.module.scss.d.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/base-dialog.module.scss.d.ts diff --git a/clash-nyanpasu/src/components/base/base-dialog.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/base/base-dialog.tsx similarity index 100% rename from clash-nyanpasu/src/components/base/base-dialog.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/base-dialog.tsx diff --git a/clash-nyanpasu/src/components/base/base-empty.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/base/base-empty.tsx similarity index 100% rename from clash-nyanpasu/src/components/base/base-empty.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/base-empty.tsx diff --git a/clash-nyanpasu/src/components/base/base-error-boundary.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/base/base-error-boundary.tsx similarity index 100% rename from clash-nyanpasu/src/components/base/base-error-boundary.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/base-error-boundary.tsx diff --git a/clash-nyanpasu/src/components/base/base-loading.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/base/base-loading.tsx similarity index 100% rename from clash-nyanpasu/src/components/base/base-loading.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/base-loading.tsx diff --git a/clash-nyanpasu/src/components/base/base-notice.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/base/base-notice.tsx similarity index 100% rename from clash-nyanpasu/src/components/base/base-notice.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/base-notice.tsx diff --git a/clash-nyanpasu/src/components/base/base-page.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/base/base-page.tsx similarity index 100% rename from clash-nyanpasu/src/components/base/base-page.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/base-page.tsx diff --git a/clash-nyanpasu/src/components/base/index.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/base/index.ts similarity index 100% rename from clash-nyanpasu/src/components/base/index.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/base/index.ts diff --git a/clash-nyanpasu/src/components/common/kbd.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/common/kbd.tsx similarity index 100% rename from clash-nyanpasu/src/components/common/kbd.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/common/kbd.tsx diff --git a/clash-nyanpasu/src/components/common/mdy-switch.scss b/clash-nyanpasu/frontend/nyanpasu/src/components/common/mdy-switch.scss similarity index 100% rename from clash-nyanpasu/src/components/common/mdy-switch.scss rename to clash-nyanpasu/frontend/nyanpasu/src/components/common/mdy-switch.scss diff --git a/clash-nyanpasu/src/components/common/mdy-switch.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/common/mdy-switch.tsx similarity index 100% rename from clash-nyanpasu/src/components/common/mdy-switch.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/common/mdy-switch.tsx diff --git a/clash-nyanpasu/src/components/connection/connection-detail.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/connection/connection-detail.tsx similarity index 100% rename from clash-nyanpasu/src/components/connection/connection-detail.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/connection/connection-detail.tsx diff --git a/clash-nyanpasu/src/components/connection/connection-item.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/connection/connection-item.tsx similarity index 100% rename from clash-nyanpasu/src/components/connection/connection-item.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/connection/connection-item.tsx diff --git a/clash-nyanpasu/src/components/connection/connection-table.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/connection/connection-table.tsx similarity index 100% rename from clash-nyanpasu/src/components/connection/connection-table.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/connection/connection-table.tsx diff --git a/clash-nyanpasu/src/components/layout/layout-control.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/layout/layout-control.tsx similarity index 100% rename from clash-nyanpasu/src/components/layout/layout-control.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/layout/layout-control.tsx diff --git a/clash-nyanpasu/src/components/layout/layout-item.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/layout/layout-item.tsx similarity index 100% rename from clash-nyanpasu/src/components/layout/layout-item.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/layout/layout-item.tsx diff --git a/clash-nyanpasu/src/components/layout/layout-traffic.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/layout/layout-traffic.tsx similarity index 100% rename from clash-nyanpasu/src/components/layout/layout-traffic.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/layout/layout-traffic.tsx diff --git a/clash-nyanpasu/src/components/layout/page-transition.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/layout/page-transition.tsx similarity index 100% rename from clash-nyanpasu/src/components/layout/page-transition.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/layout/page-transition.tsx diff --git a/clash-nyanpasu/src/components/layout/traffic-graph.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/layout/traffic-graph.tsx similarity index 100% rename from clash-nyanpasu/src/components/layout/traffic-graph.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/layout/traffic-graph.tsx diff --git a/clash-nyanpasu/src/components/layout/update-button.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/layout/update-button.tsx similarity index 100% rename from clash-nyanpasu/src/components/layout/update-button.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/layout/update-button.tsx diff --git a/clash-nyanpasu/src/components/layout/use-custom-theme.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/layout/use-custom-theme.ts similarity index 100% rename from clash-nyanpasu/src/components/layout/use-custom-theme.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/layout/use-custom-theme.ts diff --git a/clash-nyanpasu/src/components/layout/use-log-setup.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/layout/use-log-setup.ts similarity index 100% rename from clash-nyanpasu/src/components/layout/use-log-setup.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/layout/use-log-setup.ts diff --git a/clash-nyanpasu/src/components/log/log-item.module.scss b/clash-nyanpasu/frontend/nyanpasu/src/components/log/log-item.module.scss similarity index 100% rename from clash-nyanpasu/src/components/log/log-item.module.scss rename to clash-nyanpasu/frontend/nyanpasu/src/components/log/log-item.module.scss diff --git a/clash-nyanpasu/src/components/log/log-item.module.scss.d.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/log/log-item.module.scss.d.ts similarity index 100% rename from clash-nyanpasu/src/components/log/log-item.module.scss.d.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/log/log-item.module.scss.d.ts diff --git a/clash-nyanpasu/src/components/log/log-item.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/log/log-item.tsx similarity index 100% rename from clash-nyanpasu/src/components/log/log-item.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/log/log-item.tsx diff --git a/clash-nyanpasu/src/components/profile/editor-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/profile/editor-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/profile/editor-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/profile/editor-viewer.tsx diff --git a/clash-nyanpasu/src/components/profile/file-input.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/profile/file-input.tsx similarity index 100% rename from clash-nyanpasu/src/components/profile/file-input.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/profile/file-input.tsx diff --git a/clash-nyanpasu/src/components/profile/log-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/profile/log-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/profile/log-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/profile/log-viewer.tsx diff --git a/clash-nyanpasu/src/components/profile/profile-box.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-box.tsx similarity index 100% rename from clash-nyanpasu/src/components/profile/profile-box.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-box.tsx diff --git a/clash-nyanpasu/src/components/profile/profile-item.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-item.tsx similarity index 100% rename from clash-nyanpasu/src/components/profile/profile-item.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-item.tsx diff --git a/clash-nyanpasu/src/components/profile/profile-more.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-more.tsx similarity index 100% rename from clash-nyanpasu/src/components/profile/profile-more.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-more.tsx diff --git a/clash-nyanpasu/src/components/profile/profile-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-viewer.tsx similarity index 99% rename from clash-nyanpasu/src/components/profile/profile-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-viewer.tsx index 2deaeae1b9..f47132b0c8 100644 --- a/clash-nyanpasu/src/components/profile/profile-viewer.tsx +++ b/clash-nyanpasu/frontend/nyanpasu/src/components/profile/profile-viewer.tsx @@ -217,7 +217,7 @@ export const ProfileViewer = forwardRef( )} diff --git a/clash-nyanpasu/src/components/providers/rules-provider.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/providers/rules-provider.tsx similarity index 100% rename from clash-nyanpasu/src/components/providers/rules-provider.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/providers/rules-provider.tsx diff --git a/clash-nyanpasu/src/components/proxy/provider-button.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/provider-button.tsx similarity index 100% rename from clash-nyanpasu/src/components/proxy/provider-button.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/provider-button.tsx diff --git a/clash-nyanpasu/src/components/proxy/proxy-group.module.scss b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-group.module.scss similarity index 100% rename from clash-nyanpasu/src/components/proxy/proxy-group.module.scss rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-group.module.scss diff --git a/clash-nyanpasu/src/components/proxy/proxy-group.module.scss.d.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-group.module.scss.d.ts similarity index 100% rename from clash-nyanpasu/src/components/proxy/proxy-group.module.scss.d.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-group.module.scss.d.ts diff --git a/clash-nyanpasu/src/components/proxy/proxy-groups.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-groups.tsx similarity index 100% rename from clash-nyanpasu/src/components/proxy/proxy-groups.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-groups.tsx diff --git a/clash-nyanpasu/src/components/proxy/proxy-head.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-head.tsx similarity index 100% rename from clash-nyanpasu/src/components/proxy/proxy-head.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-head.tsx diff --git a/clash-nyanpasu/src/components/proxy/proxy-item-mini.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-item-mini.tsx similarity index 100% rename from clash-nyanpasu/src/components/proxy/proxy-item-mini.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-item-mini.tsx diff --git a/clash-nyanpasu/src/components/proxy/proxy-item.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-item.tsx similarity index 100% rename from clash-nyanpasu/src/components/proxy/proxy-item.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-item.tsx diff --git a/clash-nyanpasu/src/components/proxy/proxy-render.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-render.tsx similarity index 100% rename from clash-nyanpasu/src/components/proxy/proxy-render.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/proxy-render.tsx diff --git a/clash-nyanpasu/src/components/proxy/use-filter-sort.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/use-filter-sort.ts similarity index 100% rename from clash-nyanpasu/src/components/proxy/use-filter-sort.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/use-filter-sort.ts diff --git a/clash-nyanpasu/src/components/proxy/use-head-state.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/use-head-state.ts similarity index 100% rename from clash-nyanpasu/src/components/proxy/use-head-state.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/use-head-state.ts diff --git a/clash-nyanpasu/src/components/proxy/use-render-list.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/use-render-list.ts similarity index 100% rename from clash-nyanpasu/src/components/proxy/use-render-list.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/use-render-list.ts diff --git a/clash-nyanpasu/src/components/proxy/use-window-width.ts b/clash-nyanpasu/frontend/nyanpasu/src/components/proxy/use-window-width.ts similarity index 100% rename from clash-nyanpasu/src/components/proxy/use-window-width.ts rename to clash-nyanpasu/frontend/nyanpasu/src/components/proxy/use-window-width.ts diff --git a/clash-nyanpasu/src/components/rule/rule-item.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/rule/rule-item.tsx similarity index 100% rename from clash-nyanpasu/src/components/rule/rule-item.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/rule/rule-item.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/clash-core-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/clash-core-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/clash-core-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/clash-core-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/clash-field-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/clash-field-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/clash-field-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/clash-field-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/clash-port-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/clash-port-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/clash-port-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/clash-port-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/config-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/config-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/config-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/config-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/controller-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/controller-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/controller-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/controller-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/guard-state.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/guard-state.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/guard-state.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/guard-state.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/hotkey-input.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/hotkey-input.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/hotkey-input.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/hotkey-input.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/hotkey-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/hotkey-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/hotkey-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/hotkey-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/layout-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/layout-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/layout-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/layout-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/misc-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/misc-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/misc-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/misc-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/service-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/service-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/service-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/service-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/setting-comp.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/setting-comp.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/setting-comp.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/setting-comp.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/sysproxy-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/sysproxy-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/sysproxy-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/sysproxy-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/tasks-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/tasks-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/tasks-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/tasks-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/theme-mode-switch.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/theme-mode-switch.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/theme-mode-switch.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/theme-mode-switch.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/theme-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/theme-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/theme-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/theme-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/update-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/update-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/update-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/update-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/web-ui-item.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/web-ui-item.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/web-ui-item.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/web-ui-item.tsx diff --git a/clash-nyanpasu/src/components/setting/mods/web-ui-viewer.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/web-ui-viewer.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/mods/web-ui-viewer.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/mods/web-ui-viewer.tsx diff --git a/clash-nyanpasu/src/components/setting/setting-clash.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/setting-clash.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/setting-clash.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/setting-clash.tsx diff --git a/clash-nyanpasu/src/components/setting/setting-system.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/setting-system.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/setting-system.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/setting-system.tsx diff --git a/clash-nyanpasu/src/components/setting/setting-verge.tsx b/clash-nyanpasu/frontend/nyanpasu/src/components/setting/setting-verge.tsx similarity index 100% rename from clash-nyanpasu/src/components/setting/setting-verge.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/components/setting/setting-verge.tsx diff --git a/clash-nyanpasu/src/hooks/use-clash.ts b/clash-nyanpasu/frontend/nyanpasu/src/hooks/use-clash.ts similarity index 100% rename from clash-nyanpasu/src/hooks/use-clash.ts rename to clash-nyanpasu/frontend/nyanpasu/src/hooks/use-clash.ts diff --git a/clash-nyanpasu/src/hooks/use-notification.ts b/clash-nyanpasu/frontend/nyanpasu/src/hooks/use-notification.ts similarity index 100% rename from clash-nyanpasu/src/hooks/use-notification.ts rename to clash-nyanpasu/frontend/nyanpasu/src/hooks/use-notification.ts diff --git a/clash-nyanpasu/src/hooks/use-profiles.ts b/clash-nyanpasu/frontend/nyanpasu/src/hooks/use-profiles.ts similarity index 100% rename from clash-nyanpasu/src/hooks/use-profiles.ts rename to clash-nyanpasu/frontend/nyanpasu/src/hooks/use-profiles.ts diff --git a/clash-nyanpasu/src/hooks/use-verge.ts b/clash-nyanpasu/frontend/nyanpasu/src/hooks/use-verge.ts similarity index 100% rename from clash-nyanpasu/src/hooks/use-verge.ts rename to clash-nyanpasu/frontend/nyanpasu/src/hooks/use-verge.ts diff --git a/clash-nyanpasu/src/hooks/use-visibility.ts b/clash-nyanpasu/frontend/nyanpasu/src/hooks/use-visibility.ts similarity index 100% rename from clash-nyanpasu/src/hooks/use-visibility.ts rename to clash-nyanpasu/frontend/nyanpasu/src/hooks/use-visibility.ts diff --git a/clash-nyanpasu/src/hooks/use-websocket.ts b/clash-nyanpasu/frontend/nyanpasu/src/hooks/use-websocket.ts similarity index 100% rename from clash-nyanpasu/src/hooks/use-websocket.ts rename to clash-nyanpasu/frontend/nyanpasu/src/hooks/use-websocket.ts diff --git a/clash-nyanpasu/src/locales/en.json b/clash-nyanpasu/frontend/nyanpasu/src/locales/en.json similarity index 100% rename from clash-nyanpasu/src/locales/en.json rename to clash-nyanpasu/frontend/nyanpasu/src/locales/en.json diff --git a/clash-nyanpasu/src/locales/ru.json b/clash-nyanpasu/frontend/nyanpasu/src/locales/ru.json similarity index 100% rename from clash-nyanpasu/src/locales/ru.json rename to clash-nyanpasu/frontend/nyanpasu/src/locales/ru.json diff --git a/clash-nyanpasu/src/locales/zh.json b/clash-nyanpasu/frontend/nyanpasu/src/locales/zh.json similarity index 100% rename from clash-nyanpasu/src/locales/zh.json rename to clash-nyanpasu/frontend/nyanpasu/src/locales/zh.json diff --git a/clash-nyanpasu/src/main.tsx b/clash-nyanpasu/frontend/nyanpasu/src/main.tsx similarity index 100% rename from clash-nyanpasu/src/main.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/main.tsx diff --git a/clash-nyanpasu/src/pages/404.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/404.tsx similarity index 100% rename from clash-nyanpasu/src/pages/404.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/404.tsx diff --git a/clash-nyanpasu/src/pages/_app.module.scss b/clash-nyanpasu/frontend/nyanpasu/src/pages/_app.module.scss similarity index 100% rename from clash-nyanpasu/src/pages/_app.module.scss rename to clash-nyanpasu/frontend/nyanpasu/src/pages/_app.module.scss diff --git a/clash-nyanpasu/src/pages/_app.module.scss.d.ts b/clash-nyanpasu/frontend/nyanpasu/src/pages/_app.module.scss.d.ts similarity index 100% rename from clash-nyanpasu/src/pages/_app.module.scss.d.ts rename to clash-nyanpasu/frontend/nyanpasu/src/pages/_app.module.scss.d.ts diff --git a/clash-nyanpasu/src/pages/_app.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/_app.tsx similarity index 100% rename from clash-nyanpasu/src/pages/_app.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/_app.tsx diff --git a/clash-nyanpasu/src/pages/_theme.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/_theme.tsx similarity index 100% rename from clash-nyanpasu/src/pages/_theme.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/_theme.tsx diff --git a/clash-nyanpasu/src/pages/connections.module.scss b/clash-nyanpasu/frontend/nyanpasu/src/pages/connections.module.scss similarity index 100% rename from clash-nyanpasu/src/pages/connections.module.scss rename to clash-nyanpasu/frontend/nyanpasu/src/pages/connections.module.scss diff --git a/clash-nyanpasu/src/pages/connections.module.scss.d.ts b/clash-nyanpasu/frontend/nyanpasu/src/pages/connections.module.scss.d.ts similarity index 100% rename from clash-nyanpasu/src/pages/connections.module.scss.d.ts rename to clash-nyanpasu/frontend/nyanpasu/src/pages/connections.module.scss.d.ts diff --git a/clash-nyanpasu/src/pages/connections.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/connections.tsx similarity index 100% rename from clash-nyanpasu/src/pages/connections.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/connections.tsx diff --git a/clash-nyanpasu/src/pages/logs.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/logs.tsx similarity index 100% rename from clash-nyanpasu/src/pages/logs.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/logs.tsx diff --git a/clash-nyanpasu/src/pages/profiles.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/profiles.tsx similarity index 100% rename from clash-nyanpasu/src/pages/profiles.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/profiles.tsx diff --git a/clash-nyanpasu/src/pages/providers.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/providers.tsx similarity index 100% rename from clash-nyanpasu/src/pages/providers.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/providers.tsx diff --git a/clash-nyanpasu/src/pages/proxies.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/proxies.tsx similarity index 100% rename from clash-nyanpasu/src/pages/proxies.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/proxies.tsx diff --git a/clash-nyanpasu/src/pages/rules.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/rules.tsx similarity index 100% rename from clash-nyanpasu/src/pages/rules.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/rules.tsx diff --git a/clash-nyanpasu/src/pages/settings.tsx b/clash-nyanpasu/frontend/nyanpasu/src/pages/settings.tsx similarity index 100% rename from clash-nyanpasu/src/pages/settings.tsx rename to clash-nyanpasu/frontend/nyanpasu/src/pages/settings.tsx diff --git a/clash-nyanpasu/src/router.ts b/clash-nyanpasu/frontend/nyanpasu/src/router.ts similarity index 100% rename from clash-nyanpasu/src/router.ts rename to clash-nyanpasu/frontend/nyanpasu/src/router.ts diff --git a/clash-nyanpasu/src/services/api.ts b/clash-nyanpasu/frontend/nyanpasu/src/services/api.ts similarity index 100% rename from clash-nyanpasu/src/services/api.ts rename to clash-nyanpasu/frontend/nyanpasu/src/services/api.ts diff --git a/clash-nyanpasu/src/services/cmds.ts b/clash-nyanpasu/frontend/nyanpasu/src/services/cmds.ts similarity index 100% rename from clash-nyanpasu/src/services/cmds.ts rename to clash-nyanpasu/frontend/nyanpasu/src/services/cmds.ts diff --git a/clash-nyanpasu/src/services/delay.ts b/clash-nyanpasu/frontend/nyanpasu/src/services/delay.ts similarity index 100% rename from clash-nyanpasu/src/services/delay.ts rename to clash-nyanpasu/frontend/nyanpasu/src/services/delay.ts diff --git a/clash-nyanpasu/src/services/i18n.ts b/clash-nyanpasu/frontend/nyanpasu/src/services/i18n.ts similarity index 100% rename from clash-nyanpasu/src/services/i18n.ts rename to clash-nyanpasu/frontend/nyanpasu/src/services/i18n.ts diff --git a/clash-nyanpasu/src/services/monaco.ts b/clash-nyanpasu/frontend/nyanpasu/src/services/monaco.ts similarity index 100% rename from clash-nyanpasu/src/services/monaco.ts rename to clash-nyanpasu/frontend/nyanpasu/src/services/monaco.ts diff --git a/clash-nyanpasu/src/services/states.ts b/clash-nyanpasu/frontend/nyanpasu/src/services/states.ts similarity index 100% rename from clash-nyanpasu/src/services/states.ts rename to clash-nyanpasu/frontend/nyanpasu/src/services/states.ts diff --git a/clash-nyanpasu/src/services/types.d.ts b/clash-nyanpasu/frontend/nyanpasu/src/services/types.d.ts similarity index 100% rename from clash-nyanpasu/src/services/types.d.ts rename to clash-nyanpasu/frontend/nyanpasu/src/services/types.d.ts diff --git a/clash-nyanpasu/src/utils/clash-fields.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/clash-fields.ts similarity index 100% rename from clash-nyanpasu/src/utils/clash-fields.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/clash-fields.ts diff --git a/clash-nyanpasu/src/utils/get-system.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/get-system.ts similarity index 100% rename from clash-nyanpasu/src/utils/get-system.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/get-system.ts diff --git a/clash-nyanpasu/src/utils/ignore-case.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/ignore-case.ts similarity index 100% rename from clash-nyanpasu/src/utils/ignore-case.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/ignore-case.ts diff --git a/clash-nyanpasu/src/utils/index.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/index.ts similarity index 100% rename from clash-nyanpasu/src/utils/index.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/index.ts diff --git a/clash-nyanpasu/src/utils/noop.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/noop.ts similarity index 100% rename from clash-nyanpasu/src/utils/noop.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/noop.ts diff --git a/clash-nyanpasu/src/utils/parse-hotkey.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/parse-hotkey.ts similarity index 100% rename from clash-nyanpasu/src/utils/parse-hotkey.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/parse-hotkey.ts diff --git a/clash-nyanpasu/src/utils/parse-traffic.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/parse-traffic.ts similarity index 100% rename from clash-nyanpasu/src/utils/parse-traffic.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/parse-traffic.ts diff --git a/clash-nyanpasu/src/utils/shiki.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/shiki.ts similarity index 100% rename from clash-nyanpasu/src/utils/shiki.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/shiki.ts diff --git a/clash-nyanpasu/src/utils/truncate-str.ts b/clash-nyanpasu/frontend/nyanpasu/src/utils/truncate-str.ts similarity index 100% rename from clash-nyanpasu/src/utils/truncate-str.ts rename to clash-nyanpasu/frontend/nyanpasu/src/utils/truncate-str.ts diff --git a/clash-nyanpasu/frontend/nyanpasu/tsconfig.json b/clash-nyanpasu/frontend/nyanpasu/tsconfig.json new file mode 100644 index 0000000000..d9612f84ea --- /dev/null +++ b/clash-nyanpasu/frontend/nyanpasu/tsconfig.json @@ -0,0 +1,25 @@ +{ + "compilerOptions": { + "baseUrl": ".", + "target": "ESNext", + "useDefineForClassFields": true, + "lib": ["DOM", "DOM.Iterable", "ESNext"], + "allowJs": false, + "skipLibCheck": true, + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "forceConsistentCasingInFileNames": true, + "module": "ESNext", + "moduleResolution": "Node", + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": true, + "jsx": "preserve", + "paths": { + "@/*": ["src/*"], + "~/*": ["./*"], + }, + }, + "include": ["./src"], +} diff --git a/clash-nyanpasu/vite.config.ts b/clash-nyanpasu/frontend/nyanpasu/vite.config.ts similarity index 100% rename from clash-nyanpasu/vite.config.ts rename to clash-nyanpasu/frontend/nyanpasu/vite.config.ts diff --git a/clash-nyanpasu/frontend/ui/package.json b/clash-nyanpasu/frontend/ui/package.json new file mode 100644 index 0000000000..612c711fc4 --- /dev/null +++ b/clash-nyanpasu/frontend/ui/package.json @@ -0,0 +1,4 @@ +{ + "name": "@nyanpasu/ui", + "version": "0.1.0" +} diff --git a/clash-nyanpasu/manifest/version.json b/clash-nyanpasu/manifest/version.json index 5c7c71f033..499659ffbb 100644 --- a/clash-nyanpasu/manifest/version.json +++ b/clash-nyanpasu/manifest/version.json @@ -2,7 +2,7 @@ "manifest_version": 1, "latest": { "mihomo": "v1.18.3", - "mihomo_alpha": "alpha-16fadd2", + "mihomo_alpha": "alpha-e3b69b8", "clash_rs": "v0.1.15", "clash_premium": "2023-09-05-gdcc8d87" }, @@ -36,5 +36,5 @@ "darwin-x64": "clash-darwin-amd64-n{}.gz" } }, - "updated_at": "2024-04-10T22:20:19.248Z" + "updated_at": "2024-04-11T22:19:31.146Z" } diff --git a/clash-nyanpasu/package.json b/clash-nyanpasu/package.json index 02c655f718..52845dd9f6 100644 --- a/clash-nyanpasu/package.json +++ b/clash-nyanpasu/package.json @@ -1,17 +1,21 @@ { - "name": "clash-nyanpasu", + "name": "@nyanpasu/monorepo", "version": "1.5.1", "license": "GPL-3.0", "type": "module", + "repository": "https://github.com/LibNyanpasu/clash-nyanpasu.git", + "engines": { + "node": ">=20.0.0" + }, "scripts": { "dev": "tauri dev -c ./backend/tauri/tauri.conf.json", "dev:diff": "tauri dev -f verge-dev deadlock-detection -c ./backend/tauri/tauri.conf.json", "build": "tauri build -c ./backend/tauri/tauri.conf.json", "build:nightly": "tauri build -f nightly -c ./backend/tauri/tauri.nightly.conf.json", "tauri": "tauri", - "web:dev": "vite", - "web:build": "vite build", - "web:serve": "vite preview", + "web:dev": "pnpm --filter=@nyanpasu/nyanpasu dev", + "web:build": "pnpm --filter=@nyanpasu/nyanpasu build", + "web:serve": "pnpm --filter=@nyanpasu/nyanpasu preview", "lint": "run-s lint:*", "lint:prettier": "prettier --check .", "lint:eslint": "eslint --cache .", @@ -63,57 +67,17 @@ } }, "dependencies": { - "@dnd-kit/core": "6.1.0", - "@dnd-kit/sortable": "8.0.0", - "@dnd-kit/utilities": "3.2.2", - "@emotion/react": "11.11.4", - "@emotion/styled": "11.11.5", - "@generouted/react-router": "1.18.8", - "@juggle/resize-observer": "3.4.0", - "@mui/icons-material": "5.15.15", - "@mui/lab": "5.0.0-alpha.170", - "@mui/material": "5.15.15", - "@mui/x-data-grid": "7.1.1", - "@tauri-apps/api": "1.5.3", - "ahooks": "3.7.11", - "axios": "1.6.8", - "dayjs": "1.11.10", - "framer-motion": "11.0.28", - "i18next": "23.11.1", - "lodash-es": "4.17.21", - "monaco-editor": "0.47.0", - "mui-color-input": "2.0.3", - "react": "18.2.0", - "react-dom": "18.2.0", - "react-error-boundary": "4.0.13", - "react-hook-form": "7.51.2", - "react-i18next": "14.1.0", - "react-markdown": "9.0.1", - "react-router-dom": "6.22.3", - "react-transition-group": "4.4.5", - "react-virtuoso": "4.7.8", - "recoil": "0.7.7", - "swr": "2.2.5" + "husky": "9.0.11", + "lodash-es": "4.17.21" }, "devDependencies": { - "@actions/github": "6.0.0", "@commitlint/cli": "19.2.1", "@commitlint/config-conventional": "19.1.0", "@tauri-apps/cli": "1.5.11", "@types/fs-extra": "11.0.4", - "@types/js-cookie": "3.0.6", "@types/lodash-es": "4.17.12", "@types/node": "20.12.7", - "@types/react": "18.2.75", - "@types/react-dom": "18.2.24", - "@types/react-transition-group": "4.4.10", - "@typescript-eslint/eslint-plugin": "7.6.0", - "@typescript-eslint/parser": "7.6.0", - "@vitejs/plugin-react": "4.2.1", - "adm-zip": "0.5.12", "autoprefixer": "10.4.19", - "colorize-template": "1.0.0", - "consola": "3.2.3", "conventional-changelog-conventionalcommits": "7.0.2", "cross-env": "7.0.3", "dedent": "1.5.3", @@ -127,21 +91,14 @@ "eslint-plugin-prettier": "5.1.3", "eslint-plugin-promise": "6.1.1", "eslint-plugin-react": "7.34.1", - "fs-extra": "11.2.0", - "https-proxy-agent": "7.0.4", - "husky": "9.0.11", "lint-staged": "15.2.2", - "node-fetch": "3.3.2", "npm-run-all2": "6.1.2", - "picocolors": "1.0.0", "postcss": "8.4.38", "postcss-html": "1.6.0", "postcss-import": "16.1.0", "postcss-scss": "4.0.9", "prettier": "3.2.5", "prettier-plugin-toml": "2.0.1", - "sass": "1.74.1", - "shiki": "1.3.0", "stylelint": "16.3.1", "stylelint-config-html": "1.1.0", "stylelint-config-recess-order": "5.0.0", @@ -149,13 +106,7 @@ "stylelint-declaration-block-no-ignored-properties": "2.8.0", "stylelint-order": "6.0.4", "stylelint-scss": "6.2.1", - "telegraf": "4.16.3", "tsx": "4.7.2", - "typescript": "5.4.5", - "vite": "5.2.8", - "vite-plugin-monaco-editor": "1.1.0", - "vite-plugin-sass-dts": "1.3.17", - "vite-plugin-svgr": "4.2.0", - "vite-tsconfig-paths": "4.3.2" + "typescript": "5.4.5" } } diff --git a/clash-nyanpasu/pnpm-lock.yaml b/clash-nyanpasu/pnpm-lock.yaml index f98136a144..985780d6e7 100644 --- a/clash-nyanpasu/pnpm-lock.yaml +++ b/clash-nyanpasu/pnpm-lock.yaml @@ -7,285 +7,300 @@ settings: overrides: vite-plugin-monaco-editor: npm:vite-plugin-monaco-editor-new@1.1.3 -dependencies: - '@dnd-kit/core': - specifier: 6.1.0 - version: 6.1.0(react-dom@18.2.0)(react@18.2.0) - '@dnd-kit/sortable': - specifier: 8.0.0 - version: 8.0.0(@dnd-kit/core@6.1.0)(react@18.2.0) - '@dnd-kit/utilities': - specifier: 3.2.2 - version: 3.2.2(react@18.2.0) - '@emotion/react': - specifier: 11.11.4 - version: 11.11.4(@types/react@18.2.75)(react@18.2.0) - '@emotion/styled': - specifier: 11.11.5 - version: 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.75)(react@18.2.0) - '@generouted/react-router': - specifier: 1.18.8 - version: 1.18.8(react-router-dom@6.22.3)(react@18.2.0)(vite@5.2.8) - '@juggle/resize-observer': - specifier: 3.4.0 - version: 3.4.0 - '@mui/icons-material': - specifier: 5.15.15 - version: 5.15.15(@mui/material@5.15.15)(@types/react@18.2.75)(react@18.2.0) - '@mui/lab': - specifier: 5.0.0-alpha.170 - version: 5.0.0-alpha.170(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - '@mui/material': - specifier: 5.15.15 - version: 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - '@mui/x-data-grid': - specifier: 7.1.1 - version: 7.1.1(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - '@tauri-apps/api': - specifier: 1.5.3 - version: 1.5.3 - ahooks: - specifier: 3.7.11 - version: 3.7.11(react@18.2.0) - axios: - specifier: 1.6.8 - version: 1.6.8 - dayjs: - specifier: 1.11.10 - version: 1.11.10 - framer-motion: - specifier: 11.0.28 - version: 11.0.28(react-dom@18.2.0)(react@18.2.0) - i18next: - specifier: 23.11.1 - version: 23.11.1 - lodash-es: - specifier: 4.17.21 - version: 4.17.21 - monaco-editor: - specifier: 0.47.0 - version: 0.47.0 - mui-color-input: - specifier: 2.0.3 - version: 2.0.3(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - react: - specifier: 18.2.0 - version: 18.2.0 - react-dom: - specifier: 18.2.0 - version: 18.2.0(react@18.2.0) - react-error-boundary: - specifier: 4.0.13 - version: 4.0.13(react@18.2.0) - react-hook-form: - specifier: 7.51.2 - version: 7.51.2(react@18.2.0) - react-i18next: - specifier: 14.1.0 - version: 14.1.0(i18next@23.11.1)(react-dom@18.2.0)(react@18.2.0) - react-markdown: - specifier: 9.0.1 - version: 9.0.1(@types/react@18.2.75)(react@18.2.0) - react-router-dom: - specifier: 6.22.3 - version: 6.22.3(react-dom@18.2.0)(react@18.2.0) - react-transition-group: - specifier: 4.4.5 - version: 4.4.5(react-dom@18.2.0)(react@18.2.0) - react-virtuoso: - specifier: 4.7.8 - version: 4.7.8(react-dom@18.2.0)(react@18.2.0) - recoil: - specifier: 0.7.7 - version: 0.7.7(react-dom@18.2.0)(react@18.2.0) - swr: - specifier: 2.2.5 - version: 2.2.5(react@18.2.0) +importers: -devDependencies: - '@actions/github': - specifier: 6.0.0 - version: 6.0.0 - '@commitlint/cli': - specifier: 19.2.1 - version: 19.2.1(@types/node@20.12.7)(typescript@5.4.5) - '@commitlint/config-conventional': - specifier: 19.1.0 - version: 19.1.0 - '@tauri-apps/cli': - specifier: 1.5.11 - version: 1.5.11 - '@types/fs-extra': - specifier: 11.0.4 - version: 11.0.4 - '@types/js-cookie': - specifier: 3.0.6 - version: 3.0.6 - '@types/lodash-es': - specifier: 4.17.12 - version: 4.17.12 - '@types/node': - specifier: 20.12.7 - version: 20.12.7 - '@types/react': - specifier: 18.2.75 - version: 18.2.75 - '@types/react-dom': - specifier: 18.2.24 - version: 18.2.24 - '@types/react-transition-group': - specifier: 4.4.10 - version: 4.4.10 - '@typescript-eslint/eslint-plugin': - specifier: 7.6.0 - version: 7.6.0(@typescript-eslint/parser@7.6.0)(eslint@8.57.0)(typescript@5.4.5) - '@typescript-eslint/parser': - specifier: 7.6.0 - version: 7.6.0(eslint@8.57.0)(typescript@5.4.5) - '@vitejs/plugin-react': - specifier: 4.2.1 - version: 4.2.1(vite@5.2.8) - adm-zip: - specifier: 0.5.12 - version: 0.5.12 - autoprefixer: - specifier: 10.4.19 - version: 10.4.19(postcss@8.4.38) - colorize-template: - specifier: 1.0.0 - version: 1.0.0 - consola: - specifier: 3.2.3 - version: 3.2.3 - conventional-changelog-conventionalcommits: - specifier: 7.0.2 - version: 7.0.2 - cross-env: - specifier: 7.0.3 - version: 7.0.3 - dedent: - specifier: 1.5.3 - version: 1.5.3 - eslint: - specifier: 8.57.0 - version: 8.57.0 - eslint-config-prettier: - specifier: 9.1.0 - version: 9.1.0(eslint@8.57.0) - eslint-config-standard: - specifier: 17.1.0 - version: 17.1.0(eslint-plugin-import@2.29.1)(eslint-plugin-n@16.6.2)(eslint-plugin-promise@6.1.1)(eslint@8.57.0) - eslint-import-resolver-alias: - specifier: 1.1.2 - version: 1.1.2(eslint-plugin-import@2.29.1) - eslint-plugin-html: - specifier: 8.1.0 - version: 8.1.0 - eslint-plugin-import: - specifier: 2.29.1 - version: 2.29.1(@typescript-eslint/parser@7.6.0)(eslint@8.57.0) - eslint-plugin-n: - specifier: 16.6.2 - version: 16.6.2(eslint@8.57.0) - eslint-plugin-prettier: - specifier: 5.1.3 - version: 5.1.3(eslint-config-prettier@9.1.0)(eslint@8.57.0)(prettier@3.2.5) - eslint-plugin-promise: - specifier: 6.1.1 - version: 6.1.1(eslint@8.57.0) - eslint-plugin-react: - specifier: 7.34.1 - version: 7.34.1(eslint@8.57.0) - fs-extra: - specifier: 11.2.0 - version: 11.2.0 - https-proxy-agent: - specifier: 7.0.4 - version: 7.0.4 - husky: - specifier: 9.0.11 - version: 9.0.11 - lint-staged: - specifier: 15.2.2 - version: 15.2.2 - node-fetch: - specifier: 3.3.2 - version: 3.3.2 - npm-run-all2: - specifier: 6.1.2 - version: 6.1.2 - picocolors: - specifier: 1.0.0 - version: 1.0.0 - postcss: - specifier: 8.4.38 - version: 8.4.38 - postcss-html: - specifier: 1.6.0 - version: 1.6.0 - postcss-import: - specifier: 16.1.0 - version: 16.1.0(postcss@8.4.38) - postcss-scss: - specifier: 4.0.9 - version: 4.0.9(postcss@8.4.38) - prettier: - specifier: 3.2.5 - version: 3.2.5 - prettier-plugin-toml: - specifier: 2.0.1 - version: 2.0.1(prettier@3.2.5) - sass: - specifier: 1.74.1 - version: 1.74.1 - shiki: - specifier: 1.3.0 - version: 1.3.0 - stylelint: - specifier: 16.3.1 - version: 16.3.1(typescript@5.4.5) - stylelint-config-html: - specifier: 1.1.0 - version: 1.1.0(postcss-html@1.6.0)(stylelint@16.3.1) - stylelint-config-recess-order: - specifier: 5.0.0 - version: 5.0.0(stylelint@16.3.1) - stylelint-config-standard: - specifier: 36.0.0 - version: 36.0.0(stylelint@16.3.1) - stylelint-declaration-block-no-ignored-properties: - specifier: 2.8.0 - version: 2.8.0(stylelint@16.3.1) - stylelint-order: - specifier: 6.0.4 - version: 6.0.4(stylelint@16.3.1) - stylelint-scss: - specifier: 6.2.1 - version: 6.2.1(stylelint@16.3.1) - telegraf: - specifier: 4.16.3 - version: 4.16.3 - tsx: - specifier: 4.7.2 - version: 4.7.2 - typescript: - specifier: 5.4.5 - version: 5.4.5 - vite: - specifier: 5.2.8 - version: 5.2.8(@types/node@20.12.7)(sass@1.74.1) - vite-plugin-monaco-editor: - specifier: npm:vite-plugin-monaco-editor-new@1.1.3 - version: /vite-plugin-monaco-editor-new@1.1.3(monaco-editor@0.47.0) - vite-plugin-sass-dts: - specifier: 1.3.17 - version: 1.3.17(postcss@8.4.38)(prettier@3.2.5)(sass@1.74.1)(vite@5.2.8) - vite-plugin-svgr: - specifier: 4.2.0 - version: 4.2.0(typescript@5.4.5)(vite@5.2.8) - vite-tsconfig-paths: - specifier: 4.3.2 - version: 4.3.2(typescript@5.4.5)(vite@5.2.8) + .: + dependencies: + husky: + specifier: 9.0.11 + version: 9.0.11 + lodash-es: + specifier: 4.17.21 + version: 4.17.21 + devDependencies: + '@commitlint/cli': + specifier: 19.2.1 + version: 19.2.1(@types/node@20.12.7)(typescript@5.4.5) + '@commitlint/config-conventional': + specifier: 19.1.0 + version: 19.1.0 + '@tauri-apps/cli': + specifier: 1.5.11 + version: 1.5.11 + '@types/fs-extra': + specifier: 11.0.4 + version: 11.0.4 + '@types/lodash-es': + specifier: 4.17.12 + version: 4.17.12 + '@types/node': + specifier: 20.12.7 + version: 20.12.7 + autoprefixer: + specifier: 10.4.19 + version: 10.4.19(postcss@8.4.38) + conventional-changelog-conventionalcommits: + specifier: 7.0.2 + version: 7.0.2 + cross-env: + specifier: 7.0.3 + version: 7.0.3 + dedent: + specifier: 1.5.3 + version: 1.5.3 + eslint: + specifier: 8.57.0 + version: 8.57.0 + eslint-config-prettier: + specifier: 9.1.0 + version: 9.1.0(eslint@8.57.0) + eslint-config-standard: + specifier: 17.1.0 + version: 17.1.0(eslint-plugin-import@2.29.1)(eslint-plugin-n@16.6.2)(eslint-plugin-promise@6.1.1)(eslint@8.57.0) + eslint-import-resolver-alias: + specifier: 1.1.2 + version: 1.1.2(eslint-plugin-import@2.29.1) + eslint-plugin-html: + specifier: 8.1.0 + version: 8.1.0 + eslint-plugin-import: + specifier: 2.29.1 + version: 2.29.1(eslint@8.57.0) + eslint-plugin-n: + specifier: 16.6.2 + version: 16.6.2(eslint@8.57.0) + eslint-plugin-prettier: + specifier: 5.1.3 + version: 5.1.3(eslint-config-prettier@9.1.0)(eslint@8.57.0)(prettier@3.2.5) + eslint-plugin-promise: + specifier: 6.1.1 + version: 6.1.1(eslint@8.57.0) + eslint-plugin-react: + specifier: 7.34.1 + version: 7.34.1(eslint@8.57.0) + lint-staged: + specifier: 15.2.2 + version: 15.2.2 + npm-run-all2: + specifier: 6.1.2 + version: 6.1.2 + postcss: + specifier: 8.4.38 + version: 8.4.38 + postcss-html: + specifier: 1.6.0 + version: 1.6.0 + postcss-import: + specifier: 16.1.0 + version: 16.1.0(postcss@8.4.38) + postcss-scss: + specifier: 4.0.9 + version: 4.0.9(postcss@8.4.38) + prettier: + specifier: 3.2.5 + version: 3.2.5 + prettier-plugin-toml: + specifier: 2.0.1 + version: 2.0.1(prettier@3.2.5) + stylelint: + specifier: 16.3.1 + version: 16.3.1(typescript@5.4.5) + stylelint-config-html: + specifier: 1.1.0 + version: 1.1.0(postcss-html@1.6.0)(stylelint@16.3.1) + stylelint-config-recess-order: + specifier: 5.0.0 + version: 5.0.0(stylelint@16.3.1) + stylelint-config-standard: + specifier: 36.0.0 + version: 36.0.0(stylelint@16.3.1) + stylelint-declaration-block-no-ignored-properties: + specifier: 2.8.0 + version: 2.8.0(stylelint@16.3.1) + stylelint-order: + specifier: 6.0.4 + version: 6.0.4(stylelint@16.3.1) + stylelint-scss: + specifier: 6.2.1 + version: 6.2.1(stylelint@16.3.1) + tsx: + specifier: 4.7.2 + version: 4.7.2 + typescript: + specifier: 5.4.5 + version: 5.4.5 + + frontend/nyanpasu: + dependencies: + '@dnd-kit/core': + specifier: 6.1.0 + version: 6.1.0(react-dom@18.2.0)(react@18.2.0) + '@dnd-kit/sortable': + specifier: 8.0.0 + version: 8.0.0(@dnd-kit/core@6.1.0)(react@18.2.0) + '@dnd-kit/utilities': + specifier: 3.2.2 + version: 3.2.2(react@18.2.0) + '@emotion/react': + specifier: 11.11.4 + version: 11.11.4(@types/react@18.2.77)(react@18.2.0) + '@emotion/styled': + specifier: 11.11.5 + version: 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.77)(react@18.2.0) + '@generouted/react-router': + specifier: 1.18.8 + version: 1.18.8(react-router-dom@6.22.3)(react@18.2.0)(vite@5.2.8) + '@juggle/resize-observer': + specifier: 3.4.0 + version: 3.4.0 + '@mui/icons-material': + specifier: 5.15.15 + version: 5.15.15(@mui/material@5.15.15)(@types/react@18.2.77)(react@18.2.0) + '@mui/lab': + specifier: 5.0.0-alpha.170 + version: 5.0.0-alpha.170(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + '@mui/material': + specifier: 5.15.15 + version: 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + '@mui/x-data-grid': + specifier: 7.1.1 + version: 7.1.1(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + '@tauri-apps/api': + specifier: 1.5.3 + version: 1.5.3 + ahooks: + specifier: 3.7.11 + version: 3.7.11(react@18.2.0) + axios: + specifier: 1.6.8 + version: 1.6.8 + dayjs: + specifier: 1.11.10 + version: 1.11.10 + framer-motion: + specifier: 11.0.28 + version: 11.0.28(react-dom@18.2.0)(react@18.2.0) + i18next: + specifier: 23.11.1 + version: 23.11.1 + monaco-editor: + specifier: 0.47.0 + version: 0.47.0 + mui-color-input: + specifier: 2.0.3 + version: 2.0.3(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + react: + specifier: 18.2.0 + version: 18.2.0 + react-dom: + specifier: 18.2.0 + version: 18.2.0(react@18.2.0) + react-error-boundary: + specifier: 4.0.13 + version: 4.0.13(react@18.2.0) + react-hook-form: + specifier: 7.51.3 + version: 7.51.3(react@18.2.0) + react-i18next: + specifier: 14.1.0 + version: 14.1.0(i18next@23.11.1)(react-dom@18.2.0)(react@18.2.0) + react-markdown: + specifier: 9.0.1 + version: 9.0.1(@types/react@18.2.77)(react@18.2.0) + react-router-dom: + specifier: 6.22.3 + version: 6.22.3(react-dom@18.2.0)(react@18.2.0) + react-transition-group: + specifier: 4.4.5 + version: 4.4.5(react-dom@18.2.0)(react@18.2.0) + react-virtuoso: + specifier: 4.7.8 + version: 4.7.8(react-dom@18.2.0)(react@18.2.0) + recoil: + specifier: 0.7.7 + version: 0.7.7(react-dom@18.2.0)(react@18.2.0) + swr: + specifier: 2.2.5 + version: 2.2.5(react@18.2.0) + devDependencies: + '@types/js-cookie': + specifier: 3.0.6 + version: 3.0.6 + '@types/react': + specifier: 18.2.77 + version: 18.2.77 + '@types/react-dom': + specifier: 18.2.25 + version: 18.2.25 + '@types/react-transition-group': + specifier: 4.4.10 + version: 4.4.10 + '@typescript-eslint/eslint-plugin': + specifier: 7.6.0 + version: 7.6.0(@typescript-eslint/parser@7.6.0)(eslint@8.57.0)(typescript@5.4.5) + '@typescript-eslint/parser': + specifier: 7.6.0 + version: 7.6.0(eslint@8.57.0)(typescript@5.4.5) + '@vitejs/plugin-react': + specifier: 4.2.1 + version: 4.2.1(vite@5.2.8) + sass: + specifier: 1.74.1 + version: 1.74.1 + shiki: + specifier: 1.3.0 + version: 1.3.0 + vite: + specifier: 5.2.8 + version: 5.2.8(@types/node@20.12.7)(sass@1.74.1) + vite-plugin-monaco-editor: + specifier: npm:vite-plugin-monaco-editor-new@1.1.3 + version: /vite-plugin-monaco-editor-new@1.1.3(monaco-editor@0.47.0) + vite-plugin-sass-dts: + specifier: 1.3.17 + version: 1.3.17(postcss@8.4.38)(prettier@3.2.5)(sass@1.74.1)(vite@5.2.8) + vite-plugin-svgr: + specifier: 4.2.0 + version: 4.2.0(typescript@5.4.5)(vite@5.2.8) + vite-tsconfig-paths: + specifier: 4.3.2 + version: 4.3.2(typescript@5.4.5)(vite@5.2.8) + + frontend/ui: {} + + scripts: + dependencies: + '@actions/github': + specifier: 6.0.0 + version: 6.0.0 + telegraf: + specifier: 4.16.3 + version: 4.16.3 + devDependencies: + '@types/adm-zip': + specifier: 0.5.5 + version: 0.5.5 + adm-zip: + specifier: 0.5.12 + version: 0.5.12 + colorize-template: + specifier: 1.0.0 + version: 1.0.0 + consola: + specifier: 3.2.3 + version: 3.2.3 + fs-extra: + specifier: 11.2.0 + version: 11.2.0 + https-proxy-agent: + specifier: 7.0.4 + version: 7.0.4 + node-fetch: + specifier: 3.3.2 + version: 3.3.2 + picocolors: + specifier: 1.0.0 + version: 1.0.0 packages: @@ -301,14 +316,14 @@ packages: '@octokit/core': 5.0.2 '@octokit/plugin-paginate-rest': 9.1.5(@octokit/core@5.0.2) '@octokit/plugin-rest-endpoint-methods': 10.2.0(@octokit/core@5.0.2) - dev: true + dev: false /@actions/http-client@2.2.0: resolution: {integrity: sha512-q+epW0trjVUUHboliPb4UF9g2msf+w61b32tAkFEwL/IwP0DQWgbCMM0Hbe3e3WXSKz5VcUXbzJQgy8Hkra/Lg==} dependencies: tunnel: 0.0.6 undici: 5.28.2 - dev: true + dev: false /@ampproject/remapping@2.2.1: resolution: {integrity: sha512-lFMjJTrFL3j7L9yBxwYfCq2k6qqwHyzuUl/XBnif78PWTJYyL/dfowQHWE3sp6U6ZzqWiiIZnpTMO96zhkjwtg==} @@ -493,20 +508,6 @@ packages: '@babel/helper-plugin-utils': 7.22.5 dev: true - /@babel/runtime@7.23.8: - resolution: {integrity: sha512-Y7KbAP984rn1VGMbGqKmBLio9V7y5Je9GvU4rQPCPinCyNfUcToxIXl06d59URp/F3LwinvODxab5N/G6qggkw==} - engines: {node: '>=6.9.0'} - dependencies: - regenerator-runtime: 0.14.0 - dev: false - - /@babel/runtime@7.23.9: - resolution: {integrity: sha512-0CX6F+BI2s9dkUqr08KFrAIZgNFj75rdBU/DjCyYLIaV/quFjkk6T+EJ2LkZHyZTbEV4L5p97mNkUsHl2wLFAw==} - engines: {node: '>=6.9.0'} - dependencies: - regenerator-runtime: 0.14.0 - dev: false - /@babel/runtime@7.24.1: resolution: {integrity: sha512-+BIznRzyqBf+2wCTxcKE3wDjfGeCoVE61KSHGpkzqrLi8qxqFwBeUFyId2cxkTmm55fzDGnm0+yCxaxygrLUnQ==} engines: {node: '>=6.9.0'} @@ -838,7 +839,7 @@ packages: resolution: {integrity: sha512-W2P2c/VRW1/1tLox0mVUalvnWXxavmv/Oum2aPsRcoDJuob75FC3Y8FbpfLwUegRcxINtGUMPq0tFCvYNTBXNA==} dev: false - /@emotion/react@11.11.4(@types/react@18.2.75)(react@18.2.0): + /@emotion/react@11.11.4(@types/react@18.2.77)(react@18.2.0): resolution: {integrity: sha512-t8AjMlF0gHpvvxk5mAtCqR4vmxiGHCeJBaQO6gncUSdklELOgtwjerNY2yuJNfwnc6vi16U/+uMF+afIawJ9iw==} peerDependencies: '@types/react': '*' @@ -847,28 +848,18 @@ packages: '@types/react': optional: true dependencies: - '@babel/runtime': 7.23.9 + '@babel/runtime': 7.24.1 '@emotion/babel-plugin': 11.11.0 '@emotion/cache': 11.11.0 - '@emotion/serialize': 1.1.3 + '@emotion/serialize': 1.1.4 '@emotion/use-insertion-effect-with-fallbacks': 1.0.1(react@18.2.0) '@emotion/utils': 1.2.1 '@emotion/weak-memoize': 0.3.1 - '@types/react': 18.2.75 + '@types/react': 18.2.77 hoist-non-react-statics: 3.3.2 react: 18.2.0 dev: false - /@emotion/serialize@1.1.3: - resolution: {integrity: sha512-iD4D6QVZFDhcbH0RAG1uVu1CwVLMWUkCvAqqlewO/rxf8+87yIBAlt4+AxMiiKPLs5hFc0owNk/sLLAOROw3cA==} - dependencies: - '@emotion/hash': 0.9.1 - '@emotion/memoize': 0.8.1 - '@emotion/unitless': 0.8.1 - '@emotion/utils': 1.2.1 - csstype: 3.1.3 - dev: false - /@emotion/serialize@1.1.4: resolution: {integrity: sha512-RIN04MBT8g+FnDwgvIUi8czvr1LU1alUMI05LekWB5DGyTm8cCBMCRpq3GqaiyEDRptEXOyXnvZ58GZYu4kBxQ==} dependencies: @@ -883,7 +874,7 @@ packages: resolution: {integrity: sha512-0QBtGvaqtWi+nx6doRwDdBIzhNdZrXUppvTM4dtZZWEGTXL/XE/yJxLMGlDT1Gt+UHH5IX1n+jkXyytE/av7OA==} dev: false - /@emotion/styled@11.11.5(@emotion/react@11.11.4)(@types/react@18.2.75)(react@18.2.0): + /@emotion/styled@11.11.5(@emotion/react@11.11.4)(@types/react@18.2.77)(react@18.2.0): resolution: {integrity: sha512-/ZjjnaNKvuMPxcIiUkf/9SHoG4Q196DRl1w82hQ3WCsjo1IUR8uaGWrC6a87CrYAW0Kb/pK7hk8BnLgLRi9KoQ==} peerDependencies: '@emotion/react': ^11.0.0-rc.0 @@ -896,11 +887,11 @@ packages: '@babel/runtime': 7.24.1 '@emotion/babel-plugin': 11.11.0 '@emotion/is-prop-valid': 1.2.2 - '@emotion/react': 11.11.4(@types/react@18.2.75)(react@18.2.0) + '@emotion/react': 11.11.4(@types/react@18.2.77)(react@18.2.0) '@emotion/serialize': 1.1.4 '@emotion/use-insertion-effect-with-fallbacks': 1.0.1(react@18.2.0) '@emotion/utils': 1.2.1 - '@types/react': 18.2.75 + '@types/react': 18.2.77 react: 18.2.0 dev: false @@ -1355,7 +1346,7 @@ packages: /@fastify/busboy@2.1.0: resolution: {integrity: sha512-+KpH+QxZU7O4675t3mnkQKcZZg56u+K/Ct2K+N2AZYNVK8kyeo/bI18tI8aPm3tvNNRyTWfj6s5tnGNlcbQRsA==} engines: {node: '>=14'} - dev: true + dev: false /@floating-ui/core@1.6.0: resolution: {integrity: sha512-PcF++MykgmTj3CIyOQbKA/hDzOAiqI3mhuoN44WRCopIs1sgoDoU4oty4Jtqaj/y3oDU6fnVSm4QG0a3t5i0+g==} @@ -1465,7 +1456,7 @@ packages: resolution: {integrity: sha512-dfLbk+PwWvFzSxwk3n5ySL0hfBog779o8h68wK/7/APo/7cgyWp5jcXockbxdk5kFRkbeXWm4Fbi9FrdN381sA==} dev: false - /@mui/base@5.0.0-beta.40(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0): + /@mui/base@5.0.0-beta.40(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0): resolution: {integrity: sha512-I/lGHztkCzvwlXpjD2+SNmvNQvB4227xBXhISPjEaJUXGImOQ9f3D2Yj/T3KasSI/h0MLWy74X0J6clhPmsRbQ==} engines: {node: '>=12.0.0'} peerDependencies: @@ -1478,10 +1469,10 @@ packages: dependencies: '@babel/runtime': 7.24.1 '@floating-ui/react-dom': 2.0.8(react-dom@18.2.0)(react@18.2.0) - '@mui/types': 7.2.14(@types/react@18.2.75) - '@mui/utils': 5.15.14(@types/react@18.2.75)(react@18.2.0) + '@mui/types': 7.2.14(@types/react@18.2.77) + '@mui/utils': 5.15.14(@types/react@18.2.77)(react@18.2.0) '@popperjs/core': 2.11.8 - '@types/react': 18.2.75 + '@types/react': 18.2.77 clsx: 2.1.0 prop-types: 15.8.1 react: 18.2.0 @@ -1492,7 +1483,7 @@ packages: resolution: {integrity: sha512-aXnw29OWQ6I5A47iuWEI6qSSUfH6G/aCsW9KmW3LiFqr7uXZBK4Ks+z8G+qeIub8k0T5CMqlT2q0L+ZJTMrqpg==} dev: false - /@mui/icons-material@5.15.15(@mui/material@5.15.15)(@types/react@18.2.75)(react@18.2.0): + /@mui/icons-material@5.15.15(@mui/material@5.15.15)(@types/react@18.2.77)(react@18.2.0): resolution: {integrity: sha512-kkeU/pe+hABcYDH6Uqy8RmIsr2S/y5bP2rp+Gat4CcRjCcVne6KudS1NrZQhUCRysrTDCAhcbcf9gt+/+pGO2g==} engines: {node: '>=12.0.0'} peerDependencies: @@ -1504,12 +1495,12 @@ packages: optional: true dependencies: '@babel/runtime': 7.24.1 - '@mui/material': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - '@types/react': 18.2.75 + '@mui/material': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + '@types/react': 18.2.77 react: 18.2.0 dev: false - /@mui/lab@5.0.0-alpha.170(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0): + /@mui/lab@5.0.0-alpha.170(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0): resolution: {integrity: sha512-0bDVECGmrNjd3+bLdcLiwYZ0O4HP5j5WSQm5DV6iA/Z9kr8O6AnvZ1bv9ImQbbX7Gj3pX4o43EKwCutj3EQxQg==} engines: {node: '>=12.0.0'} peerDependencies: @@ -1528,21 +1519,21 @@ packages: optional: true dependencies: '@babel/runtime': 7.24.1 - '@emotion/react': 11.11.4(@types/react@18.2.75)(react@18.2.0) - '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.75)(react@18.2.0) - '@mui/base': 5.0.0-beta.40(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - '@mui/material': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - '@mui/system': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react@18.2.0) - '@mui/types': 7.2.14(@types/react@18.2.75) - '@mui/utils': 5.15.14(@types/react@18.2.75)(react@18.2.0) - '@types/react': 18.2.75 + '@emotion/react': 11.11.4(@types/react@18.2.77)(react@18.2.0) + '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.77)(react@18.2.0) + '@mui/base': 5.0.0-beta.40(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + '@mui/material': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + '@mui/system': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react@18.2.0) + '@mui/types': 7.2.14(@types/react@18.2.77) + '@mui/utils': 5.15.14(@types/react@18.2.77)(react@18.2.0) + '@types/react': 18.2.77 clsx: 2.1.0 prop-types: 15.8.1 react: 18.2.0 react-dom: 18.2.0(react@18.2.0) dev: false - /@mui/material@5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0): + /@mui/material@5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0): resolution: {integrity: sha512-3zvWayJ+E1kzoIsvwyEvkTUKVKt1AjchFFns+JtluHCuvxgKcLSRJTADw37k0doaRtVAsyh8bz9Afqzv+KYrIA==} engines: {node: '>=12.0.0'} peerDependencies: @@ -1560,14 +1551,14 @@ packages: optional: true dependencies: '@babel/runtime': 7.24.1 - '@emotion/react': 11.11.4(@types/react@18.2.75)(react@18.2.0) - '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.75)(react@18.2.0) - '@mui/base': 5.0.0-beta.40(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) + '@emotion/react': 11.11.4(@types/react@18.2.77)(react@18.2.0) + '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.77)(react@18.2.0) + '@mui/base': 5.0.0-beta.40(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) '@mui/core-downloads-tracker': 5.15.15 - '@mui/system': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react@18.2.0) - '@mui/types': 7.2.14(@types/react@18.2.75) - '@mui/utils': 5.15.14(@types/react@18.2.75)(react@18.2.0) - '@types/react': 18.2.75 + '@mui/system': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react@18.2.0) + '@mui/types': 7.2.14(@types/react@18.2.77) + '@mui/utils': 5.15.14(@types/react@18.2.77)(react@18.2.0) + '@types/react': 18.2.77 '@types/react-transition-group': 4.4.10 clsx: 2.1.0 csstype: 3.1.3 @@ -1578,7 +1569,7 @@ packages: react-transition-group: 4.4.5(react-dom@18.2.0)(react@18.2.0) dev: false - /@mui/private-theming@5.15.14(@types/react@18.2.75)(react@18.2.0): + /@mui/private-theming@5.15.14(@types/react@18.2.77)(react@18.2.0): resolution: {integrity: sha512-UH0EiZckOWcxiXLX3Jbb0K7rC8mxTr9L9l6QhOZxYc4r8FHUkefltV9VDGLrzCaWh30SQiJvAEd7djX3XXY6Xw==} engines: {node: '>=12.0.0'} peerDependencies: @@ -1589,8 +1580,8 @@ packages: optional: true dependencies: '@babel/runtime': 7.24.1 - '@mui/utils': 5.15.14(@types/react@18.2.75)(react@18.2.0) - '@types/react': 18.2.75 + '@mui/utils': 5.15.14(@types/react@18.2.77)(react@18.2.0) + '@types/react': 18.2.77 prop-types: 15.8.1 react: 18.2.0 dev: false @@ -1610,14 +1601,14 @@ packages: dependencies: '@babel/runtime': 7.24.1 '@emotion/cache': 11.11.0 - '@emotion/react': 11.11.4(@types/react@18.2.75)(react@18.2.0) - '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.75)(react@18.2.0) + '@emotion/react': 11.11.4(@types/react@18.2.77)(react@18.2.0) + '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.77)(react@18.2.0) csstype: 3.1.3 prop-types: 15.8.1 react: 18.2.0 dev: false - /@mui/system@5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react@18.2.0): + /@mui/system@5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react@18.2.0): resolution: {integrity: sha512-aulox6N1dnu5PABsfxVGOZffDVmlxPOVgj56HrUnJE8MCSh8lOvvkd47cebIVQQYAjpwieXQXiDPj5pwM40jTQ==} engines: {node: '>=12.0.0'} peerDependencies: @@ -1634,20 +1625,20 @@ packages: optional: true dependencies: '@babel/runtime': 7.24.1 - '@emotion/react': 11.11.4(@types/react@18.2.75)(react@18.2.0) - '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.75)(react@18.2.0) - '@mui/private-theming': 5.15.14(@types/react@18.2.75)(react@18.2.0) + '@emotion/react': 11.11.4(@types/react@18.2.77)(react@18.2.0) + '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.77)(react@18.2.0) + '@mui/private-theming': 5.15.14(@types/react@18.2.77)(react@18.2.0) '@mui/styled-engine': 5.15.14(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(react@18.2.0) - '@mui/types': 7.2.14(@types/react@18.2.75) - '@mui/utils': 5.15.14(@types/react@18.2.75)(react@18.2.0) - '@types/react': 18.2.75 + '@mui/types': 7.2.14(@types/react@18.2.77) + '@mui/utils': 5.15.14(@types/react@18.2.77)(react@18.2.0) + '@types/react': 18.2.77 clsx: 2.1.0 csstype: 3.1.3 prop-types: 15.8.1 react: 18.2.0 dev: false - /@mui/types@7.2.14(@types/react@18.2.75): + /@mui/types@7.2.14(@types/react@18.2.77): resolution: {integrity: sha512-MZsBZ4q4HfzBsywtXgM1Ksj6HDThtiwmOKUXH1pKYISI9gAVXCNHNpo7TlGoGrBaYWZTdNoirIN7JsQcQUjmQQ==} peerDependencies: '@types/react': ^17.0.0 || ^18.0.0 @@ -1655,10 +1646,10 @@ packages: '@types/react': optional: true dependencies: - '@types/react': 18.2.75 + '@types/react': 18.2.77 dev: false - /@mui/utils@5.15.14(@types/react@18.2.75)(react@18.2.0): + /@mui/utils@5.15.14(@types/react@18.2.77)(react@18.2.0): resolution: {integrity: sha512-0lF/7Hh/ezDv5X7Pry6enMsbYyGKjADzvHyo3Qrc/SSlTsQ1VkbDMbH0m2t3OR5iIVLwMoxwM7yGd+6FCMtTFA==} engines: {node: '>=12.0.0'} peerDependencies: @@ -1670,13 +1661,13 @@ packages: dependencies: '@babel/runtime': 7.24.1 '@types/prop-types': 15.7.11 - '@types/react': 18.2.75 + '@types/react': 18.2.77 prop-types: 15.8.1 react: 18.2.0 react-is: 18.2.0 dev: false - /@mui/x-data-grid@7.1.1(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0): + /@mui/x-data-grid@7.1.1(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0): resolution: {integrity: sha512-hNvz927lkAznFdy45QPE7mIZVyQhlqveHmTK9+SD0N1us4sSTij90uUJ/roTNDod0VA9f5GqWmNz+5h8ihpz6Q==} engines: {node: '>=14.0.0'} peerDependencies: @@ -1685,9 +1676,9 @@ packages: react-dom: ^17.0.0 || ^18.0.0 dependencies: '@babel/runtime': 7.24.1 - '@mui/material': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - '@mui/system': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react@18.2.0) - '@mui/utils': 5.15.14(@types/react@18.2.75)(react@18.2.0) + '@mui/material': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + '@mui/system': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react@18.2.0) + '@mui/utils': 5.15.14(@types/react@18.2.77)(react@18.2.0) clsx: 2.1.0 prop-types: 15.8.1 react: 18.2.0 @@ -1720,7 +1711,7 @@ packages: /@octokit/auth-token@4.0.0: resolution: {integrity: sha512-tY/msAuJo6ARbK6SPIxZrPBms3xPbfwBrulZe0Wtr/DIY9lje2HeV1uoebShn6mx7SjCHif6EjMvoREj+gZ+SA==} engines: {node: '>= 18'} - dev: true + dev: false /@octokit/core@5.0.2: resolution: {integrity: sha512-cZUy1gUvd4vttMic7C0lwPed8IYXWYp8kHIMatyhY8t8n3Cpw2ILczkV5pGMPqef7v0bLo0pOHrEHarsau2Ydg==} @@ -1733,7 +1724,7 @@ packages: '@octokit/types': 12.4.0 before-after-hook: 2.2.2 universal-user-agent: 6.0.0 - dev: true + dev: false /@octokit/endpoint@9.0.4: resolution: {integrity: sha512-DWPLtr1Kz3tv8L0UvXTDP1fNwM0S+z6EJpRcvH66orY6Eld4XBMCSYsaWp4xIm61jTWxK68BrR7ibO+vSDnZqw==} @@ -1741,7 +1732,7 @@ packages: dependencies: '@octokit/types': 12.4.0 universal-user-agent: 6.0.0 - dev: true + dev: false /@octokit/graphql@7.0.2: resolution: {integrity: sha512-OJ2iGMtj5Tg3s6RaXH22cJcxXRi7Y3EBqbHTBRq+PQAqfaS8f/236fUrWhfSn8P4jovyzqucxme7/vWSSZBX2Q==} @@ -1750,11 +1741,11 @@ packages: '@octokit/request': 8.1.6 '@octokit/types': 12.4.0 universal-user-agent: 6.0.0 - dev: true + dev: false /@octokit/openapi-types@19.1.0: resolution: {integrity: sha512-6G+ywGClliGQwRsjvqVYpklIfa7oRPA0vyhPQG/1Feh+B+wU0vGH1JiJ5T25d3g1JZYBHzR2qefLi9x8Gt+cpw==} - dev: true + dev: false /@octokit/plugin-paginate-rest@9.1.5(@octokit/core@5.0.2): resolution: {integrity: sha512-WKTQXxK+bu49qzwv4qKbMMRXej1DU2gq017euWyKVudA6MldaSSQuxtz+vGbhxV4CjxpUxjZu6rM2wfc1FiWVg==} @@ -1764,7 +1755,7 @@ packages: dependencies: '@octokit/core': 5.0.2 '@octokit/types': 12.4.0 - dev: true + dev: false /@octokit/plugin-rest-endpoint-methods@10.2.0(@octokit/core@5.0.2): resolution: {integrity: sha512-ePbgBMYtGoRNXDyKGvr9cyHjQ163PbwD0y1MkDJCpkO2YH4OeXX40c4wYHKikHGZcpGPbcRLuy0unPUuafco8Q==} @@ -1774,7 +1765,7 @@ packages: dependencies: '@octokit/core': 5.0.2 '@octokit/types': 12.4.0 - dev: true + dev: false /@octokit/request-error@5.0.1: resolution: {integrity: sha512-X7pnyTMV7MgtGmiXBwmO6M5kIPrntOXdyKZLigNfQWSEQzVxR4a4vo49vJjTWX70mPndj8KhfT4Dx+2Ng3vnBQ==} @@ -1783,7 +1774,7 @@ packages: '@octokit/types': 12.4.0 deprecation: 2.3.1 once: 1.4.0 - dev: true + dev: false /@octokit/request@8.1.6: resolution: {integrity: sha512-YhPaGml3ncZC1NfXpP3WZ7iliL1ap6tLkAp6MvbK2fTTPytzVUyUesBBogcdMm86uRYO5rHaM1xIWxigWZ17MQ==} @@ -1793,13 +1784,13 @@ packages: '@octokit/request-error': 5.0.1 '@octokit/types': 12.4.0 universal-user-agent: 6.0.0 - dev: true + dev: false /@octokit/types@12.4.0: resolution: {integrity: sha512-FLWs/AvZllw/AGVs+nJ+ELCDZZJk+kY0zMen118xhL2zD0s1etIUHm1odgjP7epxYU1ln7SZxEUWYop5bhsdgQ==} dependencies: '@octokit/openapi-types': 19.1.0 - dev: true + dev: false /@pkgjs/parseargs@0.11.0: resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} @@ -2111,7 +2102,6 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] - libc: [glibc] requiresBuild: true dev: true optional: true @@ -2121,7 +2111,6 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] - libc: [musl] requiresBuild: true dev: true optional: true @@ -2131,7 +2120,6 @@ packages: engines: {node: '>= 10'} cpu: [x64] os: [linux] - libc: [glibc] requiresBuild: true dev: true optional: true @@ -2141,7 +2129,6 @@ packages: engines: {node: '>= 10'} cpu: [x64] os: [linux] - libc: [musl] requiresBuild: true dev: true optional: true @@ -2192,6 +2179,12 @@ packages: /@telegraf/types@7.1.0: resolution: {integrity: sha512-kGevOIbpMcIlCDeorKGpwZmdH7kHbqlk/Yj6dEpJMKEQw5lk0KVQY0OLXaCswy8GqlIVLd5625OB+rAntP9xVw==} + dev: false + + /@types/adm-zip@0.5.5: + resolution: {integrity: sha512-YCGstVMjc4LTY5uK9/obvxBya93axZOVOyf2GSUulADzmLhYE45u2nAssCs/fWBs1Ifq5Vat75JTPwd5XZoPJw==} + dependencies: + '@types/node': 20.12.7 dev: true /@types/babel__core@7.20.5: @@ -2307,19 +2300,19 @@ packages: /@types/prop-types@15.7.11: resolution: {integrity: sha512-ga8y9v9uyeiLdpKddhxYQkxNDrfvuPrlFb0N1qnZZByvcElJaXthF1UhvCh9TLWJBEHeNtdnbysW7Y6Uq8CVng==} - /@types/react-dom@18.2.24: - resolution: {integrity: sha512-cN6upcKd8zkGy4HU9F1+/s98Hrp6D4MOcippK4PoE8OZRngohHZpbJn1GsaDLz87MqvHNoT13nHvNqM9ocRHZg==} + /@types/react-dom@18.2.25: + resolution: {integrity: sha512-o/V48vf4MQh7juIKZU2QGDfli6p1+OOi5oXx36Hffpc9adsHeXjVp8rHuPkjd8VT8sOJ2Zp05HR7CdpGTIUFUA==} dependencies: - '@types/react': 18.2.75 + '@types/react': 18.2.77 dev: true /@types/react-transition-group@4.4.10: resolution: {integrity: sha512-hT/+s0VQs2ojCX823m60m5f0sL5idt9SO6Tj6Dg+rdphGPIeJbJ6CxvBYkgkGKrYeDjvIpKTR38UzmtHJOGW3Q==} dependencies: - '@types/react': 18.2.75 + '@types/react': 18.2.77 - /@types/react@18.2.75: - resolution: {integrity: sha512-+DNnF7yc5y0bHkBTiLKqXFe+L4B3nvOphiMY3tuA5X10esmjqk7smyBZzbGTy2vsiy/Bnzj8yFIBL8xhRacoOg==} + /@types/react@18.2.77: + resolution: {integrity: sha512-CUT9KUUF+HytDM7WiXKLF9qUSg4tGImwy4FXTlfEDPEkkNUzJ7rVFolYweJ9fS1ljoIaP7M7Rdjc5eUm/Yu5AA==} dependencies: '@types/prop-types': 15.7.11 csstype: 3.1.3 @@ -2500,7 +2493,7 @@ packages: engines: {node: '>=6.5'} dependencies: event-target-shim: 5.0.1 - dev: true + dev: false /acorn-jsx@5.3.2(acorn@8.11.2): resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==} @@ -2612,13 +2605,6 @@ packages: resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==} dev: true - /array-buffer-byte-length@1.0.0: - resolution: {integrity: sha512-LPuwb2P+NrQw3XhxGc36+XSvuBPopovXYTR9Ew++Du9Yb/bx5AzBfrIsBoj0EZUifjQU+sHL21sseZ3jerWO/A==} - dependencies: - call-bind: 1.0.7 - is-array-buffer: 3.0.2 - dev: true - /array-buffer-byte-length@1.0.1: resolution: {integrity: sha512-ahC5W1xgou+KTXix4sAO8Ki12Q+jf4i0+tmk3sC+zgcynshkHxzpXdImBehiUYKKKDwvfFiJl1tZt6ewscS1Mg==} engines: {node: '>= 0.4'} @@ -2662,20 +2648,20 @@ packages: resolution: {integrity: sha512-LzLoiOMAxvy+Gd3BAq3B7VeIgPdo+Q8hthvKtXybMvRV0jrXfJM/t8mw7nNlpEcVlVUnCnM2KSX4XU5HmpodOA==} engines: {node: '>= 0.4'} dependencies: - call-bind: 1.0.5 + call-bind: 1.0.7 define-properties: 1.2.1 - es-abstract: 1.22.3 + es-abstract: 1.22.5 es-shim-unscopables: 1.0.2 - get-intrinsic: 1.2.2 + get-intrinsic: 1.2.4 dev: true /array.prototype.flat@1.3.2: resolution: {integrity: sha512-djYB+Zx2vLewY8RWlNCUdHjDXs2XOgm602S9E7P/UpHgfeHL00cRiIF+IN/G/aUJ7kGPb6yO/ErDI5V2s8iycA==} engines: {node: '>= 0.4'} dependencies: - call-bind: 1.0.5 + call-bind: 1.0.7 define-properties: 1.2.1 - es-abstract: 1.22.3 + es-abstract: 1.22.5 es-shim-unscopables: 1.0.2 dev: true @@ -2683,9 +2669,9 @@ packages: resolution: {integrity: sha512-Ewyx0c9PmpcsByhSW4r+9zDU7sGjFc86qf/kKtuSCRdhfbk0SNLLkaT5qvcHnRGgc5NP/ly/y+qkXkqONX54CQ==} engines: {node: '>= 0.4'} dependencies: - call-bind: 1.0.5 + call-bind: 1.0.7 define-properties: 1.2.1 - es-abstract: 1.22.3 + es-abstract: 1.22.5 es-shim-unscopables: 1.0.2 dev: true @@ -2708,19 +2694,6 @@ packages: es-shim-unscopables: 1.0.2 dev: true - /arraybuffer.prototype.slice@1.0.2: - resolution: {integrity: sha512-yMBKppFur/fbHu9/6USUe03bZ4knMYiwFBcyiaXB8Go0qNehwX6inYPzK9U0NeQvGxKthcmHcaR8P5MStSRBAw==} - engines: {node: '>= 0.4'} - dependencies: - array-buffer-byte-length: 1.0.0 - call-bind: 1.0.7 - define-properties: 1.2.1 - es-abstract: 1.22.5 - get-intrinsic: 1.2.4 - is-array-buffer: 3.0.2 - is-shared-array-buffer: 1.0.2 - dev: true - /arraybuffer.prototype.slice@1.0.3: resolution: {integrity: sha512-bMxMKAjg13EBSVscxTaYA4mRc5t1UAXa2kXiGTNfZ079HIWXEkKmkgFrh/nJqamaLSrXO5H4WFFkPEaLJWbs3A==} engines: {node: '>= 0.4'} @@ -2766,11 +2739,6 @@ packages: postcss-value-parser: 4.2.0 dev: true - /available-typed-arrays@1.0.5: - resolution: {integrity: sha512-DMD0KiN46eipeziST1LPP/STfDU0sufISXmjSgvVsoU2tqxctQeASejWcfNtxYKqETM1UxQ8sp2OrSBWpHY6sw==} - engines: {node: '>= 0.4'} - dev: true - /available-typed-arrays@1.0.7: resolution: {integrity: sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==} engines: {node: '>= 0.4'} @@ -2811,7 +2779,7 @@ packages: /before-after-hook@2.2.2: resolution: {integrity: sha512-3pZEU3NT5BFUo/AD5ERPWOgQOCZITni6iavr5AUw5AUwQjMlI0kzu5btnyD39AF0gUEsDPwJT+oY1ORBJijPjQ==} - dev: true + dev: false /big-integer@1.6.52: resolution: {integrity: sha512-QxD8cf2eVqJOOz63z6JIN9BzvVs/dlySa5HGSBH5xtR8dPteIRQnBxxKqkNTiT6jbDTF6jAfrd4oMcND9RGbQg==} @@ -2861,18 +2829,18 @@ packages: /buffer-alloc-unsafe@1.1.0: resolution: {integrity: sha512-TEM2iMIEQdJ2yjPJoSIsldnleVaAk1oW3DBVUykyOLsEsFmEc9kn+SFFPz+gl54KQNxlDnAwCXosOS9Okx2xAg==} - dev: true + dev: false /buffer-alloc@1.2.0: resolution: {integrity: sha512-CFsHQgjtW1UChdXgbyJGtnm+O/uLQeZdtbDo8mfUgYXCHSM1wgrVxXm6bSyrUuErEb+4sYVGCzASBRot7zyrow==} dependencies: buffer-alloc-unsafe: 1.1.0 buffer-fill: 1.0.0 - dev: true + dev: false /buffer-fill@1.0.0: resolution: {integrity: sha512-T7zexNBwiiaCOGDg9xNX9PBmjrubblRkENuptryuI64URkXDFum9il/JGL8Lm8wYfAXpredVXXZz7eMHilimiQ==} - dev: true + dev: false /builtin-modules@3.3.0: resolution: {integrity: sha512-zhaCDicdLuWN5UbN5IMnFqNMhNfo919sH85y2/ea+5Yg9TsTkeZxpL+JLbp6cgYFS4sRLp3YV4S6yDuqVWHYOw==} @@ -2892,14 +2860,6 @@ packages: run-applescript: 5.0.0 dev: true - /call-bind@1.0.5: - resolution: {integrity: sha512-C3nQxfFZxFRVoJoGKKI8y3MOEo129NQ+FgQ08iye+Mk4zNZZGdjfs06bVTr+DBSlA66Q2VEcMki/cUCP4SercQ==} - dependencies: - function-bind: 1.1.2 - get-intrinsic: 1.2.2 - set-function-length: 1.1.1 - dev: true - /call-bind@1.0.7: resolution: {integrity: sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==} engines: {node: '>= 0.4'} @@ -3314,7 +3274,7 @@ packages: /deprecation@2.3.1: resolution: {integrity: sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==} - dev: true + dev: false /dequal@2.0.3: resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==} @@ -3431,51 +3391,6 @@ packages: dependencies: is-arrayish: 0.2.1 - /es-abstract@1.22.3: - resolution: {integrity: sha512-eiiY8HQeYfYH2Con2berK+To6GrK2RxbPawDkGq4UiCQQfZHb6wX9qQqkbpPqaxQFcl8d9QzZqo0tGE0VcrdwA==} - engines: {node: '>= 0.4'} - dependencies: - array-buffer-byte-length: 1.0.0 - arraybuffer.prototype.slice: 1.0.2 - available-typed-arrays: 1.0.5 - call-bind: 1.0.5 - es-set-tostringtag: 2.0.2 - es-to-primitive: 1.2.1 - function.prototype.name: 1.1.6 - get-intrinsic: 1.2.2 - get-symbol-description: 1.0.0 - globalthis: 1.0.3 - gopd: 1.0.1 - has-property-descriptors: 1.0.1 - has-proto: 1.0.1 - has-symbols: 1.0.3 - hasown: 2.0.0 - internal-slot: 1.0.6 - is-array-buffer: 3.0.2 - is-callable: 1.2.7 - is-negative-zero: 2.0.2 - is-regex: 1.1.4 - is-shared-array-buffer: 1.0.2 - is-string: 1.0.7 - is-typed-array: 1.1.12 - is-weakref: 1.0.2 - object-inspect: 1.13.1 - object-keys: 1.1.1 - object.assign: 4.1.5 - regexp.prototype.flags: 1.5.1 - safe-array-concat: 1.0.1 - safe-regex-test: 1.0.0 - string.prototype.trim: 1.2.8 - string.prototype.trimend: 1.0.7 - string.prototype.trimstart: 1.0.7 - typed-array-buffer: 1.0.0 - typed-array-byte-length: 1.0.0 - typed-array-byte-offset: 1.0.0 - typed-array-length: 1.0.4 - unbox-primitive: 1.0.2 - which-typed-array: 1.1.13 - dev: true - /es-abstract@1.22.5: resolution: {integrity: sha512-oW69R+4q2wG+Hc3KZePPZxOiisRIqfKBVo/HLx94QcJeWGU/8sZhCvc829rd1kS366vlJbzBfXf9yWwf0+Ko7w==} engines: {node: '>= 0.4'} @@ -3556,15 +3471,6 @@ packages: safe-array-concat: 1.1.0 dev: true - /es-set-tostringtag@2.0.2: - resolution: {integrity: sha512-BuDyupZt65P9D2D2vA/zqcI3G5xRsklm5N3xCwuiy+/vKy8i0ifdsQP1sLgO4tZDSCaQUSnmC48khknGMV3D2Q==} - engines: {node: '>= 0.4'} - dependencies: - get-intrinsic: 1.2.4 - has-tostringtag: 1.0.2 - hasown: 2.0.1 - dev: true - /es-set-tostringtag@2.0.3: resolution: {integrity: sha512-3T8uNMC3OQTHkFUsFq8r/BwAXLHvU/9O9mE0fBc/MY5iq/8H7ncvO947LmYA6ldWw9Uh8Yhf25zu6n7nML5QWQ==} engines: {node: '>= 0.4'} @@ -3691,7 +3597,7 @@ packages: eslint-plugin-promise: ^6.0.0 dependencies: eslint: 8.57.0 - eslint-plugin-import: 2.29.1(@typescript-eslint/parser@7.6.0)(eslint@8.57.0) + eslint-plugin-import: 2.29.1(eslint@8.57.0) eslint-plugin-n: 16.6.2(eslint@8.57.0) eslint-plugin-promise: 6.1.1(eslint@8.57.0) dev: true @@ -3702,7 +3608,7 @@ packages: peerDependencies: eslint-plugin-import: '>=1.4.0' dependencies: - eslint-plugin-import: 2.29.1(@typescript-eslint/parser@7.6.0)(eslint@8.57.0) + eslint-plugin-import: 2.29.1(eslint@8.57.0) dev: true /eslint-import-resolver-node@0.3.9: @@ -3715,7 +3621,7 @@ packages: - supports-color dev: true - /eslint-module-utils@2.8.0(@typescript-eslint/parser@7.6.0)(eslint-import-resolver-node@0.3.9)(eslint@8.57.0): + /eslint-module-utils@2.8.0(eslint-import-resolver-node@0.3.9)(eslint@8.57.0): resolution: {integrity: sha512-aWajIYfsqCKRDgUfjEXNN/JlrzauMuSEy5sbd7WXbtW3EH6A6MpwEh42c7qD+MqQo9QMJ6fWLAeIJynx0g6OAw==} engines: {node: '>=4'} peerDependencies: @@ -3736,7 +3642,6 @@ packages: eslint-import-resolver-webpack: optional: true dependencies: - '@typescript-eslint/parser': 7.6.0(eslint@8.57.0)(typescript@5.4.5) debug: 3.2.7 eslint: 8.57.0 eslint-import-resolver-node: 0.3.9 @@ -3763,7 +3668,7 @@ packages: htmlparser2: 9.1.0 dev: true - /eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.6.0)(eslint@8.57.0): + /eslint-plugin-import@2.29.1(eslint@8.57.0): resolution: {integrity: sha512-BbPC0cuExzhiMo4Ff1BTVwHpjjv28C5R+btTOGaCRC7UEz801up0JadwkeSk5Ued6TG34uaczuVuH6qyy5YUxw==} engines: {node: '>=4'} peerDependencies: @@ -3773,7 +3678,6 @@ packages: '@typescript-eslint/parser': optional: true dependencies: - '@typescript-eslint/parser': 7.6.0(eslint@8.57.0)(typescript@5.4.5) array-includes: 3.1.7 array.prototype.findlastindex: 1.2.3 array.prototype.flat: 1.3.2 @@ -3782,8 +3686,8 @@ packages: doctrine: 2.1.0 eslint: 8.57.0 eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.8.0(@typescript-eslint/parser@7.6.0)(eslint-import-resolver-node@0.3.9)(eslint@8.57.0) - hasown: 2.0.0 + eslint-module-utils: 2.8.0(eslint-import-resolver-node@0.3.9)(eslint@8.57.0) + hasown: 2.0.1 is-core-module: 2.13.1 is-glob: 4.0.3 minimatch: 3.1.2 @@ -3810,12 +3714,12 @@ packages: eslint-plugin-es-x: 7.5.0(eslint@8.57.0) get-tsconfig: 4.7.2 globals: 13.24.0 - ignore: 5.3.0 + ignore: 5.3.1 is-builtin-module: 3.2.1 is-core-module: 2.13.1 minimatch: 3.1.2 resolve: 1.22.8 - semver: 7.5.4 + semver: 7.6.0 dev: true /eslint-plugin-prettier@5.1.3(eslint-config-prettier@9.1.0)(eslint@8.57.0)(prettier@3.2.5): @@ -3918,7 +3822,7 @@ packages: glob-parent: 6.0.2 globals: 13.24.0 graphemer: 1.4.0 - ignore: 5.3.0 + ignore: 5.3.1 imurmurhash: 0.1.4 is-glob: 4.0.3 is-path-inside: 3.0.3 @@ -3979,7 +3883,7 @@ packages: /event-target-shim@5.0.1: resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} engines: {node: '>=6'} - dev: true + dev: false /eventemitter3@5.0.1: resolution: {integrity: sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==} @@ -4264,15 +4168,6 @@ packages: engines: {node: '>=18'} dev: true - /get-intrinsic@1.2.2: - resolution: {integrity: sha512-0gSo4ml/0j98Y3lngkFEot/zhiCeWsbYIlZ+uZOVgzLyLaUw7wxUL+nCTP0XJvJg1AXulJRI3UJi8GsbDuxdGA==} - dependencies: - function-bind: 1.1.2 - has-proto: 1.0.1 - has-symbols: 1.0.3 - hasown: 2.0.0 - dev: true - /get-intrinsic@1.2.4: resolution: {integrity: sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==} engines: {node: '>= 0.4'} @@ -4294,14 +4189,6 @@ packages: engines: {node: '>=16'} dev: true - /get-symbol-description@1.0.0: - resolution: {integrity: sha512-2EmdH1YvIQiZpltCNgkuiUnyukzxM/R6NDJX31Ke3BG1Nq5b0S2PhX59UKi9vZpPDQVdqn+1IcaAwnzTT5vCjw==} - engines: {node: '>= 0.4'} - dependencies: - call-bind: 1.0.7 - get-intrinsic: 1.2.4 - dev: true - /get-symbol-description@1.0.2: resolution: {integrity: sha512-g0QYk1dZBxGwk+Ngc+ltRH2IBp2f7zBkBMBJZCDerh6EhlhSR6+9irMCuT/09zD6qkarHUSn529sK/yL4S27mg==} engines: {node: '>= 0.4'} @@ -4347,7 +4234,7 @@ packages: dependencies: foreground-child: 3.1.1 jackspeak: 2.3.6 - minimatch: 9.0.3 + minimatch: 9.0.4 minipass: 7.0.4 path-scurry: 1.10.1 dev: true @@ -4456,23 +4343,12 @@ packages: engines: {node: '>=8'} dev: true - /has-property-descriptors@1.0.1: - resolution: {integrity: sha512-VsX8eaIewvas0xnvinAe9bw4WfIeODpGYikiWYLH+dma0Jw6KHYqWiWfhQlgOVK8D6PvjubK5Uc4P0iIhIcNVg==} - dependencies: - get-intrinsic: 1.2.4 - dev: true - /has-property-descriptors@1.0.2: resolution: {integrity: sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==} dependencies: es-define-property: 1.0.0 dev: true - /has-proto@1.0.1: - resolution: {integrity: sha512-7qE+iP+O+bgF9clE5+UoBFzE65mlBiVj3tKCrlNQ0Ogwm0BjpT/gK4SlLYDMybDh5I3TCTKnPPa0oMG7JDYrhg==} - engines: {node: '>= 0.4'} - dev: true - /has-proto@1.0.3: resolution: {integrity: sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==} engines: {node: '>= 0.4'} @@ -4490,18 +4366,11 @@ packages: has-symbols: 1.0.3 dev: true - /hasown@2.0.0: - resolution: {integrity: sha512-vUptKVTpIJhcczKBbgnS+RtcuYMB8+oNzPK2/Hp3hanz8JmpATdmmgLgSaadVREkDm+e2giHwY3ZRkyjSIDDFA==} - engines: {node: '>= 0.4'} - dependencies: - function-bind: 1.1.2 - /hasown@2.0.1: resolution: {integrity: sha512-1/th4MHjnwncwXsIW6QMzlvYL9kG5e/CpVvLRZe4XPa8TOUNbCELqmvhDmnkNsAjwaG4+I8gJJL0JBvTTLO9qA==} engines: {node: '>= 0.4'} dependencies: function-bind: 1.1.2 - dev: true /hast-util-to-jsx-runtime@2.3.0: resolution: {integrity: sha512-H/y0+IWPdsLLS738P8tDnrQ8Z+dj12zQQ6WC11TIM21C8WFVoIxcqWXf2H3hiTVZjF1AWqoimGwrTWecWrnmRQ==} @@ -4599,7 +4468,7 @@ packages: resolution: {integrity: sha512-AB6lFlbwwyIqMdHYhwPe+kjOC3Oc5P3nThEoW/AaO2BX3vJDjWPFxYLxokUZOo6RNX20He3AaT8sESs9NJcmEw==} engines: {node: '>=18'} hasBin: true - dev: true + dev: false /i18next@23.11.1: resolution: {integrity: sha512-mXw4A24BiPZKRsbb9ewgSvjYd6fxFCNwJyfK6nYfSTIAX2GkCWcb598m3DFkDZmqADatvuASrKo6qwORz3VwTQ==} @@ -4607,11 +4476,6 @@ packages: '@babel/runtime': 7.24.1 dev: false - /ignore@5.3.0: - resolution: {integrity: sha512-g7dmpshy+gD7mh88OC9NwSGTKoc3kyLAZQRU1mt53Aw/vnvfXnbC+F/7F7QoYVKbV+KNvJx8wArewKy1vXMtlg==} - engines: {node: '>= 4'} - dev: true - /ignore@5.3.1: resolution: {integrity: sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==} engines: {node: '>= 4'} @@ -4660,15 +4524,6 @@ packages: resolution: {integrity: sha512-EcKzdTHVe8wFVOGEYXiW9WmJXPjqi1T+234YpJr98RiFYKHV3cdy1+3mkTE+KHTHxFFLH51SfaGOoUdW+v7ViQ==} dev: false - /internal-slot@1.0.6: - resolution: {integrity: sha512-Xj6dv+PsbtwyPpEflsejS+oIZxmMlV44zAhG479uYu89MsjcYOhCFnNyKrkJrihbsiasQyY0afoCl/9BLR65bg==} - engines: {node: '>= 0.4'} - dependencies: - get-intrinsic: 1.2.4 - hasown: 2.0.1 - side-channel: 1.0.4 - dev: true - /internal-slot@1.0.7: resolution: {integrity: sha512-NGnrKwXzSms2qUUih/ILZ5JBqNTSa1+ZmP6flaIp6KmSElgE9qdndzS3cqjrDovwFdmwsGsLdeFgB6suw+1e9g==} engines: {node: '>= 0.4'} @@ -4693,14 +4548,6 @@ packages: is-decimal: 2.0.1 dev: false - /is-array-buffer@3.0.2: - resolution: {integrity: sha512-y+FyyR/w8vfIRq4eQcM1EYgSTnmHXPqaF+IgzgraytCFq5Xh8lllDVmAZolPJiZttZLeFSINPYMaEJ7/vWUa1w==} - dependencies: - call-bind: 1.0.7 - get-intrinsic: 1.2.4 - is-typed-array: 1.1.12 - dev: true - /is-array-buffer@3.0.4: resolution: {integrity: sha512-wcjaerHw0ydZwfhiKbXJWLDY8A7yV7KhjQOpb83hGgGfId/aQa4TOvwyzn2PuswW2gPCYEL/nEAiSVpdOj1lXw==} engines: {node: '>= 0.4'} @@ -4754,7 +4601,7 @@ packages: /is-core-module@2.13.1: resolution: {integrity: sha512-hHrIjvZsftOsvKSn2TRYl63zvxsgE0K+0mYMoH6gD4omR5IWB2KynivBQczo3+wF1cCkjzvptnI9Q0sPU66ilw==} dependencies: - hasown: 2.0.0 + hasown: 2.0.1 /is-date-object@1.0.5: resolution: {integrity: sha512-9YQaSxsAiSwcvS33MBk3wTCVnWK+HhF8VZR2jRxehM16QcVOdHqPn4VPHmRK4lSr38n9JriurInLcP90xsYNfQ==} @@ -4835,11 +4682,6 @@ packages: resolution: {integrity: sha512-cOZFQQozTha1f4MxLFzlgKYPTyj26picdZTx82hbc/Xf4K/tZOOXSCkMvU4pKioRXGDLJRn0GM7Upe7kR721yg==} dev: true - /is-negative-zero@2.0.2: - resolution: {integrity: sha512-dqJvarLawXsFbNDeJW7zAz8ItJ9cd28YufuuFzh0G8pNHjJMnY08Dv7sYX2uF5UpQOwieAeOExEYAWWfu7ZZUA==} - engines: {node: '>= 0.4'} - dev: true - /is-negative-zero@2.0.3: resolution: {integrity: sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==} engines: {node: '>= 0.4'} @@ -4888,12 +4730,6 @@ packages: resolution: {integrity: sha512-+2cnTEZeY5z/iXGbLhPrOAaK/Mau5k5eXq9j14CpRTftq0pAJu2MwVRSZhyZWBzx3o6X795Lz6Bpb6R0GKf37g==} dev: true - /is-shared-array-buffer@1.0.2: - resolution: {integrity: sha512-sqN2UDu1/0y6uvXyStCOzyhAjCSlHceFoMKJW8W9EU9cvic/QdsZ0kEU93HEy3IUEFZIiH/3w+AH/UQbPHNdhA==} - dependencies: - call-bind: 1.0.7 - dev: true - /is-shared-array-buffer@1.0.3: resolution: {integrity: sha512-nA2hv5XIhLR3uVzDDfCIknerhx8XUKnstuOERPNNIinXG7v9u+ohXF67vxm4TPTEPU6lm61ZkwP3c9PCB97rhg==} engines: {node: '>= 0.4'} @@ -4932,13 +4768,6 @@ packages: text-extensions: 2.4.0 dev: true - /is-typed-array@1.1.12: - resolution: {integrity: sha512-Z14TF2JNG8Lss5/HMqt0//T9JeHXttXy5pH/DBU4vi98ozO2btxzq9MwYDZYnKwU8nRsz/+GVFVRDq3DkVuSPg==} - engines: {node: '>= 0.4'} - dependencies: - which-typed-array: 1.1.13 - dev: true - /is-typed-array@1.1.13: resolution: {integrity: sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw==} engines: {node: '>= 0.4'} @@ -5605,13 +5434,6 @@ packages: brace-expansion: 1.1.11 dev: true - /minimatch@9.0.3: - resolution: {integrity: sha512-RHiac9mvaRw0x3AYRgDC1CxAP7HTcNrrECeA8YYJeWnpo+2Q5CegtZjaotWTWxDG3UeGA1coE05iH1mPjT/2mg==} - engines: {node: '>=16 || 14 >=14.17'} - dependencies: - brace-expansion: 2.0.1 - dev: true - /minimatch@9.0.4: resolution: {integrity: sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw==} engines: {node: '>=16 || 14 >=14.17'} @@ -5634,12 +5456,12 @@ packages: /mri@1.2.0: resolution: {integrity: sha512-tzzskb3bG8LvYGFF/mDTpq3jpI6Q9wc3LEmBaghu+DdCssd1FakN7Bc0hVNmEyGq1bq3RgfkCb3cmQLpNPOroA==} engines: {node: '>=4'} - dev: true + dev: false /ms@2.1.2: resolution: {integrity: sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==} - /mui-color-input@2.0.3(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0): + /mui-color-input@2.0.3(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@mui/material@5.15.15)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0): resolution: {integrity: sha512-rAd040qQ0Y+8dk4gE8kkCiJ/vCgA0j4vv1quJ43BfORTFE3uHarHj0xY1Vo9CPbojtx1f5vW+CjckYPRIZPIRg==} peerDependencies: '@emotion/react': ^11.5.0 @@ -5653,10 +5475,10 @@ packages: optional: true dependencies: '@ctrl/tinycolor': 4.0.3 - '@emotion/react': 11.11.4(@types/react@18.2.75)(react@18.2.0) - '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.75)(react@18.2.0) - '@mui/material': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.75)(react-dom@18.2.0)(react@18.2.0) - '@types/react': 18.2.75 + '@emotion/react': 11.11.4(@types/react@18.2.77)(react@18.2.0) + '@emotion/styled': 11.11.5(@emotion/react@11.11.4)(@types/react@18.2.77)(react@18.2.0) + '@mui/material': 5.15.15(@emotion/react@11.11.4)(@emotion/styled@11.11.5)(@types/react@18.2.77)(react-dom@18.2.0)(react@18.2.0) + '@types/react': 18.2.77 react: 18.2.0 react-dom: 18.2.0(react@18.2.0) dev: false @@ -5692,7 +5514,7 @@ packages: optional: true dependencies: whatwg-url: 5.0.0 - dev: true + dev: false /node-fetch@3.3.2: resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} @@ -5729,7 +5551,7 @@ packages: ansi-styles: 6.2.1 cross-spawn: 7.0.3 memorystream: 0.3.1 - minimatch: 9.0.3 + minimatch: 9.0.4 pidtree: 0.6.0 read-package-json-fast: 3.0.2 shell-quote: 1.8.1 @@ -5785,18 +5607,18 @@ packages: resolution: {integrity: sha512-UPbPHML6sL8PI/mOqPwsH4G6iyXcCGzLin8KvEPenOZN5lpCNBZZQ+V62vdjB1mQHrmqGQt5/OJzemUA+KJmEA==} engines: {node: '>= 0.4'} dependencies: - call-bind: 1.0.5 + call-bind: 1.0.7 define-properties: 1.2.1 - es-abstract: 1.22.3 + es-abstract: 1.22.5 dev: true /object.groupby@1.0.1: resolution: {integrity: sha512-HqaQtqLnp/8Bn4GL16cj+CUYbnpe1bh0TtEaWvybszDG4tgxCJuRpV8VGuvNaI1fAnI4lUJzDG55MXcOH4JZcQ==} dependencies: - call-bind: 1.0.5 + call-bind: 1.0.7 define-properties: 1.2.1 - es-abstract: 1.22.3 - get-intrinsic: 1.2.2 + es-abstract: 1.22.5 + get-intrinsic: 1.2.4 dev: true /object.hasown@1.1.3: @@ -5810,16 +5632,15 @@ packages: resolution: {integrity: sha512-aU6xnDFYT3x17e/f0IiiwlGPTy2jzMySGfUB4fq6z7CV8l85CWHDk5ErhyhpfDHhrOMwGFhSQkhMGHaIotA6Ng==} engines: {node: '>= 0.4'} dependencies: - call-bind: 1.0.5 + call-bind: 1.0.7 define-properties: 1.2.1 - es-abstract: 1.22.3 + es-abstract: 1.22.5 dev: true /once@1.4.0: resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} dependencies: wrappy: 1.0.2 - dev: true /onetime@5.1.2: resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} @@ -5888,7 +5709,7 @@ packages: /p-timeout@4.1.0: resolution: {integrity: sha512-+/wmHtzJuWii1sXn3HCuH/FTwGhrp4tmJTxSKJbfS+vkipci6osxXM5mY0jUiRzWKMTgUT8l7HFbeSwZAynqHw==} engines: {node: '>=10'} - dev: true + dev: false /parent-module@1.0.1: resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} @@ -6048,14 +5869,6 @@ packages: postcss: 8.4.38 dev: true - /postcss-selector-parser@6.0.15: - resolution: {integrity: sha512-rEYkQOMUCEMhsKbK66tbEU9QVIxbhN18YiniAwA7XQYTVBqrBy+P2p5JcdqsHgKM2zWylp8d7J6eszocfds5Sw==} - engines: {node: '>=4'} - dependencies: - cssesc: 3.0.0 - util-deprecate: 1.0.2 - dev: true - /postcss-selector-parser@6.0.16: resolution: {integrity: sha512-A0RVJrX+IUkVZbW3ClroRWurercFhieevHB38sr2+l9eUClMqome3LmEmnhlNy+5Mr2EYN6B2Kaw9wYdd+VHiw==} engines: {node: '>=4'} @@ -6150,7 +5963,7 @@ packages: peerDependencies: react: '>=16.13.1' dependencies: - '@babel/runtime': 7.23.9 + '@babel/runtime': 7.24.1 react: 18.2.0 dev: false @@ -6158,8 +5971,8 @@ packages: resolution: {integrity: sha512-nsO+KSNgo1SbJqJEYRE9ERzo7YtYbou/OqjSQKxV7jcKox7+usiUVZOAC+XnDOABXggQTno0Y1CpVnuWEc1boQ==} dev: false - /react-hook-form@7.51.2(react@18.2.0): - resolution: {integrity: sha512-y++lwaWjtzDt/XNnyGDQy6goHskFualmDlf+jzEZvjvz6KWDf7EboL7pUvRCzPTJd0EOPpdekYaQLEvvG6m6HA==} + /react-hook-form@7.51.3(react@18.2.0): + resolution: {integrity: sha512-cvJ/wbHdhYx8aviSWh28w9ImjmVsb5Y05n1+FW786vEZQJV5STNM0pW6ujS+oiBecb0ARBxJFyAnXj9+GHXACQ==} engines: {node: '>=12.22.0'} peerDependencies: react: ^16.8.0 || ^17 || ^18 @@ -6180,7 +5993,7 @@ packages: react-native: optional: true dependencies: - '@babel/runtime': 7.23.9 + '@babel/runtime': 7.24.1 html-parse-stringify: 3.0.1 i18next: 23.11.1 react: 18.2.0 @@ -6194,14 +6007,14 @@ packages: resolution: {integrity: sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==} dev: false - /react-markdown@9.0.1(@types/react@18.2.75)(react@18.2.0): + /react-markdown@9.0.1(@types/react@18.2.77)(react@18.2.0): resolution: {integrity: sha512-186Gw/vF1uRkydbsOIkcGXw7aHq0sZOCRFFjGrr7b9+nVZg4UfA4enXCaxm4fUzecU38sWfrNDitGhshuU7rdg==} peerDependencies: '@types/react': '>=18' react: '>=18' dependencies: '@types/hast': 3.0.4 - '@types/react': 18.2.75 + '@types/react': 18.2.77 devlop: 1.1.0 hast-util-to-jsx-runtime: 2.3.0 html-url-attributes: 3.0.0 @@ -6250,7 +6063,7 @@ packages: react: '>=16.6.0' react-dom: '>=16.6.0' dependencies: - '@babel/runtime': 7.23.8 + '@babel/runtime': 7.24.1 dom-helpers: 5.2.1 loose-envify: 1.4.0 prop-types: 15.8.1 @@ -6329,15 +6142,6 @@ packages: resolution: {integrity: sha512-srw17NI0TUWHuGa5CFGGmhfNIeja30WMBfbslPNhf6JrqQlLN5gcrvig1oqPxiVaXb0oW0XRKtH6Nngs5lKCIA==} dev: false - /regexp.prototype.flags@1.5.1: - resolution: {integrity: sha512-sy6TXMN+hnP/wMy+ISxg3krXx7BAtWVO4UouuCN/ziM9UEne0euamVNafDfvC83bRNr95y0V5iijeDQFUNpvrg==} - engines: {node: '>= 0.4'} - dependencies: - call-bind: 1.0.7 - define-properties: 1.2.1 - set-function-name: 2.0.1 - dev: true - /regexp.prototype.flags@1.5.2: resolution: {integrity: sha512-NcDiDkTLuPR+++OCKB0nWafEmhg/Da8aUPLPMQbK+bxKKCm1/S5he+AqYa4PlMCVBalb4/yxIRub6qkEx5yJbw==} engines: {node: '>= 0.4'} @@ -6482,16 +6286,6 @@ packages: dependencies: queue-microtask: 1.2.3 - /safe-array-concat@1.0.1: - resolution: {integrity: sha512-6XbUAseYE2KtOuGueyeobCySj9L4+66Tn6KQMOPQJrAJEowYKW/YR/MGJZl7FdydUdaFu4LYyDZjxf4/Nmo23Q==} - engines: {node: '>=0.4'} - dependencies: - call-bind: 1.0.7 - get-intrinsic: 1.2.4 - has-symbols: 1.0.3 - isarray: 2.0.5 - dev: true - /safe-array-concat@1.1.0: resolution: {integrity: sha512-ZdQ0Jeb9Ofti4hbt5lX3T2JcAamT9hfzYU1MNB+z/jaEbB6wfFfPIR/zEORmZqobkCCJhSjodobH6WHNmJ97dg==} engines: {node: '>=0.4'} @@ -6510,15 +6304,7 @@ packages: resolution: {integrity: sha512-b9wZ986HHCo/HbKrRpBJb2kqXMK9CEWIE1egeEvZsYn69ay3kdfl9nG3RyOcR+jInTDf7a86WQ1d4VJX7goSSQ==} dependencies: buffer-alloc: 1.2.0 - dev: true - - /safe-regex-test@1.0.0: - resolution: {integrity: sha512-JBUUzyOgEwXQY1NuPtvcj/qcBDbDmEvWufhlnXZIm75DEHp+afM1r1ujJpJsV/gSM4t59tpDyPi1sd6ZaPFfsA==} - dependencies: - call-bind: 1.0.7 - get-intrinsic: 1.2.4 - is-regex: 1.1.4 - dev: true + dev: false /safe-regex-test@1.0.3: resolution: {integrity: sha512-CdASjNJPvRa7roO6Ra/gLYBTzYzzPyyBXxIMdGW3USQLyjWEls2RgW5UBTXaQVp+OrpeCK3bLem8smtmheoRuw==} @@ -6532,7 +6318,7 @@ packages: /sandwich-stream@2.0.2: resolution: {integrity: sha512-jLYV0DORrzY3xaz/S9ydJL6Iz7essZeAfnAavsJ+zsJGZ1MOnsS52yRjU3uF3pJa/lla7+wisp//fxOwOH8SKQ==} engines: {node: '>= 0.10'} - dev: true + dev: false /sass@1.74.1: resolution: {integrity: sha512-w0Z9p/rWZWelb88ISOLyvqTWGmtmu2QJICqDBGyNnfG4OUnPX9BBjjYIXUpXCMOOg5MQWNpqzt876la1fsTvUA==} @@ -6559,14 +6345,6 @@ packages: hasBin: true dev: true - /semver@7.5.4: - resolution: {integrity: sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==} - engines: {node: '>=10'} - hasBin: true - dependencies: - lru-cache: 6.0.0 - dev: true - /semver@7.6.0: resolution: {integrity: sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==} engines: {node: '>=10'} @@ -6575,16 +6353,6 @@ packages: lru-cache: 6.0.0 dev: true - /set-function-length@1.1.1: - resolution: {integrity: sha512-VoaqjbBJKiWtg4yRcKBQ7g7wnGnLV3M8oLvVWwOk2PdYY6PEFegR1vezXR0tw6fZGF9csVakIRjrJiy2veSBFQ==} - engines: {node: '>= 0.4'} - dependencies: - define-data-property: 1.1.4 - get-intrinsic: 1.2.4 - gopd: 1.0.1 - has-property-descriptors: 1.0.2 - dev: true - /set-function-length@1.2.1: resolution: {integrity: sha512-j4t6ccc+VsKwYHso+kElc5neZpjtq9EnRICFZtWyBsLojhmeF/ZBd/elqm22WJh/BziDe/SBiOeAt0m2mfLD0g==} engines: {node: '>= 0.4'} @@ -6885,7 +6653,7 @@ packages: known-css-properties: 0.29.0 postcss-media-query-parser: 0.2.3 postcss-resolve-nested-selector: 0.1.1 - postcss-selector-parser: 6.0.15 + postcss-selector-parser: 6.0.16 postcss-value-parser: 4.2.0 stylelint: 16.3.1(typescript@5.4.5) dev: true @@ -7021,7 +6789,7 @@ packages: transitivePeerDependencies: - encoding - supports-color - dev: true + dev: false /text-extensions@2.4.0: resolution: {integrity: sha512-te/NtwBwfiNRLf9Ijqx3T0nlqZiQ2XrrtBvu+cLL8ZRrGkO0NHTug8MYFKyoSrv/sHTaSKfilUkizV6XhxMJ3g==} @@ -7053,7 +6821,7 @@ packages: /tr46@0.0.3: resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} - dev: true + dev: false /trim-lines@3.0.1: resolution: {integrity: sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==} @@ -7111,7 +6879,7 @@ packages: /tunnel@0.0.6: resolution: {integrity: sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==} engines: {node: '>=0.6.11 <=0.7.0 || >=0.7.3'} - dev: true + dev: false /type-check@0.4.0: resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==} @@ -7130,15 +6898,6 @@ packages: engines: {node: '>=14.16'} dev: true - /typed-array-buffer@1.0.0: - resolution: {integrity: sha512-Y8KTSIglk9OZEr8zywiIHG/kmQ7KWyjseXs1CbSo8vC42w7hg2HgYTxSWwP0+is7bWDc1H+Fo026CpHFwm8tkw==} - engines: {node: '>= 0.4'} - dependencies: - call-bind: 1.0.7 - get-intrinsic: 1.2.4 - is-typed-array: 1.1.12 - dev: true - /typed-array-buffer@1.0.2: resolution: {integrity: sha512-gEymJYKZtKXzzBzM4jqa9w6Q1Jjm7x2d+sh19AdsD4wqnMPDYyvwpsIc2Q/835kHuo3BEQ7CjelGhfTsoBb2MQ==} engines: {node: '>= 0.4'} @@ -7148,16 +6907,6 @@ packages: is-typed-array: 1.1.13 dev: true - /typed-array-byte-length@1.0.0: - resolution: {integrity: sha512-Or/+kvLxNpeQ9DtSydonMxCx+9ZXOswtwJn17SNLvhptaXYDJvkFFP5zbfU/uLmvnBJlI4yrnXRxpdWH/M5tNA==} - engines: {node: '>= 0.4'} - dependencies: - call-bind: 1.0.7 - for-each: 0.3.3 - has-proto: 1.0.3 - is-typed-array: 1.1.12 - dev: true - /typed-array-byte-length@1.0.1: resolution: {integrity: sha512-3iMJ9q0ao7WE9tWcaYKIptkNBuOIcZCCT0d4MRvuuH88fEoEH62IuQe0OtraD3ebQEoTRk8XCBoknUNc1Y67pw==} engines: {node: '>= 0.4'} @@ -7169,17 +6918,6 @@ packages: is-typed-array: 1.1.13 dev: true - /typed-array-byte-offset@1.0.0: - resolution: {integrity: sha512-RD97prjEt9EL8YgAgpOkf3O4IF9lhJFr9g0htQkm0rchFp/Vx7LW5Q8fSXXub7BXAODyUQohRMyOc3faCPd0hg==} - engines: {node: '>= 0.4'} - dependencies: - available-typed-arrays: 1.0.5 - call-bind: 1.0.7 - for-each: 0.3.3 - has-proto: 1.0.3 - is-typed-array: 1.1.12 - dev: true - /typed-array-byte-offset@1.0.2: resolution: {integrity: sha512-Ous0vodHa56FviZucS2E63zkgtgrACj7omjwd/8lTEMEPFFyjfixMZ1ZXenpgCFBBt4EC1J2XsyVS2gkG0eTFA==} engines: {node: '>= 0.4'} @@ -7192,14 +6930,6 @@ packages: is-typed-array: 1.1.13 dev: true - /typed-array-length@1.0.4: - resolution: {integrity: sha512-KjZypGq+I/H7HI5HlOoGHkWUUGq+Q0TPhQurLbyrVrvnKTBgzLhIJ7j6J/XTQOi0d1RjyZ0wdas8bKs2p0x3Ng==} - dependencies: - call-bind: 1.0.7 - for-each: 0.3.3 - is-typed-array: 1.1.12 - dev: true - /typed-array-length@1.0.5: resolution: {integrity: sha512-yMi0PlwuznKHxKmcpoOdeLwxBoVPkqZxd7q2FgMkmD3bNwvF5VW0+UlUQ1k1vmktTu4Yu13Q0RIxEP8+B+wloA==} engines: {node: '>= 0.4'} @@ -7235,7 +6965,7 @@ packages: engines: {node: '>=14.0'} dependencies: '@fastify/busboy': 2.1.0 - dev: true + dev: false /unicorn-magic@0.1.0: resolution: {integrity: sha512-lRfVq8fE8gz6QMBuDM6a+LO3IAzTi05H6gCVaUpir2E1Rwpo4ZUog45KpNXKC/Mn3Yb9UDuHumeFTo9iV/D9FQ==} @@ -7296,7 +7026,7 @@ packages: /universal-user-agent@6.0.0: resolution: {integrity: sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==} - dev: true + dev: false /universalify@2.0.0: resolution: {integrity: sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==} @@ -7457,14 +7187,14 @@ packages: /webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} - dev: true + dev: false /whatwg-url@5.0.0: resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} dependencies: tr46: 0.0.3 webidl-conversions: 3.0.1 - dev: true + dev: false /which-boxed-primitive@1.0.2: resolution: {integrity: sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg==} @@ -7503,17 +7233,6 @@ packages: is-weakset: 2.0.2 dev: true - /which-typed-array@1.1.13: - resolution: {integrity: sha512-P5Nra0qjSncduVPEAr7xhoF5guty49ArDTwzJ/yNuPIbZppyRxFQsRCWrocxIY+CnMVG+qfbU2FmDKyvSGClow==} - engines: {node: '>= 0.4'} - dependencies: - available-typed-arrays: 1.0.5 - call-bind: 1.0.7 - for-each: 0.3.3 - gopd: 1.0.1 - has-tostringtag: 1.0.2 - dev: true - /which-typed-array@1.1.14: resolution: {integrity: sha512-VnXFiIW8yNn9kIHN88xvZ4yOWchftKDsRJ8fEPacX/wl1lOvBrhsJ/OeJCXq7B0AaijRuqgzSKalJoPk+D8MPg==} engines: {node: '>= 0.4'} @@ -7569,7 +7288,6 @@ packages: /wrappy@1.0.2: resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} - dev: true /write-file-atomic@5.0.1: resolution: {integrity: sha512-+QU2zd6OTD8XWIJCbffaiQeH9U73qIqafo1x6V1snCWYGJf6cVE0cDR4D8xRzcEnfI21IFrUPzPGtcPf8AC+Rw==} diff --git a/clash-nyanpasu/pnpm-workspace.yaml b/clash-nyanpasu/pnpm-workspace.yaml new file mode 100644 index 0000000000..5b81f0cef0 --- /dev/null +++ b/clash-nyanpasu/pnpm-workspace.yaml @@ -0,0 +1,3 @@ +packages: + - "frontend/*" + - "scripts" diff --git a/clash-nyanpasu/scripts/package.json b/clash-nyanpasu/scripts/package.json new file mode 100644 index 0000000000..3e89bcce3e --- /dev/null +++ b/clash-nyanpasu/scripts/package.json @@ -0,0 +1,18 @@ +{ + "name": "@nyanpasu/scripts", + "version": "0.1.0", + "dependencies": { + "@actions/github": "6.0.0", + "telegraf": "4.16.3" + }, + "devDependencies": { + "@types/adm-zip": "0.5.5", + "adm-zip": "0.5.12", + "colorize-template": "1.0.0", + "consola": "3.2.3", + "fs-extra": "11.2.0", + "https-proxy-agent": "7.0.4", + "node-fetch": "3.3.2", + "picocolors": "1.0.0" + } +} diff --git a/clash-nyanpasu/scripts/tsconfig.json b/clash-nyanpasu/scripts/tsconfig.json new file mode 100644 index 0000000000..8162dc1f67 --- /dev/null +++ b/clash-nyanpasu/scripts/tsconfig.json @@ -0,0 +1,19 @@ +{ + "compilerOptions": { + "baseUrl": ".", + "target": "ESNext", + "useDefineForClassFields": true, + "lib": ["ESNext"], + "allowJs": true, + "skipLibCheck": true, + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "forceConsistentCasingInFileNames": true, + "module": "ESNext", + "moduleResolution": "Node", + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": true, + }, +} diff --git a/clash-nyanpasu/scripts/updatelog.ts b/clash-nyanpasu/scripts/updatelog.ts index d965f8ced7..69d67f8e31 100644 --- a/clash-nyanpasu/scripts/updatelog.ts +++ b/clash-nyanpasu/scripts/updatelog.ts @@ -1,5 +1,5 @@ import fs from "fs-extra"; -import path from "node:path"; +import path from "path"; import { cwd } from "./utils/env"; const UPDATE_LOG = "UPDATELOG.md"; diff --git a/clash-nyanpasu/scripts/utils/env.ts b/clash-nyanpasu/scripts/utils/env.ts index 234bd3d6eb..68899142fe 100644 --- a/clash-nyanpasu/scripts/utils/env.ts +++ b/clash-nyanpasu/scripts/utils/env.ts @@ -1,4 +1,4 @@ -import path from "node:path"; +import path from "path"; export const cwd = process.cwd(); export const TAURI_APP_DIR = path.join(cwd, "backend/tauri"); diff --git a/clash-nyanpasu/tsconfig.json b/clash-nyanpasu/tsconfig.json index b70f52bec1..12dd957129 100644 --- a/clash-nyanpasu/tsconfig.json +++ b/clash-nyanpasu/tsconfig.json @@ -1,25 +1,8 @@ { "compilerOptions": { - "baseUrl": ".", - "target": "ESNext", - "useDefineForClassFields": true, - "lib": ["DOM", "DOM.Iterable", "ESNext"], - "allowJs": true, "skipLibCheck": true, "esModuleInterop": true, - "allowSyntheticDefaultImports": true, - "strict": true, - "forceConsistentCasingInFileNames": true, - "module": "ESNext", - "moduleResolution": "Node", - "resolveJsonModule": true, - "isolatedModules": true, - "noEmit": true, - "jsx": "preserve", - "paths": { - "@/*": ["src/*"], - "~/*": ["./*"], - }, }, - "include": ["./src"], + "include": ["./frontend/**/tsconfig.json", "./scripts/updater.ts"], + "exclude": ["node_modules"], } diff --git a/mihomo/adapter/provider/provider.go b/mihomo/adapter/provider/provider.go index 2715a30972..a40a50ec44 100644 --- a/mihomo/adapter/provider/provider.go +++ b/mihomo/adapter/provider/provider.go @@ -124,8 +124,8 @@ func (pp *proxySetProvider) getSubscriptionInfo() { go func() { ctx, cancel := context.WithTimeout(context.Background(), time.Second*90) defer cancel() - resp, err := mihomoHttp.HttpRequest(ctx, pp.Vehicle().(*resource.HTTPVehicle).Url(), - http.MethodGet, http.Header{"User-Agent": {C.UA}}, nil) + resp, err := mihomoHttp.HttpRequestWithProxy(ctx, pp.Vehicle().(*resource.HTTPVehicle).Url(), + http.MethodGet, http.Header{"User-Agent": {C.UA}}, nil, pp.Vehicle().Proxy()) if err != nil { return } @@ -133,8 +133,8 @@ func (pp *proxySetProvider) getSubscriptionInfo() { userInfoStr := strings.TrimSpace(resp.Header.Get("subscription-userinfo")) if userInfoStr == "" { - resp2, err := mihomoHttp.HttpRequest(ctx, pp.Vehicle().(*resource.HTTPVehicle).Url(), - http.MethodGet, http.Header{"User-Agent": {"Quantumultx"}}, nil) + resp2, err := mihomoHttp.HttpRequestWithProxy(ctx, pp.Vehicle().(*resource.HTTPVehicle).Url(), + http.MethodGet, http.Header{"User-Agent": {"Quantumultx"}}, nil, pp.Vehicle().Proxy()) if err != nil { return } diff --git a/mihomo/component/resolver/system.go b/mihomo/component/resolver/system.go new file mode 100644 index 0000000000..4ed990a365 --- /dev/null +++ b/mihomo/component/resolver/system.go @@ -0,0 +1,35 @@ +package resolver + +import "sync" + +var blacklist struct { + Map map[string]struct{} + Mutex sync.Mutex +} + +func AddSystemDnsBlacklist(names ...string) { + blacklist.Mutex.Lock() + defer blacklist.Mutex.Unlock() + for _, name := range names { + blacklist.Map[name] = struct{}{} + } +} + +func RemoveSystemDnsBlacklist(names ...string) { + blacklist.Mutex.Lock() + defer blacklist.Mutex.Unlock() + for _, name := range names { + delete(blacklist.Map, name) + } +} + +func IsSystemDnsBlacklisted(names ...string) bool { + blacklist.Mutex.Lock() + defer blacklist.Mutex.Unlock() + for _, name := range names { + if _, ok := blacklist.Map[name]; ok { + return true + } + } + return false +} diff --git a/mihomo/component/resource/vehicle.go b/mihomo/component/resource/vehicle.go index 2d71be9455..b13369d22e 100644 --- a/mihomo/component/resource/vehicle.go +++ b/mihomo/component/resource/vehicle.go @@ -28,6 +28,10 @@ func (f *FileVehicle) Read() ([]byte, error) { return os.ReadFile(f.path) } +func (f *FileVehicle) Proxy() string { + return "" +} + func NewFileVehicle(path string) *FileVehicle { return &FileVehicle{path: path} } @@ -51,6 +55,10 @@ func (h *HTTPVehicle) Path() string { return h.path } +func (h *HTTPVehicle) Proxy() string { + return h.proxy +} + func (h *HTTPVehicle) Read() ([]byte, error) { ctx, cancel := context.WithTimeout(context.Background(), time.Second*20) defer cancel() diff --git a/mihomo/constant/provider/interface.go b/mihomo/constant/provider/interface.go index f2b6939e81..bb73d1bce2 100644 --- a/mihomo/constant/provider/interface.go +++ b/mihomo/constant/provider/interface.go @@ -31,6 +31,7 @@ func (v VehicleType) String() string { type Vehicle interface { Read() ([]byte, error) Path() string + Proxy() string Type() VehicleType } diff --git a/mihomo/dns/system.go b/mihomo/dns/system.go index 37607a60bf..dc1006fa56 100644 --- a/mihomo/dns/system.go +++ b/mihomo/dns/system.go @@ -8,6 +8,7 @@ import ( "sync" "time" + "github.com/metacubex/mihomo/component/resolver" "github.com/metacubex/mihomo/log" D "github.com/miekg/dns" @@ -39,6 +40,9 @@ func (c *systemClient) getDnsClients() ([]dnsClient, error) { if nameservers, err = dnsReadConfig(); err == nil { log.Debugln("[DNS] system dns update to %s", nameservers) for _, addr := range nameservers { + if resolver.IsSystemDnsBlacklisted(addr) { + continue + } if _, ok := c.dnsClients[addr]; !ok { clients := transform( []NameServer{{ diff --git a/mihomo/listener/sing_tun/server.go b/mihomo/listener/sing_tun/server.go index 8fe534df53..7cd9fc7235 100644 --- a/mihomo/listener/sing_tun/server.go +++ b/mihomo/listener/sing_tun/server.go @@ -12,6 +12,7 @@ import ( "github.com/metacubex/mihomo/adapter/inbound" "github.com/metacubex/mihomo/component/dialer" "github.com/metacubex/mihomo/component/iface" + "github.com/metacubex/mihomo/component/resolver" C "github.com/metacubex/mihomo/constant" LC "github.com/metacubex/mihomo/listener/config" "github.com/metacubex/mihomo/listener/sing" @@ -39,6 +40,8 @@ type Listener struct { networkUpdateMonitor tun.NetworkUpdateMonitor defaultInterfaceMonitor tun.DefaultInterfaceMonitor packageManager tun.PackageManager + + dnsServerIp []string } func CalculateInterfaceName(name string) (tunName string) { @@ -147,12 +150,16 @@ func New(options LC.Tun, tunnel C.Tunnel, additions ...inbound.Addition) (l *Lis dnsAdds = append(dnsAdds, addrPort) } + + var dnsServerIp []string for _, a := range options.Inet4Address { addrPort := netip.AddrPortFrom(a.Addr().Next(), 53) + dnsServerIp = append(dnsServerIp, a.Addr().Next().String()) dnsAdds = append(dnsAdds, addrPort) } for _, a := range options.Inet6Address { addrPort := netip.AddrPortFrom(a.Addr().Next(), 53) + dnsServerIp = append(dnsServerIp, a.Addr().Next().String()) dnsAdds = append(dnsAdds, addrPort) } @@ -244,6 +251,10 @@ func New(options LC.Tun, tunnel C.Tunnel, additions ...inbound.Addition) (l *Lis return } + l.dnsServerIp = dnsServerIp + // after tun.New sing-tun has set DNS to TUN interface + resolver.AddSystemDnsBlacklist(dnsServerIp...) + stackOptions := tun.StackOptions{ Context: context.TODO(), Tun: tunIf, @@ -336,6 +347,7 @@ func parseRange(uidRanges []ranges.Range[uint32], rangeList []string) ([]ranges. func (l *Listener) Close() error { l.closed = true + resolver.RemoveSystemDnsBlacklist(l.dnsServerIp...) return common.Close( l.tunStack, l.tunIf, diff --git a/openwrt-passwall/luci-app-passwall/luasrc/passwall/util_xray.lua b/openwrt-passwall/luci-app-passwall/luasrc/passwall/util_xray.lua index 56f6008b8d..54d0d30327 100644 --- a/openwrt-passwall/luci-app-passwall/luasrc/passwall/util_xray.lua +++ b/openwrt-passwall/luci-app-passwall/luasrc/passwall/util_xray.lua @@ -642,24 +642,31 @@ function gen_config(var) end end - local function get_balancer_tag(_node_id) - return "balancer-" .. _node_id - end - - local function gen_loopback(outboundTag, dst_node_id) - if not outboundTag then return nil end - local inboundTag = dst_node_id and "loop-in-" .. dst_node_id or outboundTag .. "-lo" + local function gen_loopback(outbound_tag, loopback_dst) + if not outbound_tag or outbound_tag == "" then return nil end + local inbound_tag = loopback_dst and "lo-to-" .. loopback_dst or outbound_tag .. "-lo" table.insert(outbounds, { protocol = "loopback", - tag = outboundTag, - settings = { inboundTag = inboundTag } + tag = outbound_tag, + settings = { inboundTag = inbound_tag } }) - return inboundTag + return inbound_tag end - local function gen_balancer(_node, loopbackTag) + local function gen_balancer(_node, loopback_tag) + local balancer_id = _node[".name"] + local balancer_tag = "balancer-" .. balancer_id + local loopback_dst = balancer_id -- route destination for the loopback outbound + if not loopback_tag or loopback_tag == "" then loopback_tag = balancer_id end + -- existing balancer + for _, v in ipairs(balancers) do + if v.tag == balancer_tag then + gen_loopback(loopback_tag, loopback_dst) + return balancer_tag + end + end + -- new balancer local blc_nodes = _node.balancing_node - local fallback_node_id = _node.fallback_node local length = #blc_nodes local valid_nodes = {} for i = 1, length do @@ -682,10 +689,11 @@ function gen_config(var) end end end - if fallback_node_id == "" then - fallback_node_id = nil - end - if fallback_node_id then + if #valid_nodes == 0 then return nil end + + -- fallback node + local fallback_node_id = _node.fallback_node + if fallback_node_id and fallback_node_id ~= "" then local is_new_node = true for _, outbound in ipairs(outbounds) do if outbound.tag == fallback_node_id then @@ -703,44 +711,36 @@ function gen_config(var) fallback_node_id = nil end else - local valid = gen_balancer(fallback_node) - if not valid then + if not gen_balancer(fallback_node) then fallback_node_id = nil end end end end - - local valid = nil - if #valid_nodes > 0 then - local balancerTag = get_balancer_tag(_node[".name"]) - table.insert(balancers, { - tag = balancerTag, - selector = valid_nodes, - fallbackTag = fallback_node_id, - strategy = { type = _node.balancingStrategy or "random" } - }) - if _node.balancingStrategy == "leastPing" then - if not observatory then - observatory = { - subjectSelector = { "blc-" }, - probeUrl = _node.useCustomProbeUrl and _node.probeUrl or nil, - probeInterval = _node.probeInterval or "1m", - enableConcurrency = true - } - end + table.insert(balancers, { + tag = balancer_tag, + selector = valid_nodes, + fallbackTag = fallback_node_id, + strategy = { type = _node.balancingStrategy or "random" } + }) + if _node.balancingStrategy == "leastPing" then + if not observatory then + observatory = { + subjectSelector = { "blc-" }, + probeUrl = _node.useCustomProbeUrl and _node.probeUrl or nil, + probeInterval = _node.probeInterval or "1m", + enableConcurrency = true + } end - if loopbackTag == nil or loopbackTag =="" then loopbackTag = _node[".name"] end - local inboundTag = gen_loopback(loopbackTag, _node[".name"]) - table.insert(rules, { type = "field", inboundTag = { inboundTag }, balancerTag = balancerTag }) - valid = true end - return valid + local inbound_tag = gen_loopback(loopback_tag, loopback_dst) + table.insert(rules, { type = "field", inboundTag = { inbound_tag }, balancerTag = balancer_tag }) + return balancer_tag end local function set_outbound_detour(node, outbound, outbounds_table, shunt_rule_name) if not node or not outbound or not outbounds_table then return nil end - local default_outTag = outbound.tag + local default_out_tag = outbound.tag if node.to_node then local to_node = uci:get_all(appname, node.to_node) @@ -759,33 +759,35 @@ function gen_config(var) transportLayer = true } table.insert(outbounds_table, to_outbound) - default_outTag = to_outbound.tag + default_out_tag = to_outbound.tag end end end - return default_outTag + return default_out_tag end if node.protocol == "_shunt" then - local proxy_tag = "main" local proxy_node_id = node["main_node"] - local proxy_node = node.preproxy_enabled == "1" and proxy_node_id and uci:get_all(appname, proxy_node_id) or nil - local proxy_outboundTag, proxy_balancerTag + local proxy_tag = "main" + local proxy_balancer_tag + local proxy_nodes local function gen_shunt_node(rule_name, _node_id) if not rule_name then return nil, nil end - if not _node_id then _node_id = node[rule_name] or "nil" end - local rule_outboundTag - local rule_balancerTag + if not _node_id then + _node_id = node[rule_name] or nil + if not _node_id then return nil, nil end + end if _node_id == "_direct" then - rule_outboundTag = "direct" + return "direct", nil elseif _node_id == "_blackhole" then - rule_outboundTag = "blackhole" + return "blackhole", nil elseif _node_id == "_default" then - rule_outboundTag = "default" + return "default", nil elseif _node_id:find("Socks_") then local socks_id = _node_id:sub(1 + #"Socks_") local socks_node = uci:get_all(appname, socks_id) or nil + local socks_tag if socks_node then local _node = { type = "Xray", @@ -798,124 +800,121 @@ function gen_config(var) local outbound = gen_outbound(flag, _node, rule_name) if outbound then table.insert(outbounds, outbound) - rule_outboundTag = rule_name + socks_tag = outbound.tag end end - elseif _node_id ~= "nil" then - local _node = uci:get_all(appname, _node_id) - if not _node then return nil, nil end + return socks_tag, nil + end - if api.is_normal_node(_node) then - local use_proxy = proxy_node and node[rule_name .. "_proxy_tag"] == proxy_tag and _node_id ~= proxy_node_id - if use_proxy and proxy_balancerTag then - for _, blc_node_id in ipairs(proxy_node.balancing_node) do - if _node_id == blc_node_id then - use_proxy = false - break - end - end + local _node = uci:get_all(appname, _node_id) + if not _node then return nil, nil end + + if api.is_normal_node(_node) then + local use_proxy = proxy_tag and node[rule_name .. "_proxy_tag"] == proxy_tag and _node_id ~= proxy_node_id + if use_proxy and proxy_balancer_tag and proxy_nodes[_node_id] then use_proxy = false end + local copied_outbound + for index, value in ipairs(outbounds) do + if value["_flag_tag"] == _node_id and value["_flag_proxy_tag"] == (use_proxy and proxy_tag or "nil") then + copied_outbound = api.clone(value) + break end - local copied_outbound - for index, value in ipairs(outbounds) do - if value["_flag_tag"] == _node_id and value["_flag_proxy_tag"] == proxy_tag then - copied_outbound = api.clone(value) - break - end + end + if copied_outbound then + copied_outbound.tag = rule_name + table.insert(outbounds, copied_outbound) + return copied_outbound.tag, nil + end + --new outbound + if use_proxy and (_node.type ~= "Xray" or _node.flow == "xtls-rprx-vision") then + new_port = get_new_port() + table.insert(inbounds, { + tag = "proxy_" .. rule_name, + listen = "127.0.0.1", + port = new_port, + protocol = "dokodemo-door", + settings = {network = "tcp,udp", address = _node.address, port = tonumber(_node.port)} + }) + if _node.tls_serverName == nil then + _node.tls_serverName = _node.address end - if copied_outbound then - copied_outbound.tag = rule_name - table.insert(outbounds, copied_outbound) - rule_outboundTag = rule_name - else - if use_proxy and (_node.type ~= "Xray" or _node.flow == "xtls-rprx-vision") then - new_port = get_new_port() - table.insert(inbounds, { - tag = "proxy_" .. rule_name, - listen = "127.0.0.1", - port = new_port, - protocol = "dokodemo-door", - settings = {network = "tcp,udp", address = _node.address, port = tonumber(_node.port)} - }) - if _node.tls_serverName == nil then - _node.tls_serverName = _node.address - end - _node.address = "127.0.0.1" - _node.port = new_port - table.insert(rules, 1, { - type = "field", - inboundTag = {"proxy_" .. rule_name}, - outboundTag = proxy_outboundTag, - balancerTag = proxy_balancerTag - }) - end - local proxy_table = { - proxy = use_proxy and 1 or 0, - tag = use_proxy and proxy_tag or nil - } - if xray_settings.fragment == "1" and not proxy_table.tag then - proxy_table.fragment = true - end - local outbound = gen_outbound(flag, _node, rule_name, proxy_table) - if outbound then - set_outbound_detour(_node, outbound, outbounds, rule_name) - table.insert(outbounds, outbound) - rule_outboundTag = rule_name - end - end - elseif _node.protocol == "_balancing" then - local is_new_balancer = true - rule_balancerTag = get_balancer_tag(_node_id) - for _, v in ipairs(balancers) do - if v.tag == rule_balancerTag then - is_new_balancer = false - gen_loopback(rule_name, _node_id) - break - end - end - if is_new_balancer then - local valid = gen_balancer(_node, rule_name) - if not valid then - rule_balancerTag = nil - end - end - elseif _node.protocol == "_iface" then - if _node.iface then - local outbound = { - protocol = "freedom", - tag = rule_name, - streamSettings = { - sockopt = { - mark = 255, - interface = _node.iface - } + _node.address = "127.0.0.1" + _node.port = new_port + table.insert(rules, 1, { + type = "field", + inboundTag = {"proxy_" .. rule_name}, + outboundTag = not proxy_balancer_tag and proxy_tag or nil, + balancerTag = proxy_balancer_tag + }) + end + local proxy_table = { + proxy = use_proxy and 1 or 0, + tag = use_proxy and proxy_tag or nil + } + if xray_settings.fragment == "1" and not proxy_table.tag then + proxy_table.fragment = true + end + local outbound = gen_outbound(flag, _node, rule_name, proxy_table) + local outbound_tag + if outbound then + set_outbound_detour(_node, outbound, outbounds, rule_name) + table.insert(outbounds, outbound) + outbound_tag = outbound.tag + end + return outbound_tag, nil + elseif _node.protocol == "_balancing" then + + return nil, gen_balancer(_node, rule_name) + elseif _node.protocol == "_iface" then + if _node.iface then + local outbound = { + protocol = "freedom", + tag = rule_name, + streamSettings = { + sockopt = { + mark = 255, + interface = _node.iface } } - table.insert(outbounds, outbound) - rule_outboundTag = rule_name - sys.call("touch /tmp/etc/passwall/iface/" .. _node.iface) - end + } + table.insert(outbounds, outbound) + sys.call("touch /tmp/etc/passwall/iface/" .. _node.iface) + return outbound.tag, nil end end - return rule_outboundTag, rule_balancerTag end --proxy_node - if proxy_node then - proxy_outboundTag, proxy_balancerTag = gen_shunt_node(proxy_tag, proxy_node_id) - if not proxy_outboundTag and not proxy_balancerTag then - proxy_node = nil + if node.preproxy_enabled == "1" and proxy_node_id then + local proxy_outbound_tag + proxy_outbound_tag, proxy_balancer_tag = gen_shunt_node(proxy_tag, proxy_node_id) + if proxy_balancer_tag then + local _node_id = proxy_node_id + proxy_nodes = {} + while _node_id do + _node = uci:get_all(appname, _node_id) + if not _node then break end + if _node.protocol ~= "_balancing" then + proxy_nodes[_node_id] = true + break + end + local _blc_nodes = _node.balancing_node + for i = 1, #_blc_nodes do proxy_nodes[_blc_nodes[i]] = true end + _node_id = _node.fallback_node + end + else + proxy_tag = proxy_outbound_tag end end --default_node local default_node_id = node.default_node or "_direct" - local default_outboundTag, default_balancerTag = gen_shunt_node("default", default_node_id) + local default_outbound_tag, default_balancer_tag = gen_shunt_node("default", default_node_id) --shunt rule uci:foreach(appname, "shunt_rules", function(e) - local outboundTag, balancerTag = gen_shunt_node(e[".name"]) - if outboundTag or balancerTag and e.remarks then - if outboundTag == "default" then - outboundTag = default_outboundTag - balancerTag = default_balancerTag + local outbound_tag, balancer_tag = gen_shunt_node(e[".name"]) + if outbound_tag or balancer_tag and e.remarks then + if outbound_tag == "default" then + outbound_tag = default_outbound_tag + balancer_tag = default_balancer_tag end local protocols = nil if e["protocol"] and e["protocol"] ~= "" then @@ -924,20 +923,20 @@ function gen_config(var) table.insert(protocols, w) end) end - local inboundTag = nil + local inbound_tag = nil if e["inbound"] and e["inbound"] ~= "" then - inboundTag = {} + inbound_tag = {} if e["inbound"]:find("tproxy") then if tcp_redir_port then - table.insert(inboundTag, "tcp_redir") + table.insert(inbound_tag, "tcp_redir") end if udp_redir_port then - table.insert(inboundTag, "udp_redir") + table.insert(inbound_tag, "udp_redir") end end if e["inbound"]:find("socks") then if local_socks_port then - table.insert(inboundTag, "socks-in") + table.insert(inbound_tag, "socks-in") end end end @@ -965,9 +964,9 @@ function gen_config(var) local rule = { _flag = e.remarks, type = "field", - inboundTag = inboundTag, - outboundTag = outboundTag, - balancerTag = balancerTag, + inboundTag = inbound_tag, + outboundTag = outbound_tag, + balancerTag = balancer_tag, network = e["network"] or "tcp,udp", source = source, sourcePort = nil, @@ -992,11 +991,11 @@ function gen_config(var) end end) - if default_outboundTag or default_balancerTag then + if default_outbound_tag or default_balancer_tag then table.insert(rules, { type = "field", - outboundTag = default_outboundTag, - balancerTag = default_balancerTag, + outboundTag = default_outbound_tag, + balancerTag = default_balancer_tag, network = "tcp,udp" }) end @@ -1009,9 +1008,9 @@ function gen_config(var) } elseif node.protocol == "_balancing" then if node.balancing_node then - local valid = gen_balancer(node) - if valid then - table.insert(rules, { type = "field", network = "tcp,udp", balancerTag = get_balancer_tag(node_id) }) + local balancer_tag = gen_balancer(node) + if balancer_tag then + table.insert(rules, { type = "field", network = "tcp,udp", balancerTag = balancer_tag }) end routing = { balancers = balancers, @@ -1114,11 +1113,11 @@ function gen_config(var) end end ]]-- - local dns_outboundTag = "direct" + local dns_outbound_tag = "direct" if dns_socks_address and dns_socks_port then - dns_outboundTag = "out" + dns_outbound_tag = "out" table.insert(outbounds, 1, { - tag = dns_outboundTag, + tag = dns_outbound_tag, protocol = "socks", streamSettings = { network = "tcp", @@ -1138,10 +1137,10 @@ function gen_config(var) }) else if node_id and tcp_redir_port then - dns_outboundTag = node_id + dns_outbound_tag = node_id local node = uci:get_all(appname, node_id) if node.protocol == "_shunt" then - dns_outboundTag = "default" + dns_outbound_tag = "default" end end end @@ -1163,7 +1162,7 @@ function gen_config(var) tag = "dns-out", protocol = "dns", proxySettings = { - tag = dns_outboundTag + tag = dns_outbound_tag }, settings = { address = remote_dns_tcp_server, @@ -1190,7 +1189,7 @@ function gen_config(var) remote_dns_tcp_server }, port = tonumber(remote_dns_tcp_port), - outboundTag = dns_outboundTag + outboundTag = dns_outbound_tag }) if _remote_dns_host then table.insert(rules, { @@ -1202,7 +1201,7 @@ function gen_config(var) _remote_dns_host }, port = tonumber(remote_dns_doh_port), - outboundTag = dns_outboundTag + outboundTag = dns_outbound_tag }) end if remote_dns_doh_ip then @@ -1215,7 +1214,7 @@ function gen_config(var) remote_dns_doh_ip }, port = tonumber(remote_dns_doh_port), - outboundTag = dns_outboundTag + outboundTag = dns_outbound_tag }) end diff --git a/openwrt-passwall/luci-app-passwall/root/usr/share/passwall/app.sh b/openwrt-passwall/luci-app-passwall/root/usr/share/passwall/app.sh index dfd9870c2a..5602597ca5 100755 --- a/openwrt-passwall/luci-app-passwall/root/usr/share/passwall/app.sh +++ b/openwrt-passwall/luci-app-passwall/root/usr/share/passwall/app.sh @@ -327,7 +327,7 @@ run_ipt2socks() { run_singbox() { local flag type node tcp_redir_port udp_redir_port socks_address socks_port socks_username socks_password http_address http_port http_username http_password local dns_listen_port direct_dns_port direct_dns_udp_server remote_dns_protocol remote_dns_udp_server remote_dns_tcp_server remote_dns_doh remote_fakedns remote_dns_query_strategy dns_cache dns_socks_address dns_socks_port - local loglevel log_file config_file + local loglevel log_file config_file server_host server_port local _extra_param="" eval_set_val $@ [ -z "$type" ] && { @@ -353,6 +353,8 @@ run_singbox() { [ -n "$flag" ] && _extra_param="${_extra_param} -flag $flag" [ -n "$node" ] && _extra_param="${_extra_param} -node $node" + [ -n "$server_host" ] && _extra_param="${_extra_param} -server_host $server_host" + [ -n "$server_port" ] && _extra_param="${_extra_param} -server_port $server_port" [ -n "$tcp_redir_port" ] && _extra_param="${_extra_param} -tcp_redir_port $tcp_redir_port" [ -n "$udp_redir_port" ] && _extra_param="${_extra_param} -udp_redir_port $udp_redir_port" [ -n "$socks_address" ] && _extra_param="${_extra_param} -local_socks_address $socks_address" @@ -364,7 +366,7 @@ run_singbox() { [ -n "$dns_socks_address" ] && [ -n "$dns_socks_port" ] && _extra_param="${_extra_param} -dns_socks_address ${dns_socks_address} -dns_socks_port ${dns_socks_port}" [ -n "$dns_listen_port" ] && _extra_param="${_extra_param} -dns_listen_port ${dns_listen_port}" [ -n "$dns_cache" ] && _extra_param="${_extra_param} -dns_cache ${dns_cache}" - + local local_dns=$(echo -n $(echo "${LOCAL_DNS}" | sed "s/,/\n/g" | head -n1) | tr " " ",") [ -z "$direct_dns_udp_server" ] && direct_dns_udp_server=$(echo ${local_dns} | awk -F '#' '{print $1}') [ -z "$direct_dns_port" ] && direct_dns_port=$(echo ${local_dns} | awk -F '#' '{print $2}') @@ -404,7 +406,7 @@ run_singbox() { run_xray() { local flag type node tcp_redir_port udp_redir_port socks_address socks_port socks_username socks_password http_address http_port http_username http_password local dns_listen_port remote_dns_udp_server remote_dns_tcp_server remote_dns_doh dns_client_ip dns_query_strategy dns_cache dns_socks_address dns_socks_port - local loglevel log_file config_file + local loglevel log_file config_file server_host server_port local _extra_param="" eval_set_val $@ [ -z "$type" ] && { @@ -419,6 +421,8 @@ run_xray() { [ -z "$loglevel" ] && local loglevel=$(config_t_get global loglevel "warning") [ -n "$flag" ] && _extra_param="${_extra_param} -flag $flag" [ -n "$node" ] && _extra_param="${_extra_param} -node $node" + [ -n "$server_host" ] && _extra_param="${_extra_param} -server_host $server_host" + [ -n "$server_port" ] && _extra_param="${_extra_param} -server_port $server_port" [ -n "$tcp_redir_port" ] && _extra_param="${_extra_param} -tcp_redir_port $tcp_redir_port" [ -n "$udp_redir_port" ] && _extra_param="${_extra_param} -udp_redir_port $udp_redir_port" [ -n "$socks_address" ] && _extra_param="${_extra_param} -local_socks_address $socks_address" @@ -478,7 +482,7 @@ run_dns2socks() { run_chinadns_ng() { local _listen_port _dns_china _dns_trust _chnlist _gfwlist _no_ipv6_rules _log_path _no_logic_log eval_set_val $@ - + local _LOG_FILE=$LOG_FILE [ -n "$_no_logic_log" ] && LOG_FILE="/dev/null" @@ -595,7 +599,7 @@ run_socks() { config_file=$(echo $config_file | sed "s/SOCKS/HTTP_SOCKS/g") local _args="http_port=$http_port" } - [ -n "$relay_port" ] && _args="${_args} -server_host $server_host -server_port $port" + [ -n "$relay_port" ] && _args="${_args} server_host=$server_host server_port=$port" run_singbox flag=$flag node=$node socks_port=$socks_port config_file=$config_file log_file=$log_file ${_args} ;; xray) @@ -604,7 +608,7 @@ run_socks() { config_file=$(echo $config_file | sed "s/SOCKS/HTTP_SOCKS/g") local _args="http_port=$http_port" } - [ -n "$relay_port" ] && _args="${_args} -server_host $server_host -server_port $port" + [ -n "$relay_port" ] && _args="${_args} server_host=$server_host server_port=$port" run_xray flag=$flag node=$node socks_port=$socks_port config_file=$config_file log_file=$log_file ${_args} ;; trojan*) @@ -646,7 +650,7 @@ run_socks() { ln_run "$(first_type tuic-client)" "tuic-client" $log_file -c "$config_file" ;; esac - + eval node_${node}_socks_port=$socks_port # http to socks @@ -1382,7 +1386,7 @@ acl_app() { [ "$dns_mode" = "sing-box" ] && { [ "$v2ray_dns_mode" = "doh" ] && remote_dns=${remote_dns_doh:-https://1.1.1.1/dns-query} } - + [ "${use_global_config}" = "1" ] && { tcp_node="default" udp_node="default" diff --git a/ryujinx/src/Ryujinx.Graphics.Vulkan/Window.cs b/ryujinx/src/Ryujinx.Graphics.Vulkan/Window.cs index 5ddb6eedae..a4ac9e9f19 100644 --- a/ryujinx/src/Ryujinx.Graphics.Vulkan/Window.cs +++ b/ryujinx/src/Ryujinx.Graphics.Vulkan/Window.cs @@ -330,6 +330,7 @@ namespace Ryujinx.Graphics.Vulkan _swapchainIsDirty) { RecreateSwapchain(); + semaphoreIndex = (_frameIndex - 1) % _imageAvailableSemaphores.Length; } else { diff --git a/shadowsocks-rust/crates/shadowsocks-service/src/config.rs b/shadowsocks-rust/crates/shadowsocks-service/src/config.rs index 8b86ca1122..11f614406d 100644 --- a/shadowsocks-rust/crates/shadowsocks-service/src/config.rs +++ b/shadowsocks-rust/crates/shadowsocks-service/src/config.rs @@ -124,6 +124,14 @@ struct SSConfig { #[serde(skip_serializing_if = "Option::is_none")] local_port: Option, + /// macOS launch activate socket for local_port + #[cfg(target_os = "macos")] + #[serde(skip_serializing_if = "Option::is_none")] + launchd_udp_socket_name: Option, + #[cfg(target_os = "macos")] + #[serde(skip_serializing_if = "Option::is_none")] + launchd_tcp_socket_name: Option, + #[serde(skip_serializing_if = "Option::is_none")] protocol: Option, @@ -1501,6 +1509,11 @@ impl Config { } }, }; + #[cfg(target_os = "macos")] + { + local_config.launchd_tcp_socket_name = config.launchd_tcp_socket_name.clone(); + local_config.launchd_udp_socket_name = config.launchd_udp_socket_name.clone(); + } let local_instance = LocalInstanceConfig { config: local_config, @@ -2545,6 +2558,12 @@ impl fmt::Display for Config { }); } + #[cfg(target_os = "macos")] + { + jconf.launchd_tcp_socket_name = local.launchd_tcp_socket_name.clone(); + jconf.launchd_udp_socket_name = local.launchd_udp_socket_name.clone(); + } + if local.protocol != ProtocolType::Socks { jconf.protocol = Some(local.protocol.as_str().to_owned()); } diff --git a/sing-box/.github/workflows/linux.yml b/sing-box/.github/workflows/linux.yml index 28ac185de3..8312eebbc7 100644 --- a/sing-box/.github/workflows/linux.yml +++ b/sing-box/.github/workflows/linux.yml @@ -19,6 +19,7 @@ jobs: go-version: ^1.22 - name: Extract signing key run: |- + mkdir -p $HOME/.gnupg cat > $HOME/.gnupg/sagernet.key < 0 then - local balancerTag = get_balancer_tag(_node[".name"]) - table.insert(balancers, { - tag = balancerTag, - selector = valid_nodes, - fallbackTag = fallback_node_id, - strategy = { type = _node.balancingStrategy or "random" } - }) - if _node.balancingStrategy == "leastPing" then - if not observatory then - observatory = { - subjectSelector = { "blc-" }, - probeUrl = _node.useCustomProbeUrl and _node.probeUrl or nil, - probeInterval = _node.probeInterval or "1m", - enableConcurrency = true - } - end + table.insert(balancers, { + tag = balancer_tag, + selector = valid_nodes, + fallbackTag = fallback_node_id, + strategy = { type = _node.balancingStrategy or "random" } + }) + if _node.balancingStrategy == "leastPing" then + if not observatory then + observatory = { + subjectSelector = { "blc-" }, + probeUrl = _node.useCustomProbeUrl and _node.probeUrl or nil, + probeInterval = _node.probeInterval or "1m", + enableConcurrency = true + } end - if loopbackTag == nil or loopbackTag =="" then loopbackTag = _node[".name"] end - local inboundTag = gen_loopback(loopbackTag, _node[".name"]) - table.insert(rules, { type = "field", inboundTag = { inboundTag }, balancerTag = balancerTag }) - valid = true end - return valid + local inbound_tag = gen_loopback(loopback_tag, loopback_dst) + table.insert(rules, { type = "field", inboundTag = { inbound_tag }, balancerTag = balancer_tag }) + return balancer_tag end local function set_outbound_detour(node, outbound, outbounds_table, shunt_rule_name) if not node or not outbound or not outbounds_table then return nil end - local default_outTag = outbound.tag + local default_out_tag = outbound.tag if node.to_node then local to_node = uci:get_all(appname, node.to_node) @@ -759,33 +759,35 @@ function gen_config(var) transportLayer = true } table.insert(outbounds_table, to_outbound) - default_outTag = to_outbound.tag + default_out_tag = to_outbound.tag end end end - return default_outTag + return default_out_tag end if node.protocol == "_shunt" then - local proxy_tag = "main" local proxy_node_id = node["main_node"] - local proxy_node = node.preproxy_enabled == "1" and proxy_node_id and uci:get_all(appname, proxy_node_id) or nil - local proxy_outboundTag, proxy_balancerTag + local proxy_tag = "main" + local proxy_balancer_tag + local proxy_nodes local function gen_shunt_node(rule_name, _node_id) if not rule_name then return nil, nil end - if not _node_id then _node_id = node[rule_name] or "nil" end - local rule_outboundTag - local rule_balancerTag + if not _node_id then + _node_id = node[rule_name] or nil + if not _node_id then return nil, nil end + end if _node_id == "_direct" then - rule_outboundTag = "direct" + return "direct", nil elseif _node_id == "_blackhole" then - rule_outboundTag = "blackhole" + return "blackhole", nil elseif _node_id == "_default" then - rule_outboundTag = "default" + return "default", nil elseif _node_id:find("Socks_") then local socks_id = _node_id:sub(1 + #"Socks_") local socks_node = uci:get_all(appname, socks_id) or nil + local socks_tag if socks_node then local _node = { type = "Xray", @@ -798,124 +800,121 @@ function gen_config(var) local outbound = gen_outbound(flag, _node, rule_name) if outbound then table.insert(outbounds, outbound) - rule_outboundTag = rule_name + socks_tag = outbound.tag end end - elseif _node_id ~= "nil" then - local _node = uci:get_all(appname, _node_id) - if not _node then return nil, nil end + return socks_tag, nil + end - if api.is_normal_node(_node) then - local use_proxy = proxy_node and node[rule_name .. "_proxy_tag"] == proxy_tag and _node_id ~= proxy_node_id - if use_proxy and proxy_balancerTag then - for _, blc_node_id in ipairs(proxy_node.balancing_node) do - if _node_id == blc_node_id then - use_proxy = false - break - end - end + local _node = uci:get_all(appname, _node_id) + if not _node then return nil, nil end + + if api.is_normal_node(_node) then + local use_proxy = proxy_tag and node[rule_name .. "_proxy_tag"] == proxy_tag and _node_id ~= proxy_node_id + if use_proxy and proxy_balancer_tag and proxy_nodes[_node_id] then use_proxy = false end + local copied_outbound + for index, value in ipairs(outbounds) do + if value["_flag_tag"] == _node_id and value["_flag_proxy_tag"] == (use_proxy and proxy_tag or "nil") then + copied_outbound = api.clone(value) + break end - local copied_outbound - for index, value in ipairs(outbounds) do - if value["_flag_tag"] == _node_id and value["_flag_proxy_tag"] == proxy_tag then - copied_outbound = api.clone(value) - break - end + end + if copied_outbound then + copied_outbound.tag = rule_name + table.insert(outbounds, copied_outbound) + return copied_outbound.tag, nil + end + --new outbound + if use_proxy and (_node.type ~= "Xray" or _node.flow == "xtls-rprx-vision") then + new_port = get_new_port() + table.insert(inbounds, { + tag = "proxy_" .. rule_name, + listen = "127.0.0.1", + port = new_port, + protocol = "dokodemo-door", + settings = {network = "tcp,udp", address = _node.address, port = tonumber(_node.port)} + }) + if _node.tls_serverName == nil then + _node.tls_serverName = _node.address end - if copied_outbound then - copied_outbound.tag = rule_name - table.insert(outbounds, copied_outbound) - rule_outboundTag = rule_name - else - if use_proxy and (_node.type ~= "Xray" or _node.flow == "xtls-rprx-vision") then - new_port = get_new_port() - table.insert(inbounds, { - tag = "proxy_" .. rule_name, - listen = "127.0.0.1", - port = new_port, - protocol = "dokodemo-door", - settings = {network = "tcp,udp", address = _node.address, port = tonumber(_node.port)} - }) - if _node.tls_serverName == nil then - _node.tls_serverName = _node.address - end - _node.address = "127.0.0.1" - _node.port = new_port - table.insert(rules, 1, { - type = "field", - inboundTag = {"proxy_" .. rule_name}, - outboundTag = proxy_outboundTag, - balancerTag = proxy_balancerTag - }) - end - local proxy_table = { - proxy = use_proxy and 1 or 0, - tag = use_proxy and proxy_tag or nil - } - if xray_settings.fragment == "1" and not proxy_table.tag then - proxy_table.fragment = true - end - local outbound = gen_outbound(flag, _node, rule_name, proxy_table) - if outbound then - set_outbound_detour(_node, outbound, outbounds, rule_name) - table.insert(outbounds, outbound) - rule_outboundTag = rule_name - end - end - elseif _node.protocol == "_balancing" then - local is_new_balancer = true - rule_balancerTag = get_balancer_tag(_node_id) - for _, v in ipairs(balancers) do - if v.tag == rule_balancerTag then - is_new_balancer = false - gen_loopback(rule_name, _node_id) - break - end - end - if is_new_balancer then - local valid = gen_balancer(_node, rule_name) - if not valid then - rule_balancerTag = nil - end - end - elseif _node.protocol == "_iface" then - if _node.iface then - local outbound = { - protocol = "freedom", - tag = rule_name, - streamSettings = { - sockopt = { - mark = 255, - interface = _node.iface - } + _node.address = "127.0.0.1" + _node.port = new_port + table.insert(rules, 1, { + type = "field", + inboundTag = {"proxy_" .. rule_name}, + outboundTag = not proxy_balancer_tag and proxy_tag or nil, + balancerTag = proxy_balancer_tag + }) + end + local proxy_table = { + proxy = use_proxy and 1 or 0, + tag = use_proxy and proxy_tag or nil + } + if xray_settings.fragment == "1" and not proxy_table.tag then + proxy_table.fragment = true + end + local outbound = gen_outbound(flag, _node, rule_name, proxy_table) + local outbound_tag + if outbound then + set_outbound_detour(_node, outbound, outbounds, rule_name) + table.insert(outbounds, outbound) + outbound_tag = outbound.tag + end + return outbound_tag, nil + elseif _node.protocol == "_balancing" then + + return nil, gen_balancer(_node, rule_name) + elseif _node.protocol == "_iface" then + if _node.iface then + local outbound = { + protocol = "freedom", + tag = rule_name, + streamSettings = { + sockopt = { + mark = 255, + interface = _node.iface } } - table.insert(outbounds, outbound) - rule_outboundTag = rule_name - sys.call("touch /tmp/etc/passwall/iface/" .. _node.iface) - end + } + table.insert(outbounds, outbound) + sys.call("touch /tmp/etc/passwall/iface/" .. _node.iface) + return outbound.tag, nil end end - return rule_outboundTag, rule_balancerTag end --proxy_node - if proxy_node then - proxy_outboundTag, proxy_balancerTag = gen_shunt_node(proxy_tag, proxy_node_id) - if not proxy_outboundTag and not proxy_balancerTag then - proxy_node = nil + if node.preproxy_enabled == "1" and proxy_node_id then + local proxy_outbound_tag + proxy_outbound_tag, proxy_balancer_tag = gen_shunt_node(proxy_tag, proxy_node_id) + if proxy_balancer_tag then + local _node_id = proxy_node_id + proxy_nodes = {} + while _node_id do + _node = uci:get_all(appname, _node_id) + if not _node then break end + if _node.protocol ~= "_balancing" then + proxy_nodes[_node_id] = true + break + end + local _blc_nodes = _node.balancing_node + for i = 1, #_blc_nodes do proxy_nodes[_blc_nodes[i]] = true end + _node_id = _node.fallback_node + end + else + proxy_tag = proxy_outbound_tag end end --default_node local default_node_id = node.default_node or "_direct" - local default_outboundTag, default_balancerTag = gen_shunt_node("default", default_node_id) + local default_outbound_tag, default_balancer_tag = gen_shunt_node("default", default_node_id) --shunt rule uci:foreach(appname, "shunt_rules", function(e) - local outboundTag, balancerTag = gen_shunt_node(e[".name"]) - if outboundTag or balancerTag and e.remarks then - if outboundTag == "default" then - outboundTag = default_outboundTag - balancerTag = default_balancerTag + local outbound_tag, balancer_tag = gen_shunt_node(e[".name"]) + if outbound_tag or balancer_tag and e.remarks then + if outbound_tag == "default" then + outbound_tag = default_outbound_tag + balancer_tag = default_balancer_tag end local protocols = nil if e["protocol"] and e["protocol"] ~= "" then @@ -924,20 +923,20 @@ function gen_config(var) table.insert(protocols, w) end) end - local inboundTag = nil + local inbound_tag = nil if e["inbound"] and e["inbound"] ~= "" then - inboundTag = {} + inbound_tag = {} if e["inbound"]:find("tproxy") then if tcp_redir_port then - table.insert(inboundTag, "tcp_redir") + table.insert(inbound_tag, "tcp_redir") end if udp_redir_port then - table.insert(inboundTag, "udp_redir") + table.insert(inbound_tag, "udp_redir") end end if e["inbound"]:find("socks") then if local_socks_port then - table.insert(inboundTag, "socks-in") + table.insert(inbound_tag, "socks-in") end end end @@ -965,9 +964,9 @@ function gen_config(var) local rule = { _flag = e.remarks, type = "field", - inboundTag = inboundTag, - outboundTag = outboundTag, - balancerTag = balancerTag, + inboundTag = inbound_tag, + outboundTag = outbound_tag, + balancerTag = balancer_tag, network = e["network"] or "tcp,udp", source = source, sourcePort = nil, @@ -992,11 +991,11 @@ function gen_config(var) end end) - if default_outboundTag or default_balancerTag then + if default_outbound_tag or default_balancer_tag then table.insert(rules, { type = "field", - outboundTag = default_outboundTag, - balancerTag = default_balancerTag, + outboundTag = default_outbound_tag, + balancerTag = default_balancer_tag, network = "tcp,udp" }) end @@ -1009,9 +1008,9 @@ function gen_config(var) } elseif node.protocol == "_balancing" then if node.balancing_node then - local valid = gen_balancer(node) - if valid then - table.insert(rules, { type = "field", network = "tcp,udp", balancerTag = get_balancer_tag(node_id) }) + local balancer_tag = gen_balancer(node) + if balancer_tag then + table.insert(rules, { type = "field", network = "tcp,udp", balancerTag = balancer_tag }) end routing = { balancers = balancers, @@ -1114,11 +1113,11 @@ function gen_config(var) end end ]]-- - local dns_outboundTag = "direct" + local dns_outbound_tag = "direct" if dns_socks_address and dns_socks_port then - dns_outboundTag = "out" + dns_outbound_tag = "out" table.insert(outbounds, 1, { - tag = dns_outboundTag, + tag = dns_outbound_tag, protocol = "socks", streamSettings = { network = "tcp", @@ -1138,10 +1137,10 @@ function gen_config(var) }) else if node_id and tcp_redir_port then - dns_outboundTag = node_id + dns_outbound_tag = node_id local node = uci:get_all(appname, node_id) if node.protocol == "_shunt" then - dns_outboundTag = "default" + dns_outbound_tag = "default" end end end @@ -1163,7 +1162,7 @@ function gen_config(var) tag = "dns-out", protocol = "dns", proxySettings = { - tag = dns_outboundTag + tag = dns_outbound_tag }, settings = { address = remote_dns_tcp_server, @@ -1190,7 +1189,7 @@ function gen_config(var) remote_dns_tcp_server }, port = tonumber(remote_dns_tcp_port), - outboundTag = dns_outboundTag + outboundTag = dns_outbound_tag }) if _remote_dns_host then table.insert(rules, { @@ -1202,7 +1201,7 @@ function gen_config(var) _remote_dns_host }, port = tonumber(remote_dns_doh_port), - outboundTag = dns_outboundTag + outboundTag = dns_outbound_tag }) end if remote_dns_doh_ip then @@ -1215,7 +1214,7 @@ function gen_config(var) remote_dns_doh_ip }, port = tonumber(remote_dns_doh_port), - outboundTag = dns_outboundTag + outboundTag = dns_outbound_tag }) end diff --git a/small/luci-app-passwall/root/usr/share/passwall/app.sh b/small/luci-app-passwall/root/usr/share/passwall/app.sh index dfd9870c2a..5602597ca5 100755 --- a/small/luci-app-passwall/root/usr/share/passwall/app.sh +++ b/small/luci-app-passwall/root/usr/share/passwall/app.sh @@ -327,7 +327,7 @@ run_ipt2socks() { run_singbox() { local flag type node tcp_redir_port udp_redir_port socks_address socks_port socks_username socks_password http_address http_port http_username http_password local dns_listen_port direct_dns_port direct_dns_udp_server remote_dns_protocol remote_dns_udp_server remote_dns_tcp_server remote_dns_doh remote_fakedns remote_dns_query_strategy dns_cache dns_socks_address dns_socks_port - local loglevel log_file config_file + local loglevel log_file config_file server_host server_port local _extra_param="" eval_set_val $@ [ -z "$type" ] && { @@ -353,6 +353,8 @@ run_singbox() { [ -n "$flag" ] && _extra_param="${_extra_param} -flag $flag" [ -n "$node" ] && _extra_param="${_extra_param} -node $node" + [ -n "$server_host" ] && _extra_param="${_extra_param} -server_host $server_host" + [ -n "$server_port" ] && _extra_param="${_extra_param} -server_port $server_port" [ -n "$tcp_redir_port" ] && _extra_param="${_extra_param} -tcp_redir_port $tcp_redir_port" [ -n "$udp_redir_port" ] && _extra_param="${_extra_param} -udp_redir_port $udp_redir_port" [ -n "$socks_address" ] && _extra_param="${_extra_param} -local_socks_address $socks_address" @@ -364,7 +366,7 @@ run_singbox() { [ -n "$dns_socks_address" ] && [ -n "$dns_socks_port" ] && _extra_param="${_extra_param} -dns_socks_address ${dns_socks_address} -dns_socks_port ${dns_socks_port}" [ -n "$dns_listen_port" ] && _extra_param="${_extra_param} -dns_listen_port ${dns_listen_port}" [ -n "$dns_cache" ] && _extra_param="${_extra_param} -dns_cache ${dns_cache}" - + local local_dns=$(echo -n $(echo "${LOCAL_DNS}" | sed "s/,/\n/g" | head -n1) | tr " " ",") [ -z "$direct_dns_udp_server" ] && direct_dns_udp_server=$(echo ${local_dns} | awk -F '#' '{print $1}') [ -z "$direct_dns_port" ] && direct_dns_port=$(echo ${local_dns} | awk -F '#' '{print $2}') @@ -404,7 +406,7 @@ run_singbox() { run_xray() { local flag type node tcp_redir_port udp_redir_port socks_address socks_port socks_username socks_password http_address http_port http_username http_password local dns_listen_port remote_dns_udp_server remote_dns_tcp_server remote_dns_doh dns_client_ip dns_query_strategy dns_cache dns_socks_address dns_socks_port - local loglevel log_file config_file + local loglevel log_file config_file server_host server_port local _extra_param="" eval_set_val $@ [ -z "$type" ] && { @@ -419,6 +421,8 @@ run_xray() { [ -z "$loglevel" ] && local loglevel=$(config_t_get global loglevel "warning") [ -n "$flag" ] && _extra_param="${_extra_param} -flag $flag" [ -n "$node" ] && _extra_param="${_extra_param} -node $node" + [ -n "$server_host" ] && _extra_param="${_extra_param} -server_host $server_host" + [ -n "$server_port" ] && _extra_param="${_extra_param} -server_port $server_port" [ -n "$tcp_redir_port" ] && _extra_param="${_extra_param} -tcp_redir_port $tcp_redir_port" [ -n "$udp_redir_port" ] && _extra_param="${_extra_param} -udp_redir_port $udp_redir_port" [ -n "$socks_address" ] && _extra_param="${_extra_param} -local_socks_address $socks_address" @@ -478,7 +482,7 @@ run_dns2socks() { run_chinadns_ng() { local _listen_port _dns_china _dns_trust _chnlist _gfwlist _no_ipv6_rules _log_path _no_logic_log eval_set_val $@ - + local _LOG_FILE=$LOG_FILE [ -n "$_no_logic_log" ] && LOG_FILE="/dev/null" @@ -595,7 +599,7 @@ run_socks() { config_file=$(echo $config_file | sed "s/SOCKS/HTTP_SOCKS/g") local _args="http_port=$http_port" } - [ -n "$relay_port" ] && _args="${_args} -server_host $server_host -server_port $port" + [ -n "$relay_port" ] && _args="${_args} server_host=$server_host server_port=$port" run_singbox flag=$flag node=$node socks_port=$socks_port config_file=$config_file log_file=$log_file ${_args} ;; xray) @@ -604,7 +608,7 @@ run_socks() { config_file=$(echo $config_file | sed "s/SOCKS/HTTP_SOCKS/g") local _args="http_port=$http_port" } - [ -n "$relay_port" ] && _args="${_args} -server_host $server_host -server_port $port" + [ -n "$relay_port" ] && _args="${_args} server_host=$server_host server_port=$port" run_xray flag=$flag node=$node socks_port=$socks_port config_file=$config_file log_file=$log_file ${_args} ;; trojan*) @@ -646,7 +650,7 @@ run_socks() { ln_run "$(first_type tuic-client)" "tuic-client" $log_file -c "$config_file" ;; esac - + eval node_${node}_socks_port=$socks_port # http to socks @@ -1382,7 +1386,7 @@ acl_app() { [ "$dns_mode" = "sing-box" ] && { [ "$v2ray_dns_mode" = "doh" ] && remote_dns=${remote_dns_doh:-https://1.1.1.1/dns-query} } - + [ "${use_global_config}" = "1" ] && { tcp_node="default" udp_node="default" diff --git a/suyu/README.md b/suyu/README.md index fc456c50f5..5baba68ea9 100644 --- a/suyu/README.md +++ b/suyu/README.md @@ -6,7 +6,7 @@ SPDX-License-Identifier: GPL-3.0-or-later **Note**: We do not support or condone piracy in any form. In order to use suyu, you'll need keys from your real Switch system, and games which you have legally obtained and paid for. We do not intend to make money or profit from this project. -We're in need of developers. Please join our Matrix below if you want to contribute! +We're in need of developers. Please join our chat below if you want to contribute! This repo is based on Yuzu EA 4176.
@@ -25,7 +25,7 @@ It is written in C++ with portability in mind, and we're actively working on bui

- Matrix | + Chat | Status | Development | Downloads | @@ -48,7 +48,7 @@ We currently have builds over at the [Releases](https://git.suyu.dev/suyu/suyu/r This project is completely free and open source, and anyone can contribute to help improve suyu. -Most of the development happens on GitLab. For development discussion, please join us on [Matrix](https://chat.suyu.dev). +Most of the development happens on GitLab. For development discussion, please join us in our [Chat](https://chat.suyu.dev). If you want to contribute, please take a look at the [Contributor's Guide](https://git.suyu.dev/suyu/suyu/wiki/Contributing) and [Developer Information](https://git.suyu.dev/suyu/suyu/wiki/Developer-Information). You can also contact any of the developers on Discord to learn more about the current state of suyu. @@ -77,7 +77,7 @@ We have official builds [here.](https://git.suyu.dev/suyu/suyu/releases) If any ## Support -If you have any questions, don't hesitate to ask us on [Matrix](https://chat.suyu.dev). We don't bite! +If you have any questions, don't hesitate to ask us in our [chat](https://chat.suyu.dev). We don't bite! ## License diff --git a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/AppConfig.kt b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/AppConfig.kt index 0d96ac0238..7b099a48eb 100644 --- a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/AppConfig.kt +++ b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/AppConfig.kt @@ -63,7 +63,7 @@ object AppConfig { const val TASKER_EXTRA_BUNDLE_GUID = "tasker_extra_bundle_guid" const val TASKER_DEFAULT_GUID = "Default" - const val TAG_AGENT = "proxy" + const val TAG_PROXY = "proxy" const val TAG_DIRECT = "direct" const val TAG_BLOCKED = "block" const val TAG_FRAGMENT = "fragment" @@ -78,8 +78,9 @@ object AppConfig { const val promotionUrl = "aHR0cHM6Ly85LjIzNDQ1Ni54eXovYWJjLmh0bWw=" const val geoUrl = "https://github.com/Loyalsoldier/v2ray-rules-dat/releases/latest/download/" - const val DNS_AGENT = "1.1.1.1" + const val DNS_PROXY = "1.1.1.1" const val DNS_DIRECT = "223.5.5.5" + const val DNS_VPN = "1.1.1.1" const val PORT_LOCAL_DNS = "10853" const val PORT_SOCKS = "10808" diff --git a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/dto/ServerConfig.kt b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/dto/ServerConfig.kt index b4cd3e164e..1999c9770c 100644 --- a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/dto/ServerConfig.kt +++ b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/dto/ServerConfig.kt @@ -1,6 +1,6 @@ package com.v2ray.ang.dto -import com.v2ray.ang.AppConfig.TAG_AGENT +import com.v2ray.ang.AppConfig.TAG_PROXY import com.v2ray.ang.AppConfig.TAG_BLOCKED import com.v2ray.ang.AppConfig.TAG_DIRECT import com.v2ray.ang.util.Utils @@ -58,7 +58,7 @@ data class ServerConfig( fun getAllOutboundTags(): MutableList { if (configType != EConfigType.CUSTOM) { - return mutableListOf(TAG_AGENT, TAG_DIRECT, TAG_BLOCKED) + return mutableListOf(TAG_PROXY, TAG_DIRECT, TAG_BLOCKED) } fullConfig?.let { config -> return config.outbounds.map { it.tag }.toMutableList() diff --git a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/RoutingSettingsFragment.kt b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/RoutingSettingsFragment.kt index 88f65d4169..d4f772347a 100644 --- a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/RoutingSettingsFragment.kt +++ b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/RoutingSettingsFragment.kt @@ -127,7 +127,7 @@ class RoutingSettingsFragment : Fragment() { var tag = "" when (requireArguments().getString(routing_arg)) { AppConfig.PREF_V2RAY_ROUTING_AGENT -> { - tag = AppConfig.TAG_AGENT + tag = AppConfig.TAG_PROXY } AppConfig.PREF_V2RAY_ROUTING_DIRECT -> { tag = AppConfig.TAG_DIRECT diff --git a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/SettingsActivity.kt b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/SettingsActivity.kt index 8bb9bea269..470540b5f9 100644 --- a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/SettingsActivity.kt +++ b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/SettingsActivity.kt @@ -120,7 +120,7 @@ class SettingsActivity : BaseActivity() { remoteDns?.setOnPreferenceChangeListener { _, any -> // remoteDns.summary = any as String val nval = any as String - remoteDns?.summary = if (nval == "") AppConfig.DNS_AGENT else nval + remoteDns?.summary = if (nval == "") AppConfig.DNS_PROXY else nval true } domesticDns?.setOnPreferenceChangeListener { _, any -> @@ -213,7 +213,7 @@ class SettingsActivity : BaseActivity() { autoUpdateInterval?.isEnabled = defaultSharedPreferences.getBoolean(AppConfig.SUBSCRIPTION_AUTO_UPDATE, false) if (TextUtils.isEmpty(remoteDnsString)) { - remoteDnsString = AppConfig.DNS_AGENT + remoteDnsString = AppConfig.DNS_PROXY } if (TextUtils.isEmpty(domesticDns?.summary)) { domesticDns?.summary = AppConfig.DNS_DIRECT diff --git a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/UserAssetActivity.kt b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/UserAssetActivity.kt index dfdbfcf196..e007fc7e37 100644 --- a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/UserAssetActivity.kt +++ b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/ui/UserAssetActivity.kt @@ -177,7 +177,10 @@ class UserAssetActivity : BaseActivity() { assets.forEach { //toast(getString(R.string.msg_downloading_content) + it) lifecycleScope.launch(Dispatchers.IO) { - val result = downloadGeo(it.second, 60000, httpPort) + var result = downloadGeo(it.second, 60000, httpPort) + if (!result) { + result = downloadGeo(it.second, 60000, 0) + } launch(Dispatchers.Main) { if (result) { toast(getString(R.string.toast_success) + " " + it.second.remarks) @@ -197,12 +200,16 @@ class UserAssetActivity : BaseActivity() { //Log.d(AppConfig.ANG_PACKAGE, url) try { - conn = URL(item.url).openConnection( - Proxy( - Proxy.Type.HTTP, - InetSocketAddress("127.0.0.1", httpPort) - ) - ) as HttpURLConnection + conn = if (httpPort == 0) { + URL(item.url).openConnection() as HttpURLConnection + } else { + URL(item.url).openConnection( + Proxy( + Proxy.Type.HTTP, + InetSocketAddress("127.0.0.1", httpPort) + ) + ) as HttpURLConnection + } conn.connectTimeout = timeout conn.readTimeout = timeout val inputStream = conn.inputStream @@ -224,13 +231,14 @@ class UserAssetActivity : BaseActivity() { } private fun addBuiltInGeoItems(assets: List>): List> { val list = mutableListOf>() - builtInGeoFiles.forEach { - list.add(Utils.getUuid() to AssetUrlItem( - it, - AppConfig.geoUrl + it - ) - ) - } + builtInGeoFiles + .filter { geoFile -> assets.none { it.second.remarks == geoFile } } + .forEach { + list.add(Utils.getUuid() to AssetUrlItem( + it, + AppConfig.geoUrl + it + )) + } return list + assets } @@ -263,7 +271,7 @@ class UserAssetActivity : BaseActivity() { holder.itemUserAssetBinding.assetProperties.text = getString(R.string.msg_file_not_found) } - if (item.second.remarks in builtInGeoFiles) { + if (item.second.remarks in builtInGeoFiles && item.second.url == AppConfig.geoUrl + item.second.remarks) { holder.itemUserAssetBinding.layoutEdit.visibility = GONE holder.itemUserAssetBinding.layoutRemove.visibility = GONE } else { diff --git a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/util/Utils.kt b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/util/Utils.kt index f2639db79f..4715363396 100644 --- a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/util/Utils.kt +++ b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/util/Utils.kt @@ -139,10 +139,10 @@ object Utils { * get remote dns servers from preference */ fun getRemoteDnsServers(): List { - val remoteDns = settingsStorage?.decodeString(AppConfig.PREF_REMOTE_DNS) ?: AppConfig.DNS_AGENT + val remoteDns = settingsStorage?.decodeString(AppConfig.PREF_REMOTE_DNS) ?: AppConfig.DNS_PROXY val ret = remoteDns.split(",").filter { isPureIpAddress(it) || isCoreDNSAddress(it) } if (ret.isEmpty()) { - return listOf(AppConfig.DNS_AGENT) + return listOf(AppConfig.DNS_PROXY) } return ret } @@ -150,7 +150,7 @@ object Utils { fun getVpnDnsServers(): List { val vpnDns = settingsStorage?.decodeString(AppConfig.PREF_VPN_DNS) ?: settingsStorage?.decodeString(AppConfig.PREF_REMOTE_DNS) - ?: AppConfig.DNS_AGENT + ?: AppConfig.DNS_VPN return vpnDns.split(",").filter { isPureIpAddress(it) } // allow empty, in that case dns will use system default } @@ -160,7 +160,7 @@ object Utils { */ fun getDomesticDnsServers(): List { val domesticDns = settingsStorage?.decodeString(AppConfig.PREF_DOMESTIC_DNS) ?: AppConfig.DNS_DIRECT - val ret = domesticDns.split(",").filter { isPureIpAddress(it) || isCoreDNSAddress(it) } + val ret = domesticDns.split(",").filter { isPureIpAddress(it) } if (ret.isEmpty()) { return listOf(AppConfig.DNS_DIRECT) } diff --git a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/util/V2rayConfigUtil.kt b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/util/V2rayConfigUtil.kt index 2c4911c2b3..a549610b22 100644 --- a/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/util/V2rayConfigUtil.kt +++ b/v2rayng/V2rayNG/app/src/main/kotlin/com/v2ray/ang/util/V2rayConfigUtil.kt @@ -175,7 +175,7 @@ object V2rayConfigUtil { try { routingUserRule( settingsStorage?.decodeString(AppConfig.PREF_V2RAY_ROUTING_AGENT) - ?: "", AppConfig.TAG_AGENT, v2rayConfig + ?: "", AppConfig.TAG_PROXY, v2rayConfig ) routingUserRule( settingsStorage?.decodeString(AppConfig.PREF_V2RAY_ROUTING_DIRECT) @@ -195,7 +195,7 @@ object V2rayConfigUtil { // Hardcode googleapis.cn val googleapisRoute = V2rayConfig.RoutingBean.RulesBean( - outboundTag = AppConfig.TAG_AGENT, + outboundTag = AppConfig.TAG_PROXY, domain = arrayListOf("domain:googleapis.cn") ) @@ -225,6 +225,15 @@ object V2rayConfigUtil { v2rayConfig.routing.rules.add(globalDirect) } } + + if(routingMode != ERoutingMode.GLOBAL_DIRECT.value) { + v2rayConfig.routing.rules.add( + V2rayConfig.RoutingBean.RulesBean( + outboundTag = AppConfig.TAG_PROXY, + port = "0-65535" + )) + } + } catch (e: Exception) { e.printStackTrace() return false @@ -340,7 +349,7 @@ object V2rayConfigUtil { val remoteDns = Utils.getRemoteDnsServers() if (v2rayConfig.inbounds.none { e -> e.protocol == "dokodemo-door" && e.tag == "dns-in" }) { val dnsInboundSettings = V2rayConfig.InboundBean.InSettingsBean( - address = if (Utils.isPureIpAddress(remoteDns.first())) remoteDns.first() else "1.1.1.1", + address = if (Utils.isPureIpAddress(remoteDns.first())) remoteDns.first() else AppConfig.DNS_PROXY, port = 53, network = "tcp,udp" ) @@ -477,7 +486,7 @@ object V2rayConfigUtil { if (Utils.isPureIpAddress(remoteDns.first())) { v2rayConfig.routing.rules.add( 0, V2rayConfig.RoutingBean.RulesBean( - outboundTag = AppConfig.TAG_AGENT, + outboundTag = AppConfig.TAG_PROXY, port = "53", ip = arrayListOf(remoteDns.first()), domain = null diff --git a/v2rayng/V2rayNG/app/src/main/res/values-zh-rCN/strings.xml b/v2rayng/V2rayNG/app/src/main/res/values-zh-rCN/strings.xml index bb6add7d18..a517af0a47 100644 --- a/v2rayng/V2rayNG/app/src/main/res/values-zh-rCN/strings.xml +++ b/v2rayng/V2rayNG/app/src/main/res/values-zh-rCN/strings.xml @@ -135,7 +135,7 @@ 预定义规则 自定义规则 - 远程DNS (可选) + 远程DNS (udp/tcp/https/quic)(可选) DNS VPN DNS (仅支持 IPv4/v6) diff --git a/v2rayng/V2rayNG/app/src/main/res/values-zh-rTW/strings.xml b/v2rayng/V2rayNG/app/src/main/res/values-zh-rTW/strings.xml index 86a56169e3..22c82d6ccd 100644 --- a/v2rayng/V2rayNG/app/src/main/res/values-zh-rTW/strings.xml +++ b/v2rayng/V2rayNG/app/src/main/res/values-zh-rTW/strings.xml @@ -135,7 +135,7 @@ 轉送模式 自訂轉送 - 遠端 DNS (可選) + 遠端DNS (udp/tcp/https/quic)(可選) DNS VPN DNS (僅支援 IPv4/v6) diff --git a/v2rayng/V2rayNG/app/src/main/res/values/strings.xml b/v2rayng/V2rayNG/app/src/main/res/values/strings.xml index e8facceebc..3bfd95d0f9 100644 --- a/v2rayng/V2rayNG/app/src/main/res/values/strings.xml +++ b/v2rayng/V2rayNG/app/src/main/res/values/strings.xml @@ -148,7 +148,7 @@ Predefined rules Custom rules - Remote DNS (Optional) + Remote DNS (udp/tcp/https/quic)(Optional) DNS VPN DNS (only IPv4/v6) diff --git a/xray-core/go.mod b/xray-core/go.mod index cf63cbfd51..807dec1f1b 100644 --- a/xray-core/go.mod +++ b/xray-core/go.mod @@ -11,7 +11,7 @@ require ( github.com/pelletier/go-toml v1.9.5 github.com/pires/go-proxyproto v0.7.0 github.com/quic-go/quic-go v0.42.0 - github.com/refraction-networking/utls v1.6.3 + github.com/refraction-networking/utls v1.6.4 github.com/sagernet/sing v0.3.8 github.com/sagernet/sing-shadowsocks v0.2.6 github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb diff --git a/xray-core/go.sum b/xray-core/go.sum index 4318384827..64362f4ed2 100644 --- a/xray-core/go.sum +++ b/xray-core/go.sum @@ -114,8 +114,8 @@ github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7q github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/quic-go/quic-go v0.42.0 h1:uSfdap0eveIl8KXnipv9K7nlwZ5IqLlYOpJ58u5utpM= github.com/quic-go/quic-go v0.42.0/go.mod h1:132kz4kL3F9vxhW3CtQJLDVwcFe5wdWeJXXijhsO57M= -github.com/refraction-networking/utls v1.6.3 h1:MFOfRN35sSx6K5AZNIoESsBuBxS2LCgRilRIdHb6fDc= -github.com/refraction-networking/utls v1.6.3/go.mod h1:yil9+7qSl+gBwJqztoQseO6Pr3h62pQoY1lXiNR/FPs= +github.com/refraction-networking/utls v1.6.4 h1:aeynTroaYn7y+mFtqv8D0bQ4bw0y9nJHneGxJ7lvRDM= +github.com/refraction-networking/utls v1.6.4/go.mod h1:2VL2xfiqgFAZtJKeUTlf+PSYFs3Eu7km0gCtXJ3m8zs= github.com/riobard/go-bloom v0.0.0-20200614022211-cdc8013cb5b3 h1:f/FNXud6gA3MNr8meMVVGxhp+QBTqY91tM8HjEuMjGg= github.com/riobard/go-bloom v0.0.0-20200614022211-cdc8013cb5b3/go.mod h1:HgjTstvQsPGkxUsCd2KWxErBblirPizecHcpD3ffK+s= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= diff --git a/yass/.github/workflows/clang-tidy.yml b/yass/.github/workflows/clang-tidy.yml index 8a29cb9d8f..19edf4ce2e 100644 --- a/yass/.github/workflows/clang-tidy.yml +++ b/yass/.github/workflows/clang-tidy.yml @@ -18,7 +18,7 @@ on: schedule: - cron: '0 16 * * *' env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: win-clang-tidy: diff --git a/yass/.github/workflows/compiler.yml b/yass/.github/workflows/compiler.yml index 62956ea8c0..83337b33f6 100644 --- a/yass/.github/workflows/compiler.yml +++ b/yass/.github/workflows/compiler.yml @@ -33,7 +33,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: win-compiler-compatible-2022: diff --git a/yass/.github/workflows/releases-android-binary.yml b/yass/.github/workflows/releases-android-binary.yml index 5b7909cdd9..b4717851ef 100644 --- a/yass/.github/workflows/releases-android-binary.yml +++ b/yass/.github/workflows/releases-android-binary.yml @@ -35,7 +35,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: android-binary-release: diff --git a/yass/.github/workflows/releases-deb.yml b/yass/.github/workflows/releases-deb.yml index 89ff18d105..ba7d132686 100644 --- a/yass/.github/workflows/releases-deb.yml +++ b/yass/.github/workflows/releases-deb.yml @@ -35,7 +35,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct DH_QUIET: 1 jobs: @@ -169,7 +169,7 @@ jobs: path: | /etc/schroot/chroot.d /var/lib/schroot/chroots - key: ${{ runner.os }}-22.04-schroot-${{ env.HOST_DISTRO }}-${{ env.HOST_ARCH }}-v1 + key: ${{ runner.os }}-22.04-schroot-${{ env.HOST_DISTRO }}-${{ env.HOST_ARCH }}-v2 - name: Fix schroot permissions (restore root permissions) run: | sudo chown -R root:root /etc/schroot/chroot.d /var/lib/schroot/chroots @@ -205,7 +205,7 @@ jobs: if: ${{ steps.schroot-cache.outputs.cache-hit != 'true' }} run: | sudo schroot --chroot "source:${{ env.HOST_DISTRO }}-${{ env.BUILD_ARCH }}-${{ env.HOST_ARCH }}" --user root -- \ - apt-get install -y fakeroot advancecomp apt-utils file build-essential pkg-config debhelper lockfile-progs optipng pkg-config tzdata ucf + apt-get install -y fakeroot advancecomp apt-utils file build-essential pkg-config debhelper lockfile-progs optipng tzdata ucf - name: Populate sysroot (amd64 runtime for clang) if: ${{ steps.schroot-cache.outputs.cache-hit != 'true' && env.HOST_ARCH == 'i386' }} run: | @@ -267,12 +267,17 @@ jobs: if: ${{ steps.schroot-cache.outputs.cache-hit != 'true' }} run: | sudo schroot --chroot "source:${{ env.HOST_DISTRO }}-${{ env.BUILD_ARCH }}-${{ env.HOST_ARCH }}" --user root -- \ - apt-get install -y perl gcc g++ ninja-build debhelper + apt-get install -y perl gcc g++ ninja-build + - name: Populate depedencie (zlib) + if: ${{ steps.schroot-cache.outputs.cache-hit != 'true' }} + run: | + sudo schroot --chroot "source:${{ env.HOST_DISTRO }}-${{ env.BUILD_ARCH }}-${{ env.HOST_ARCH }}" --user root -- \ + apt-get install -y zlib1g-dev:${{ env.HOST_ARCH }} - name: Populate depedencie (gui, exclude some arches) if: ${{ steps.schroot-cache.outputs.cache-hit != 'true' && env.HOST_ARCH != 'armel' && env.HOST_ARCH != 'mipsel' && env.HOST_ARCH != 'mips64el' && env.HOST_ARCH != 'riscv64' }} run: | sudo schroot --chroot "source:${{ env.HOST_DISTRO }}-${{ env.BUILD_ARCH }}-${{ env.HOST_ARCH }}" --user root -- \ - apt-get install -y pkg-config libglib2.0-dev:${{ env.HOST_ARCH }} libgtk-3-dev:${{ env.HOST_ARCH }} + apt-get install -y libglib2.0-dev:${{ env.HOST_ARCH }} libgtk-3-dev:${{ env.HOST_ARCH }} - name: Populate depedencie (curl, for test purpose, exclude some arches) if: ${{ steps.schroot-cache.outputs.cache-hit != 'true' && env.HOST_ARCH != 'armel' && env.HOST_ARCH != 'mipsel' && env.HOST_ARCH != 'mips64el' && env.HOST_ARCH != 'riscv64' }} run: | diff --git a/yass/.github/workflows/releases-freebsd-binary.yml b/yass/.github/workflows/releases-freebsd-binary.yml index 92f521cdb7..2ec6a925a0 100644 --- a/yass/.github/workflows/releases-freebsd-binary.yml +++ b/yass/.github/workflows/releases-freebsd-binary.yml @@ -35,7 +35,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: freebsd-binary-release: diff --git a/yass/.github/workflows/releases-ios.yml b/yass/.github/workflows/releases-ios.yml index 7920456475..3051bba13a 100644 --- a/yass/.github/workflows/releases-ios.yml +++ b/yass/.github/workflows/releases-ios.yml @@ -21,7 +21,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: ios-simulator-release: diff --git a/yass/.github/workflows/releases-linux-binary.yml b/yass/.github/workflows/releases-linux-binary.yml index 8ab1b92a2e..cc55a3c9db 100644 --- a/yass/.github/workflows/releases-linux-binary.yml +++ b/yass/.github/workflows/releases-linux-binary.yml @@ -35,7 +35,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: linux-binary-release: diff --git a/yass/.github/workflows/releases-macos.yml b/yass/.github/workflows/releases-macos.yml index 7714b21c93..1b467c5aa2 100644 --- a/yass/.github/workflows/releases-macos.yml +++ b/yass/.github/workflows/releases-macos.yml @@ -35,7 +35,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: mac-release: diff --git a/yass/.github/workflows/releases-mingw-new.yml b/yass/.github/workflows/releases-mingw-new.yml index b462ae8337..36799d24fa 100644 --- a/yass/.github/workflows/releases-mingw-new.yml +++ b/yass/.github/workflows/releases-mingw-new.yml @@ -35,7 +35,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: mingw64-release: diff --git a/yass/.github/workflows/releases-mingw.yml b/yass/.github/workflows/releases-mingw.yml index b9d7aadaf5..5685f2f071 100644 --- a/yass/.github/workflows/releases-mingw.yml +++ b/yass/.github/workflows/releases-mingw.yml @@ -18,7 +18,7 @@ on: schedule: - cron: '0 16 * * *' env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: mingw64-release: diff --git a/yass/.github/workflows/releases-openwrt-binary.yml b/yass/.github/workflows/releases-openwrt-binary.yml index f36a979cd4..e5c40ca14d 100644 --- a/yass/.github/workflows/releases-openwrt-binary.yml +++ b/yass/.github/workflows/releases-openwrt-binary.yml @@ -35,7 +35,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: openwrt-binary-release: diff --git a/yass/.github/workflows/releases-rpm.yml b/yass/.github/workflows/releases-rpm.yml index 5d8cd6619b..5bd8677d94 100644 --- a/yass/.github/workflows/releases-rpm.yml +++ b/yass/.github/workflows/releases-rpm.yml @@ -35,7 +35,7 @@ concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }} env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: docker_publish: diff --git a/yass/.github/workflows/releases-windows.yml b/yass/.github/workflows/releases-windows.yml index 3c25e9083a..50b790977b 100644 --- a/yass/.github/workflows/releases-windows.yml +++ b/yass/.github/workflows/releases-windows.yml @@ -24,7 +24,7 @@ defaults: run: shell: cmd env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: windows-release: diff --git a/yass/.github/workflows/sanitizers.yml b/yass/.github/workflows/sanitizers.yml index 8b1280750c..0367262e1c 100644 --- a/yass/.github/workflows/sanitizers.yml +++ b/yass/.github/workflows/sanitizers.yml @@ -18,7 +18,7 @@ on: schedule: - cron: '0 16 * * *' env: - CACHE_EPOCH: 124-1 + CACHE_EPOCH: 125-1 GOPROXY: direct jobs: sanitizer-linux: diff --git a/yass/CLANG_REVISION b/yass/CLANG_REVISION index 8db9309009..b13fa5faac 100644 --- a/yass/CLANG_REVISION +++ b/yass/CLANG_REVISION @@ -1 +1 @@ -llvmorg-19-init-2941-ga0b3dbaf-22 \ No newline at end of file +llvmorg-19-init-7229-g315c88c5-2 \ No newline at end of file diff --git a/yass/debian/control b/yass/debian/control index 3f2972049b..f43ddf4157 100644 --- a/yass/debian/control +++ b/yass/debian/control @@ -12,7 +12,8 @@ Build-Depends: ninja-build:native, debhelper (>= 9), ca-certificates, - pkg-config , + pkg-config, + zlib1g-dev, libglib2.0-dev , libgtk-3-dev Rules-Requires-Root: no diff --git a/yass/debian/rules b/yass/debian/rules index 9fc22cf576..e855226fdb 100755 --- a/yass/debian/rules +++ b/yass/debian/rules @@ -56,6 +56,7 @@ ifneq ($(filter cross,$(DEB_BUILD_PROFILES)),) override_dh_auto_configure: PKG_CONFIG = ${DEB_HOST_GNU_TYPE}-pkg-config endif +override_dh_auto_configure: CMAKE_OPTIONS += -DUSE_SYSTEM_ZLIB=on override_dh_auto_configure: CMAKE_OPTIONS += -DCMAKE_SYSTEM_NAME=Linux -DCMAKE_SYSTEM_PROCESSOR=$(DEB_HOST_ARCH) override_dh_auto_configure: CMAKE_OPTIONS += -DUSE_OLD_SYSTEMD_SERVICE=on diff --git a/yass/src/win32/yass.cpp b/yass/src/win32/yass.cpp index ff56f18f2e..52e0f3aaaf 100644 --- a/yass/src/win32/yass.cpp +++ b/yass/src/win32/yass.cpp @@ -53,6 +53,21 @@ int APIENTRY wWinMain(_In_ HINSTANCE hInstance, return -1; } + // This function is primarily useful to applications that were linked with /SUBSYSTEM:WINDOWS, + // which implies to the operating system that a console is not needed + // before entering the program's main method. + if (AttachConsole(ATTACH_PARENT_PROCESS) != 0) { + FILE* unusedFile; + // Swap to the new out/err streams + freopen_s(&unusedFile, "CONOUT$", "w", stdout); + freopen_s(&unusedFile, "CONOUT$", "w", stderr); + std::cout.clear(); + std::clog.clear(); + std::cerr.clear(); + fprintf(stderr, "attached to parent process\n"); + fflush(stderr); + } + if (!SetUTF8Locale()) { LOG(WARNING) << "Failed to set up utf-8 locale"; } diff --git a/yass/third_party/README.md b/yass/third_party/README.md new file mode 100644 index 0000000000..d417cc4c43 --- /dev/null +++ b/yass/third_party/README.md @@ -0,0 +1,6 @@ +# Update Zlib + +``` +rsync -aPvx ~/chromium/src/third_party/zlib . +rm -f zlib/BUILD.gn zlib/OWNERS +``` diff --git a/yass/third_party/abseil-cpp/CMake/AbseilDll.cmake b/yass/third_party/abseil-cpp/CMake/AbseilDll.cmake index 4beafd7af2..47f3beebd1 100644 --- a/yass/third_party/abseil-cpp/CMake/AbseilDll.cmake +++ b/yass/third_party/abseil-cpp/CMake/AbseilDll.cmake @@ -68,9 +68,7 @@ set(ABSL_INTERNAL_DLL_FILES "container/btree_set.h" "container/fixed_array.h" "container/flat_hash_map.h" - "container/flat_hash_map.cc" "container/flat_hash_set.h" - "container/flat_hash_set.cc" "container/inlined_vector.h" "container/internal/btree.h" "container/internal/btree_container.h" @@ -93,9 +91,7 @@ set(ABSL_INTERNAL_DLL_FILES "container/internal/raw_hash_set.h" "container/internal/tracked.h" "container/node_hash_map.h" - "container/node_hash_map.cc" "container/node_hash_set.h" - "container/node_hash_set.cc" "crc/crc32c.cc" "crc/crc32c.h" "crc/internal/cpu_detect.cc" diff --git a/yass/third_party/abseil-cpp/absl/abseil.podspec.gen.py b/yass/third_party/abseil-cpp/absl/abseil.podspec.gen.py index e567e8790a..cbf7cb423b 100755 --- a/yass/third_party/abseil-cpp/absl/abseil.podspec.gen.py +++ b/yass/third_party/abseil-cpp/absl/abseil.podspec.gen.py @@ -47,6 +47,11 @@ Pod::Spec.new do |s| s.osx.deployment_target = '10.11' s.tvos.deployment_target = '9.0' s.watchos.deployment_target = '2.0' + s.subspec 'xcprivacy' do |ss| + ss.resource_bundles = { + ss.module_name => 'PrivacyInfo.xcprivacy', + } + end """ # Rule object representing the rule of Bazel BUILD. @@ -191,6 +196,12 @@ def write_podspec_rule(f, rule, depth): name = get_spec_name(dep.replace(":", "/")) f.write("{indent}{var}.dependency '{dep}'\n".format( indent=indent, var=spec_var, dep=name)) + # Writes dependency to xcprivacy + f.write( + "{indent}{var}.dependency '{dep}'\n".format( + indent=indent, var=spec_var, dep="abseil/xcprivacy" + ) + ) def write_indented_list(f, leading, values): diff --git a/yass/third_party/abseil-cpp/absl/base/dynamic_annotations.h b/yass/third_party/abseil-cpp/absl/base/dynamic_annotations.h index 7ba8912ea6..f18b5e0a8f 100644 --- a/yass/third_party/abseil-cpp/absl/base/dynamic_annotations.h +++ b/yass/third_party/abseil-cpp/absl/base/dynamic_annotations.h @@ -252,25 +252,9 @@ ABSL_INTERNAL_END_EXTERN_C #else // !defined(ABSL_HAVE_MEMORY_SANITIZER) -// TODO(rogeeff): remove this branch -#ifdef ABSL_HAVE_THREAD_SANITIZER -#define ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ - do { \ - (void)(address); \ - (void)(size); \ - } while (0) -#define ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(address, size) \ - do { \ - (void)(address); \ - (void)(size); \ - } while (0) -#else - #define ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) // empty #define ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(address, size) // empty -#endif - #endif // ABSL_HAVE_MEMORY_SANITIZER // ------------------------------------------------------------------------- diff --git a/yass/third_party/abseil-cpp/absl/base/internal/spinlock.h b/yass/third_party/abseil-cpp/absl/base/internal/spinlock.h index 36f823ada8..301c3e3b43 100644 --- a/yass/third_party/abseil-cpp/absl/base/internal/spinlock.h +++ b/yass/third_party/abseil-cpp/absl/base/internal/spinlock.h @@ -202,6 +202,15 @@ class ABSL_LOCKABLE ABSL_ATTRIBUTE_WARN_UNUSED SpinLock { // Corresponding locker object that arranges to acquire a spinlock for // the duration of a C++ scope. +// +// TODO(b/176172494): Use only [[nodiscard]] when baseline is raised. +// TODO(b/6695610): Remove forward declaration when #ifdef is no longer needed. +#if ABSL_HAVE_CPP_ATTRIBUTE(nodiscard) +class [[nodiscard]] SpinLockHolder; +#else +class ABSL_MUST_USE_RESULT ABSL_ATTRIBUTE_TRIVIAL_ABI SpinLockHolder; +#endif + class ABSL_SCOPED_LOCKABLE SpinLockHolder { public: inline explicit SpinLockHolder(SpinLock* l) ABSL_EXCLUSIVE_LOCK_FUNCTION(l) diff --git a/yass/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock.cc b/yass/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock.cc index 05e0e7ba5e..a0bf3a65f7 100644 --- a/yass/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock.cc +++ b/yass/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock.cc @@ -121,18 +121,6 @@ double UnscaledCycleClock::Frequency() { return aarch64_timer_frequency; } -#elif defined(__riscv) - -int64_t UnscaledCycleClock::Now() { - int64_t virtual_timer_value; - asm volatile("rdcycle %0" : "=r"(virtual_timer_value)); - return virtual_timer_value; -} - -double UnscaledCycleClock::Frequency() { - return base_internal::NominalCPUFrequency(); -} - #elif defined(_M_IX86) || defined(_M_X64) #pragma intrinsic(__rdtsc) diff --git a/yass/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock_config.h b/yass/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock_config.h index 24b324ac99..43a3dabeea 100644 --- a/yass/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock_config.h +++ b/yass/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock_config.h @@ -21,8 +21,8 @@ // The following platforms have an implementation of a hardware counter. #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ - defined(__powerpc__) || defined(__ppc__) || defined(__riscv) || \ - defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) + defined(__powerpc__) || defined(__ppc__) || defined(_M_IX86) || \ + (defined(_M_X64) && !defined(_M_ARM64EC)) #define ABSL_HAVE_UNSCALED_CYCLECLOCK_IMPLEMENTATION 1 #else #define ABSL_HAVE_UNSCALED_CYCLECLOCK_IMPLEMENTATION 0 @@ -53,8 +53,8 @@ #if ABSL_USE_UNSCALED_CYCLECLOCK // This macro can be used to test if UnscaledCycleClock::Frequency() // is NominalCPUFrequency() on a particular platform. -#if (defined(__i386__) || defined(__x86_64__) || defined(__riscv) || \ - defined(_M_IX86) || defined(_M_X64)) +#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \ + defined(_M_X64)) #define ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY #endif #endif diff --git a/yass/third_party/abseil-cpp/absl/base/macros.h b/yass/third_party/abseil-cpp/absl/base/macros.h index 33c1cd1e8d..b318f11664 100644 --- a/yass/third_party/abseil-cpp/absl/base/macros.h +++ b/yass/third_party/abseil-cpp/absl/base/macros.h @@ -174,4 +174,16 @@ ABSL_NAMESPACE_END #define ABSL_DEPRECATE_AND_INLINE() #endif +// Requires the compiler to prove that the size of the given object is at least +// the expected amount. +#if ABSL_HAVE_ATTRIBUTE(diagnose_if) && ABSL_HAVE_BUILTIN(__builtin_object_size) +#define ABSL_INTERNAL_NEED_MIN_SIZE(Obj, N) \ + __attribute__((diagnose_if(__builtin_object_size(Obj, 0) < N, \ + "object size provably too small " \ + "(this would corrupt memory)", \ + "error"))) +#else +#define ABSL_INTERNAL_NEED_MIN_SIZE(Obj, N) +#endif + #endif // ABSL_BASE_MACROS_H_ diff --git a/yass/third_party/abseil-cpp/absl/container/BUILD.bazel b/yass/third_party/abseil-cpp/absl/container/BUILD.bazel index 366bf3cd2e..12e27c21fd 100644 --- a/yass/third_party/abseil-cpp/absl/container/BUILD.bazel +++ b/yass/third_party/abseil-cpp/absl/container/BUILD.bazel @@ -242,7 +242,6 @@ NOTEST_TAGS_MOBILE = [ cc_library( name = "flat_hash_map", - srcs = ["flat_hash_map.cc"], hdrs = ["flat_hash_map.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, @@ -250,9 +249,7 @@ cc_library( ":container_memory", ":hash_function_defaults", ":raw_hash_map", - ":raw_hash_set", "//absl/algorithm:container", - "//absl/base:config", "//absl/base:core_headers", "//absl/memory", ], @@ -282,7 +279,6 @@ cc_test( cc_library( name = "flat_hash_set", - srcs = ["flat_hash_set.cc"], hdrs = ["flat_hash_set.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, @@ -291,7 +287,6 @@ cc_library( ":hash_function_defaults", ":raw_hash_set", "//absl/algorithm:container", - "//absl/base:config", "//absl/base:core_headers", "//absl/memory", ], @@ -323,7 +318,6 @@ cc_test( cc_library( name = "node_hash_map", - srcs = ["node_hash_map.cc"], hdrs = ["node_hash_map.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, @@ -332,9 +326,7 @@ cc_library( ":hash_function_defaults", ":node_slot_policy", ":raw_hash_map", - ":raw_hash_set", "//absl/algorithm:container", - "//absl/base:config", "//absl/base:core_headers", "//absl/memory", ], @@ -361,7 +353,6 @@ cc_test( cc_library( name = "node_hash_set", - srcs = ["node_hash_set.cc"], hdrs = ["node_hash_set.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, @@ -371,7 +362,6 @@ cc_library( ":node_slot_policy", ":raw_hash_set", "//absl/algorithm:container", - "//absl/base:config", "//absl/base:core_headers", "//absl/memory", ], @@ -711,8 +701,10 @@ cc_test( "//absl/base:config", "//absl/base:core_headers", "//absl/base:prefetch", + "//absl/functional:function_ref", "//absl/hash", "//absl/log", + "//absl/log:check", "//absl/memory", "//absl/meta:type_traits", "//absl/strings", @@ -734,6 +726,7 @@ cc_binary( ":hash_function_defaults", ":raw_hash_set", "//absl/base:raw_logging_internal", + "//absl/random", "//absl/strings:str_format", "@com_github_google_benchmark//:benchmark_main", ], @@ -1012,6 +1005,7 @@ cc_library( ":compressed_tuple", ":container_memory", ":layout", + "//absl/base:config", "//absl/base:core_headers", "//absl/base:raw_logging_internal", "//absl/base:throw_delegate", @@ -1020,7 +1014,6 @@ cc_library( "//absl/strings", "//absl/strings:cord", "//absl/types:compare", - "//absl/utility", ], ) diff --git a/yass/third_party/abseil-cpp/absl/container/CMakeLists.txt b/yass/third_party/abseil-cpp/absl/container/CMakeLists.txt index 8e64adb1a7..04055816cd 100644 --- a/yass/third_party/abseil-cpp/absl/container/CMakeLists.txt +++ b/yass/third_party/abseil-cpp/absl/container/CMakeLists.txt @@ -27,10 +27,11 @@ absl_cc_library( LINKOPTS ${ABSL_DEFAULT_LINKOPTS} DEPS - absl::container_common absl::common_policy_traits absl::compare absl::compressed_tuple + absl::config + absl::container_common absl::container_memory absl::cord absl::core_headers @@ -40,7 +41,6 @@ absl_cc_library( absl::strings absl::throw_delegate absl::type_traits - absl::utility ) # Internal-only target, do not depend on directly. @@ -283,17 +283,13 @@ absl_cc_library( flat_hash_map HDRS "flat_hash_map.h" - SRCS - "flat_hash_map.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::config absl::container_memory absl::core_headers absl::hash_function_defaults absl::raw_hash_map - absl::raw_hash_set absl::algorithm_container absl::memory PUBLIC @@ -325,12 +321,9 @@ absl_cc_library( flat_hash_set HDRS "flat_hash_set.h" - SRCS - "flat_hash_set.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::config absl::container_memory absl::hash_function_defaults absl::raw_hash_set @@ -369,18 +362,14 @@ absl_cc_library( node_hash_map HDRS "node_hash_map.h" - SRCS - "node_hash_map.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::config absl::container_memory absl::core_headers absl::hash_function_defaults absl::node_slot_policy absl::raw_hash_map - absl::raw_hash_set absl::algorithm_container absl::memory PUBLIC @@ -409,12 +398,9 @@ absl_cc_library( node_hash_set HDRS "node_hash_set.h" - SRCS - "node_hash_set.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::config absl::container_memory absl::core_headers absl::hash_function_defaults @@ -767,11 +753,13 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::base + absl::check absl::config absl::container_memory absl::core_headers absl::flat_hash_map absl::flat_hash_set + absl::function_ref absl::hash absl::hash_function_defaults absl::hash_policy_testing diff --git a/yass/third_party/abseil-cpp/absl/container/flat_hash_map.cc b/yass/third_party/abseil-cpp/absl/container/flat_hash_map.cc deleted file mode 100644 index d5a2531703..0000000000 --- a/yass/third_party/abseil-cpp/absl/container/flat_hash_map.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2024 The Abseil Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/container/flat_hash_map.h" - -#include -#include - -#include "absl/base/config.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN - -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, int32_t, int32_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, std::string, int32_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, int32_t, std::string); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, int64_t, int64_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, std::string, int64_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, int64_t, std::string); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, uint32_t, uint32_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, std::string, uint32_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, uint32_t, std::string); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, uint64_t, uint64_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, std::string, uint64_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, uint64_t, std::string); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(template, std::string, std::string); - -ABSL_NAMESPACE_END -} // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/container/flat_hash_map.h b/yass/third_party/abseil-cpp/absl/container/flat_hash_map.h index a60b582987..a33c794fad 100644 --- a/yass/third_party/abseil-cpp/absl/container/flat_hash_map.h +++ b/yass/third_party/abseil-cpp/absl/container/flat_hash_map.h @@ -31,20 +31,15 @@ #define ABSL_CONTAINER_FLAT_HASH_MAP_H_ #include -#include -#include #include -#include #include #include #include "absl/algorithm/container.h" -#include "absl/base/config.h" #include "absl/base/macros.h" #include "absl/container/internal/container_memory.h" #include "absl/container/internal/hash_function_defaults.h" // IWYU pragma: export #include "absl/container/internal/raw_hash_map.h" // IWYU pragma: export -#include "absl/container/internal/raw_hash_set.h" // IWYU pragma: export #include "absl/memory/memory.h" namespace absl { @@ -637,42 +632,6 @@ struct IsUnorderedContainer< } // namespace container_algorithm_internal -// Explicit template instantiations for common map types in order to decrease -// linker input size. Note that explicitly instantiating flat_hash_map itself -// doesn't help because it has no non-alias members. If we need to decrease -// linker input size more, we could potentially (a) add more key/value types, -// e.g. string_view/Cord, (b) instantiate some template member functions, e.g. -// operator[]/find. The EXTERN argument is `extern` for the declaration and -// empty for the definition. -#define ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(TEMPLATE, KEY, VALUE) \ - TEMPLATE class absl::container_internal::raw_hash_map< \ - absl::container_internal::FlatHashMapPolicy, \ - absl::container_internal::hash_default_hash, \ - absl::container_internal::hash_default_eq, \ - std::allocator>>; \ - TEMPLATE class absl::container_internal::raw_hash_set< \ - absl::container_internal::FlatHashMapPolicy, \ - absl::container_internal::hash_default_hash, \ - absl::container_internal::hash_default_eq, \ - std::allocator>>; - -// We use exact-width integer types rather than `int`/`long`/`long long` because -// these are the types recommended in the Google C++ style guide and which are -// commonly used in Google code. -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, int32_t, int32_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, std::string, int32_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, int32_t, std::string) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, int64_t, int64_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, std::string, int64_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, int64_t, std::string) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, uint32_t, uint32_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, std::string, uint32_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, uint32_t, std::string) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, uint64_t, uint64_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, std::string, uint64_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, uint64_t, std::string) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_MAP(extern template, std::string, std::string) - ABSL_NAMESPACE_END } // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/container/flat_hash_set.cc b/yass/third_party/abseil-cpp/absl/container/flat_hash_set.cc deleted file mode 100644 index d07081b8f1..0000000000 --- a/yass/third_party/abseil-cpp/absl/container/flat_hash_set.cc +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2024 The Abseil Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/container/flat_hash_set.h" - -#include -#include - -#include "absl/base/config.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN - -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, int8_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, int16_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, int32_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, int64_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, uint8_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, uint16_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, uint32_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, uint64_t); -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(template, std::string); - -ABSL_NAMESPACE_END -} // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/container/flat_hash_set.h b/yass/third_party/abseil-cpp/absl/container/flat_hash_set.h index 461526c0a8..5f72f9549e 100644 --- a/yass/third_party/abseil-cpp/absl/container/flat_hash_set.h +++ b/yass/third_party/abseil-cpp/absl/container/flat_hash_set.h @@ -30,14 +30,11 @@ #define ABSL_CONTAINER_FLAT_HASH_SET_H_ #include -#include #include -#include #include #include #include "absl/algorithm/container.h" -#include "absl/base/config.h" #include "absl/base/macros.h" #include "absl/container/internal/container_memory.h" #include "absl/container/internal/hash_function_defaults.h" // IWYU pragma: export @@ -526,32 +523,6 @@ struct IsUnorderedContainer> } // namespace container_algorithm_internal -// Explicit template instantiations for common set types in order to decrease -// linker input size. Note that explicitly instantiating flat_hash_set itself -// doesn't help because it has no non-alias members. If we need to decrease -// linker input size more, we could potentially (a) add more key types, e.g. -// string_view/Cord, (b) instantiate some template member functions, e.g. -// find/insert/emplace. The EXTERN argument is `extern` for the declaration and -// empty for the definition. -#define ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(TEMPLATE, KEY) \ - TEMPLATE class absl::container_internal::raw_hash_set< \ - absl::container_internal::FlatHashSetPolicy, \ - absl::container_internal::hash_default_hash, \ - absl::container_internal::hash_default_eq, std::allocator>; - -// We use exact-width integer types rather than `int`/`long`/`long long` because -// these are the types recommended in the Google C++ style guide and which are -// commonly used in Google code. -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, int8_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, int16_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, int32_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, int64_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, uint8_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, uint16_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, uint32_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, uint64_t) -ABSL_INTERNAL_TEMPLATE_FLAT_HASH_SET(extern template, std::string) - ABSL_NAMESPACE_END } // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/container/internal/btree.h b/yass/third_party/abseil-cpp/absl/container/internal/btree.h index fd7860dacc..689e71a5ce 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/btree.h +++ b/yass/third_party/abseil-cpp/absl/container/internal/btree.h @@ -53,11 +53,11 @@ #include #include #include -#include #include #include #include +#include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/macros.h" #include "absl/container/internal/common.h" @@ -70,7 +70,6 @@ #include "absl/strings/cord.h" #include "absl/strings/string_view.h" #include "absl/types/compare.h" -#include "absl/utility/utility.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -78,9 +77,10 @@ namespace container_internal { #ifdef ABSL_BTREE_ENABLE_GENERATIONS #error ABSL_BTREE_ENABLE_GENERATIONS cannot be directly set -#elif defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ - defined(ABSL_HAVE_HWADDRESS_SANITIZER) || \ - defined(ABSL_HAVE_MEMORY_SANITIZER) +#elif (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_HWADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER)) && \ + !defined(NDEBUG_SANITIZER) // If defined, performance is important. // When compiled in sanitizer mode, we add generation integers to the nodes and // iterators. When iterators are used, we validate that the container has not // been mutated since the iterator was constructed. @@ -475,7 +475,7 @@ struct SearchResult { // useful information. template struct SearchResult { - SearchResult() {} + SearchResult() = default; explicit SearchResult(V v) : value(v) {} SearchResult(V v, MatchKind /*match*/) : value(v) {} @@ -580,14 +580,12 @@ class btree_node { using layout_type = absl::container_internal::Layout; + using leaf_layout_type = typename layout_type::template WithStaticSizes< + /*parent*/ 1, + /*generation*/ BtreeGenerationsEnabled() ? 1 : 0, + /*position, start, finish, max_count*/ 4>; constexpr static size_type SizeWithNSlots(size_type n) { - return layout_type( - /*parent*/ 1, - /*generation*/ BtreeGenerationsEnabled() ? 1 : 0, - /*position, start, finish, max_count*/ 4, - /*slots*/ n, - /*children*/ 0) - .AllocSize(); + return leaf_layout_type(/*slots*/ n, /*children*/ 0).AllocSize(); } // A lower bound for the overhead of fields other than slots in a leaf node. constexpr static size_type MinimumOverhead() { @@ -619,27 +617,22 @@ class btree_node { constexpr static size_type kNodeSlots = kNodeTargetSlots >= kMinNodeSlots ? kNodeTargetSlots : kMinNodeSlots; + using internal_layout_type = typename layout_type::template WithStaticSizes< + /*parent*/ 1, + /*generation*/ BtreeGenerationsEnabled() ? 1 : 0, + /*position, start, finish, max_count*/ 4, /*slots*/ kNodeSlots, + /*children*/ kNodeSlots + 1>; + // The node is internal (i.e. is not a leaf node) if and only if `max_count` // has this value. constexpr static field_type kInternalNodeMaxCount = 0; - constexpr static layout_type Layout(const size_type slot_count, - const size_type child_count) { - return layout_type( - /*parent*/ 1, - /*generation*/ BtreeGenerationsEnabled() ? 1 : 0, - /*position, start, finish, max_count*/ 4, - /*slots*/ slot_count, - /*children*/ child_count); - } // Leaves can have less than kNodeSlots values. - constexpr static layout_type LeafLayout( + constexpr static leaf_layout_type LeafLayout( const size_type slot_count = kNodeSlots) { - return Layout(slot_count, 0); - } - constexpr static layout_type InternalLayout() { - return Layout(kNodeSlots, kNodeSlots + 1); + return leaf_layout_type(slot_count, 0); } + constexpr static auto InternalLayout() { return internal_layout_type(); } constexpr static size_type LeafSize(const size_type slot_count = kNodeSlots) { return LeafLayout(slot_count).AllocSize(); } diff --git a/yass/third_party/abseil-cpp/absl/container/internal/hash_policy_traits.h b/yass/third_party/abseil-cpp/absl/container/internal/hash_policy_traits.h index ec08794a40..ad835d6fcd 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/hash_policy_traits.h +++ b/yass/third_party/abseil-cpp/absl/container/internal/hash_policy_traits.h @@ -168,7 +168,7 @@ struct hash_policy_traits : common_policy_traits { #endif } - // Whether small object optimization is enabled. False by default. + // Whether small object optimization is enabled. True by default. static constexpr bool soo_enabled() { return soo_enabled_impl(Rank1{}); } private: @@ -197,7 +197,7 @@ struct hash_policy_traits : common_policy_traits { return P::soo_enabled(); } - static constexpr bool soo_enabled_impl(Rank0) { return false; } + static constexpr bool soo_enabled_impl(Rank0) { return true; } }; } // namespace container_internal diff --git a/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler.cc b/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler.cc index b21beef0df..fd21d966b7 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler.cc +++ b/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler.cc @@ -18,13 +18,18 @@ #include #include #include +#include +#include #include #include #include "absl/base/attributes.h" #include "absl/base/config.h" +#include "absl/base/internal/per_thread_tls.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" #include "absl/base/no_destructor.h" +#include "absl/base/optimization.h" #include "absl/debugging/stacktrace.h" #include "absl/memory/memory.h" #include "absl/profiling/internal/exponential_biased.h" @@ -73,7 +78,10 @@ HashtablezInfo::HashtablezInfo() = default; HashtablezInfo::~HashtablezInfo() = default; void HashtablezInfo::PrepareForSampling(int64_t stride, - size_t inline_element_size_value) { + size_t inline_element_size_value, + size_t key_size_value, + size_t value_size_value, + uint16_t soo_capacity_value) { capacity.store(0, std::memory_order_relaxed); size.store(0, std::memory_order_relaxed); num_erases.store(0, std::memory_order_relaxed); @@ -93,6 +101,9 @@ void HashtablezInfo::PrepareForSampling(int64_t stride, depth = absl::GetStackTrace(stack, HashtablezInfo::kMaxStackDepth, /* skip_count= */ 0); inline_element_size = inline_element_size_value; + key_size = key_size_value; + value_size = value_size_value; + soo_capacity = soo_capacity_value; } static bool ShouldForceSampling() { @@ -116,12 +127,13 @@ static bool ShouldForceSampling() { } HashtablezInfo* SampleSlow(SamplingState& next_sample, - size_t inline_element_size) { + size_t inline_element_size, size_t key_size, + size_t value_size, uint16_t soo_capacity) { if (ABSL_PREDICT_FALSE(ShouldForceSampling())) { next_sample.next_sample = 1; const int64_t old_stride = exchange(next_sample.sample_stride, 1); - HashtablezInfo* result = - GlobalHashtablezSampler().Register(old_stride, inline_element_size); + HashtablezInfo* result = GlobalHashtablezSampler().Register( + old_stride, inline_element_size, key_size, value_size, soo_capacity); return result; } @@ -151,10 +163,12 @@ HashtablezInfo* SampleSlow(SamplingState& next_sample, // that case. if (first) { if (ABSL_PREDICT_TRUE(--next_sample.next_sample > 0)) return nullptr; - return SampleSlow(next_sample, inline_element_size); + return SampleSlow(next_sample, inline_element_size, key_size, value_size, + soo_capacity); } - return GlobalHashtablezSampler().Register(old_stride, inline_element_size); + return GlobalHashtablezSampler().Register(old_stride, inline_element_size, + key_size, value_size, soo_capacity); #endif } diff --git a/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler.h b/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler.h index e41ee2d747..d74acf8c6e 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler.h +++ b/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler.h @@ -40,15 +40,20 @@ #define ABSL_CONTAINER_INTERNAL_HASHTABLEZ_SAMPLER_H_ #include +#include +#include #include #include #include +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/per_thread_tls.h" #include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" #include "absl/profiling/internal/sample_recorder.h" #include "absl/synchronization/mutex.h" +#include "absl/time/time.h" #include "absl/utility/utility.h" namespace absl { @@ -67,7 +72,9 @@ struct HashtablezInfo : public profiling_internal::Sample { // Puts the object into a clean state, fills in the logically `const` members, // blocking for any readers that are currently sampling the object. - void PrepareForSampling(int64_t stride, size_t inline_element_size_value) + void PrepareForSampling(int64_t stride, size_t inline_element_size_value, + size_t key_size, size_t value_size, + uint16_t soo_capacity_value) ABSL_EXCLUSIVE_LOCKS_REQUIRED(init_mu); // These fields are mutated by the various Record* APIs and need to be @@ -91,8 +98,15 @@ struct HashtablezInfo : public profiling_internal::Sample { static constexpr int kMaxStackDepth = 64; absl::Time create_time; int32_t depth; + // The SOO capacity for this table in elements (not bytes). Note that sampled + // tables are never SOO because we need to store the infoz handle on the heap. + // Tables that would be SOO if not sampled should have: soo_capacity > 0 && + // size <= soo_capacity && max_reserve <= soo_capacity. + uint16_t soo_capacity; void* stack[kMaxStackDepth]; - size_t inline_element_size; // How big is the slot? + size_t inline_element_size; // How big is the slot in bytes? + size_t key_size; // sizeof(key_type) + size_t value_size; // sizeof(value_type) }; void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length); @@ -117,7 +131,8 @@ struct SamplingState { }; HashtablezInfo* SampleSlow(SamplingState& next_sample, - size_t inline_element_size); + size_t inline_element_size, size_t key_size, + size_t value_size, uint16_t soo_capacity); void UnsampleSlow(HashtablezInfo* info); #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) @@ -204,16 +219,19 @@ class HashtablezInfoHandle { extern ABSL_PER_THREAD_TLS_KEYWORD SamplingState global_next_sample; #endif // defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) -// Returns an RAII sampling handle that manages registration and unregistation -// with the global sampler. +// Returns a sampling handle. inline HashtablezInfoHandle Sample( - size_t inline_element_size ABSL_ATTRIBUTE_UNUSED) { + ABSL_ATTRIBUTE_UNUSED size_t inline_element_size, + ABSL_ATTRIBUTE_UNUSED size_t key_size, + ABSL_ATTRIBUTE_UNUSED size_t value_size, + ABSL_ATTRIBUTE_UNUSED uint16_t soo_capacity) { #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) if (ABSL_PREDICT_TRUE(--global_next_sample.next_sample > 0)) { return HashtablezInfoHandle(nullptr); } - return HashtablezInfoHandle( - SampleSlow(global_next_sample, inline_element_size)); + return HashtablezInfoHandle(SampleSlow(global_next_sample, + inline_element_size, key_size, + value_size, soo_capacity)); #else return HashtablezInfoHandle(nullptr); #endif // !ABSL_PER_THREAD_TLS diff --git a/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler_test.cc b/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler_test.cc index 8ebb08da4a..24d3bc4818 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler_test.cc +++ b/yass/third_party/abseil-cpp/absl/container/internal/hashtablez_sampler_test.cc @@ -15,8 +15,12 @@ #include "absl/container/internal/hashtablez_sampler.h" #include +#include +#include +#include #include #include +#include #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -67,7 +71,11 @@ std::vector GetSizes(HashtablezSampler* s) { HashtablezInfo* Register(HashtablezSampler* s, size_t size) { const int64_t test_stride = 123; const size_t test_element_size = 17; - auto* info = s->Register(test_stride, test_element_size); + const size_t test_key_size = 3; + const size_t test_value_size = 5; + auto* info = + s->Register(test_stride, test_element_size, /*key_size=*/test_key_size, + /*value_size=*/test_value_size, /*soo_capacity=*/0); assert(info != nullptr); info->size.store(size); return info; @@ -77,9 +85,15 @@ TEST(HashtablezInfoTest, PrepareForSampling) { absl::Time test_start = absl::Now(); const int64_t test_stride = 123; const size_t test_element_size = 17; + const size_t test_key_size = 15; + const size_t test_value_size = 13; + HashtablezInfo info; absl::MutexLock l(&info.init_mu); - info.PrepareForSampling(test_stride, test_element_size); + info.PrepareForSampling(test_stride, test_element_size, + /*key_size=*/test_key_size, + /*value_size=*/test_value_size, + /*soo_capacity_value=*/1); EXPECT_EQ(info.capacity.load(), 0); EXPECT_EQ(info.size.load(), 0); @@ -94,6 +108,9 @@ TEST(HashtablezInfoTest, PrepareForSampling) { EXPECT_GE(info.create_time, test_start); EXPECT_EQ(info.weight, test_stride); EXPECT_EQ(info.inline_element_size, test_element_size); + EXPECT_EQ(info.key_size, test_key_size); + EXPECT_EQ(info.value_size, test_value_size); + EXPECT_EQ(info.soo_capacity, 1); info.capacity.store(1, std::memory_order_relaxed); info.size.store(1, std::memory_order_relaxed); @@ -106,7 +123,10 @@ TEST(HashtablezInfoTest, PrepareForSampling) { info.max_reserve.store(1, std::memory_order_relaxed); info.create_time = test_start - absl::Hours(20); - info.PrepareForSampling(test_stride * 2, test_element_size); + info.PrepareForSampling(test_stride * 2, test_element_size, + /*key_size=*/test_key_size, + /*value_size=*/test_value_size, + /*soo_capacity_value=*/0); EXPECT_EQ(info.capacity.load(), 0); EXPECT_EQ(info.size.load(), 0); EXPECT_EQ(info.num_erases.load(), 0); @@ -119,7 +139,10 @@ TEST(HashtablezInfoTest, PrepareForSampling) { EXPECT_EQ(info.max_reserve.load(), 0); EXPECT_EQ(info.weight, 2 * test_stride); EXPECT_EQ(info.inline_element_size, test_element_size); + EXPECT_EQ(info.key_size, test_key_size); + EXPECT_EQ(info.value_size, test_value_size); EXPECT_GE(info.create_time, test_start); + EXPECT_EQ(info.soo_capacity, 0); } TEST(HashtablezInfoTest, RecordStorageChanged) { @@ -127,7 +150,13 @@ TEST(HashtablezInfoTest, RecordStorageChanged) { absl::MutexLock l(&info.init_mu); const int64_t test_stride = 21; const size_t test_element_size = 19; - info.PrepareForSampling(test_stride, test_element_size); + const size_t test_key_size = 17; + const size_t test_value_size = 15; + + info.PrepareForSampling(test_stride, test_element_size, + /*key_size=*/test_key_size, + /*value_size=*/test_value_size, + /*soo_capacity_value=*/0); RecordStorageChangedSlow(&info, 17, 47); EXPECT_EQ(info.size.load(), 17); EXPECT_EQ(info.capacity.load(), 47); @@ -141,7 +170,13 @@ TEST(HashtablezInfoTest, RecordInsert) { absl::MutexLock l(&info.init_mu); const int64_t test_stride = 25; const size_t test_element_size = 23; - info.PrepareForSampling(test_stride, test_element_size); + const size_t test_key_size = 21; + const size_t test_value_size = 19; + + info.PrepareForSampling(test_stride, test_element_size, + /*key_size=*/test_key_size, + /*value_size=*/test_value_size, + /*soo_capacity_value=*/0); EXPECT_EQ(info.max_probe_length.load(), 0); RecordInsertSlow(&info, 0x0000FF00, 6 * kProbeLength); EXPECT_EQ(info.max_probe_length.load(), 6); @@ -163,9 +198,15 @@ TEST(HashtablezInfoTest, RecordInsert) { TEST(HashtablezInfoTest, RecordErase) { const int64_t test_stride = 31; const size_t test_element_size = 29; + const size_t test_key_size = 27; + const size_t test_value_size = 25; + HashtablezInfo info; absl::MutexLock l(&info.init_mu); - info.PrepareForSampling(test_stride, test_element_size); + info.PrepareForSampling(test_stride, test_element_size, + /*key_size=*/test_key_size, + /*value_size=*/test_value_size, + /*soo_capacity_value=*/1); EXPECT_EQ(info.num_erases.load(), 0); EXPECT_EQ(info.size.load(), 0); RecordInsertSlow(&info, 0x0000FF00, 6 * kProbeLength); @@ -174,14 +215,23 @@ TEST(HashtablezInfoTest, RecordErase) { EXPECT_EQ(info.size.load(), 0); EXPECT_EQ(info.num_erases.load(), 1); EXPECT_EQ(info.inline_element_size, test_element_size); + EXPECT_EQ(info.key_size, test_key_size); + EXPECT_EQ(info.value_size, test_value_size); + EXPECT_EQ(info.soo_capacity, 1); } TEST(HashtablezInfoTest, RecordRehash) { const int64_t test_stride = 33; const size_t test_element_size = 31; + const size_t test_key_size = 29; + const size_t test_value_size = 27; HashtablezInfo info; absl::MutexLock l(&info.init_mu); - info.PrepareForSampling(test_stride, test_element_size); + info.PrepareForSampling(test_stride, test_element_size, + /*key_size=*/test_key_size, + /*value_size=*/test_value_size, + + /*soo_capacity_value=*/0); RecordInsertSlow(&info, 0x1, 0); RecordInsertSlow(&info, 0x2, kProbeLength); RecordInsertSlow(&info, 0x4, kProbeLength); @@ -201,6 +251,9 @@ TEST(HashtablezInfoTest, RecordRehash) { EXPECT_EQ(info.num_erases.load(), 0); EXPECT_EQ(info.num_rehashes.load(), 1); EXPECT_EQ(info.inline_element_size, test_element_size); + EXPECT_EQ(info.key_size, test_key_size); + EXPECT_EQ(info.value_size, test_value_size); + EXPECT_EQ(info.soo_capacity, 0); } TEST(HashtablezInfoTest, RecordReservation) { @@ -208,7 +261,14 @@ TEST(HashtablezInfoTest, RecordReservation) { absl::MutexLock l(&info.init_mu); const int64_t test_stride = 35; const size_t test_element_size = 33; - info.PrepareForSampling(test_stride, test_element_size); + const size_t test_key_size = 31; + const size_t test_value_size = 29; + + info.PrepareForSampling(test_stride, test_element_size, + /*key_size=*/test_key_size, + /*value_size=*/test_value_size, + + /*soo_capacity_value=*/0); RecordReservationSlow(&info, 3); EXPECT_EQ(info.max_reserve.load(), 3); @@ -224,12 +284,19 @@ TEST(HashtablezInfoTest, RecordReservation) { #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) TEST(HashtablezSamplerTest, SmallSampleParameter) { const size_t test_element_size = 31; + const size_t test_key_size = 33; + const size_t test_value_size = 35; + SetHashtablezEnabled(true); SetHashtablezSampleParameter(100); for (int i = 0; i < 1000; ++i) { SamplingState next_sample = {0, 0}; - HashtablezInfo* sample = SampleSlow(next_sample, test_element_size); + HashtablezInfo* sample = + SampleSlow(next_sample, test_element_size, + /*key_size=*/test_key_size, /*value_size=*/test_value_size, + + /*soo_capacity=*/0); EXPECT_GT(next_sample.next_sample, 0); EXPECT_EQ(next_sample.next_sample, next_sample.sample_stride); EXPECT_NE(sample, nullptr); @@ -239,12 +306,17 @@ TEST(HashtablezSamplerTest, SmallSampleParameter) { TEST(HashtablezSamplerTest, LargeSampleParameter) { const size_t test_element_size = 31; + const size_t test_key_size = 33; + const size_t test_value_size = 35; SetHashtablezEnabled(true); SetHashtablezSampleParameter(std::numeric_limits::max()); for (int i = 0; i < 1000; ++i) { SamplingState next_sample = {0, 0}; - HashtablezInfo* sample = SampleSlow(next_sample, test_element_size); + HashtablezInfo* sample = + SampleSlow(next_sample, test_element_size, + /*key_size=*/test_key_size, /*value_size=*/test_value_size, + /*soo_capacity=*/0); EXPECT_GT(next_sample.next_sample, 0); EXPECT_EQ(next_sample.next_sample, next_sample.sample_stride); EXPECT_NE(sample, nullptr); @@ -254,13 +326,20 @@ TEST(HashtablezSamplerTest, LargeSampleParameter) { TEST(HashtablezSamplerTest, Sample) { const size_t test_element_size = 31; + const size_t test_key_size = 33; + const size_t test_value_size = 35; SetHashtablezEnabled(true); SetHashtablezSampleParameter(100); int64_t num_sampled = 0; int64_t total = 0; double sample_rate = 0.0; for (int i = 0; i < 1000000; ++i) { - HashtablezInfoHandle h = Sample(test_element_size); + HashtablezInfoHandle h = + Sample(test_element_size, + /*key_size=*/test_key_size, /*value_size=*/test_value_size, + + /*soo_capacity=*/0); + ++total; if (h.IsSampled()) { ++num_sampled; @@ -275,7 +354,12 @@ TEST(HashtablezSamplerTest, Handle) { auto& sampler = GlobalHashtablezSampler(); const int64_t test_stride = 41; const size_t test_element_size = 39; - HashtablezInfoHandle h(sampler.Register(test_stride, test_element_size)); + const size_t test_key_size = 37; + const size_t test_value_size = 35; + HashtablezInfoHandle h(sampler.Register(test_stride, test_element_size, + /*key_size=*/test_key_size, + /*value_size=*/test_value_size, + /*soo_capacity=*/0)); auto* info = HashtablezInfoHandlePeer::GetInfo(&h); info->hashes_bitwise_and.store(0x12345678, std::memory_order_relaxed); @@ -351,18 +435,28 @@ TEST(HashtablezSamplerTest, MultiThreaded) { for (int i = 0; i < 10; ++i) { const int64_t sampling_stride = 11 + i % 3; const size_t elt_size = 10 + i % 2; - pool.Schedule([&sampler, &stop, sampling_stride, elt_size]() { + const size_t key_size = 12 + i % 4; + const size_t value_size = 13 + i % 5; + pool.Schedule([&sampler, &stop, sampling_stride, elt_size, key_size, + value_size]() { std::random_device rd; std::mt19937 gen(rd()); std::vector infoz; while (!stop.HasBeenNotified()) { if (infoz.empty()) { - infoz.push_back(sampler.Register(sampling_stride, elt_size)); + infoz.push_back(sampler.Register(sampling_stride, elt_size, + /*key_size=*/key_size, + /*value_size=*/value_size, + /*soo_capacity=*/0)); } switch (std::uniform_int_distribution<>(0, 2)(gen)) { case 0: { - infoz.push_back(sampler.Register(sampling_stride, elt_size)); + infoz.push_back(sampler.Register(sampling_stride, elt_size, + /*key_size=*/key_size, + /*value_size=*/value_size, + + /*soo_capacity=*/0)); break; } case 1: { diff --git a/yass/third_party/abseil-cpp/absl/container/internal/layout.h b/yass/third_party/abseil-cpp/absl/container/internal/layout.h index 1bf739cc75..384929af49 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/layout.h +++ b/yass/third_party/abseil-cpp/absl/container/internal/layout.h @@ -81,9 +81,30 @@ // } // // The layout we used above combines fixed-size with dynamically-sized fields. -// This is quite common. Layout is optimized for this use case and generates -// optimal code. All computations that can be performed at compile time are -// indeed performed at compile time. +// This is quite common. Layout is optimized for this use case and attempts to +// generate optimal code. To help the compiler do that in more cases, you can +// specify the fixed sizes using `WithStaticSizes`. This ensures that all +// computations that can be performed at compile time are indeed performed at +// compile time. Note that sometimes the `template` keyword is needed. E.g.: +// +// using SL = L::template WithStaticSizes<1, 1>; +// +// void Use(unsigned char* p) { +// // First, extract N and M. +// // Using `prefix` we can access the first three arrays but not more. +// // +// // More details: The first element always has offset 0. `SL` +// // has offsets for the second and third array based on sizes of +// // the first and second array, specified via `WithStaticSizes`. +// constexpr auto prefix = SL::Partial(); +// size_t n = *prefix.Pointer<0>(p); +// size_t m = *prefix.Pointer<1>(p); +// +// // Now we can get a pointer to the final payload. +// const SL layout(n, m); +// double* a = layout.Pointer(p); +// int* b = layout.Pointer(p); +// } // // Efficiency tip: The order of fields matters. In `Layout` try to // ensure that `alignof(T1) >= ... >= alignof(TN)`. This way you'll have no @@ -107,7 +128,7 @@ // CompactString(const char* s = "") { // const size_t size = strlen(s); // // size_t[1] followed by char[size + 1]. -// const L layout(1, size + 1); +// const L layout(size + 1); // p_.reset(new unsigned char[layout.AllocSize()]); // // If running under ASAN, mark the padding bytes, if any, to catch // // memory errors. @@ -125,14 +146,13 @@ // // const char* c_str() const { // // Equivalent to reinterpret_cast(p.get() + sizeof(size_t)). -// // The argument in Partial(1) specifies that we have size_t[1] in front -// // of the characters. -// return L::Partial(1).Pointer(p_.get()); +// return L::Partial().Pointer(p_.get()); // } // // private: -// // Our heap allocation contains a size_t followed by an array of chars. -// using L = Layout; +// // Our heap allocation contains a single size_t followed by an array of +// // chars. +// using L = Layout::WithStaticSizes<1>; // std::unique_ptr p_; // }; // @@ -146,11 +166,12 @@ // // The interface exported by this file consists of: // - class `Layout<>` and its public members. -// - The public members of class `internal_layout::LayoutImpl<>`. That class -// isn't intended to be used directly, and its name and template parameter -// list are internal implementation details, but the class itself provides -// most of the functionality in this file. See comments on its members for -// detailed documentation. +// - The public members of classes `internal_layout::LayoutWithStaticSizes<>` +// and `internal_layout::LayoutImpl<>`. Those classes aren't intended to be +// used directly, and their name and template parameter list are internal +// implementation details, but the classes themselves provide most of the +// functionality in this file. See comments on their members for detailed +// documentation. // // `Layout::Partial(count1,..., countm)` (where `m` <= `n`) returns a // `LayoutImpl<>` object. `Layout layout(count1,..., countn)` @@ -164,13 +185,14 @@ #include #include -#include +#include #include #include #include #include #include +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/debugging/internal/demangle.h" #include "absl/meta/type_traits.h" @@ -209,9 +231,6 @@ struct NotAligned> { template using IntToSize = size_t; -template -using TypeToSize = size_t; - template struct Type : NotAligned { using type = T; @@ -308,7 +327,8 @@ using IsLegalElementType = std::integral_constant< !std::is_volatile::type>::value && adl_barrier::IsPow2(AlignOf::value)>; -template +template class LayoutImpl; // Public base class of `Layout` and the result type of `Layout::Partial()`. @@ -316,31 +336,49 @@ class LayoutImpl; // `Elements...` contains all template arguments of `Layout` that created this // instance. // -// `SizeSeq...` is `[0, NumSizes)` where `NumSizes` is the number of arguments -// passed to `Layout::Partial()` or `Layout::Layout()`. +// `StaticSizeSeq...` is an index_sequence containing the sizes specified at +// compile-time. +// +// `RuntimeSizeSeq...` is `[0, NumRuntimeSizes)`, where `NumRuntimeSizes` is the +// number of arguments passed to `Layout::Partial()` or `Layout::Layout()`. +// +// `SizeSeq...` is `[0, NumSizes)` where `NumSizes` is `NumRuntimeSizes` plus +// the number of sizes in `StaticSizeSeq`. // // `OffsetSeq...` is `[0, NumOffsets)` where `NumOffsets` is // `Min(sizeof...(Elements), NumSizes + 1)` (the number of arrays for which we // can compute offsets). -template -class LayoutImpl, absl::index_sequence, - absl::index_sequence> { +template +class LayoutImpl< + std::tuple, absl::index_sequence, + absl::index_sequence, absl::index_sequence, + absl::index_sequence> { private: static_assert(sizeof...(Elements) > 0, "At least one field is required"); static_assert(absl::conjunction...>::value, "Invalid element type (see IsLegalElementType)"); + static_assert(sizeof...(StaticSizeSeq) <= sizeof...(Elements), + "Too many static sizes specified"); enum { NumTypes = sizeof...(Elements), + NumStaticSizes = sizeof...(StaticSizeSeq), + NumRuntimeSizes = sizeof...(RuntimeSizeSeq), NumSizes = sizeof...(SizeSeq), NumOffsets = sizeof...(OffsetSeq), }; // These are guaranteed by `Layout`. + static_assert(NumStaticSizes + NumRuntimeSizes == NumSizes, "Internal error"); + static_assert(NumSizes <= NumTypes, "Internal error"); static_assert(NumOffsets == adl_barrier::Min(NumTypes, NumSizes + 1), "Internal error"); static_assert(NumTypes > 0, "Internal error"); + static constexpr std::array kStaticSizes = { + StaticSizeSeq...}; + // Returns the index of `T` in `Elements...`. Results in a compilation error // if `Elements...` doesn't contain exactly one instance of `T`. template @@ -363,7 +401,7 @@ class LayoutImpl, absl::index_sequence, template using ElementType = typename std::tuple_element::type; - constexpr explicit LayoutImpl(IntToSize... sizes) + constexpr explicit LayoutImpl(IntToSize... sizes) : size_{sizes...} {} // Alignment of the layout, equal to the strictest alignment of all elements. @@ -389,7 +427,7 @@ class LayoutImpl, absl::index_sequence, constexpr size_t Offset() const { static_assert(N < NumOffsets, "Index out of bounds"); return adl_barrier::Align( - Offset() + SizeOf>::value * size_[N - 1], + Offset() + SizeOf>::value * Size(), ElementAlignment::value); } @@ -411,8 +449,7 @@ class LayoutImpl, absl::index_sequence, return {{Offset()...}}; } - // The number of elements in the Nth array. This is the Nth argument of - // `Layout::Partial()` or `Layout::Layout()` (zero-based). + // The number of elements in the Nth array (zero-based). // // // int[3], 4 bytes of padding, double[4]. // Layout x(3, 4); @@ -420,10 +457,15 @@ class LayoutImpl, absl::index_sequence, // assert(x.Size<1>() == 4); // // Requires: `N < NumSizes`. - template + template = 0> + constexpr size_t Size() const { + return kStaticSizes[N]; + } + + template = NumStaticSizes)> = 0> constexpr size_t Size() const { static_assert(N < NumSizes, "Index out of bounds"); - return size_[N]; + return size_[N - NumStaticSizes]; } // The number of elements in the array with the specified element type. @@ -500,13 +542,8 @@ class LayoutImpl, absl::index_sequence, // std::tie(ints, doubles) = x.Pointers(p); // // Requires: `p` is aligned to `Alignment()`. - // - // Note: We're not using ElementType alias here because it does not compile - // under MSVC. template - std::tuple::type>*...> - Pointers(Char* p) const { + auto Pointers(Char* p) const { return std::tuple>*...>( Pointer(p)...); } @@ -559,15 +596,10 @@ class LayoutImpl, absl::index_sequence, // // Requires: `p` is aligned to `Alignment()`. // - // Note: We're not using ElementType alias here because it does not compile - // under MSVC. + // Note: We mark the parameter as unused because GCC detects it is not used + // when `SizeSeq` is empty [-Werror=unused-but-set-parameter]. template - std::tuple::type>>...> - Slices(Char* p) const { - // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63875 (fixed - // in 6.1). - (void)p; + auto Slices(ABSL_ATTRIBUTE_UNUSED Char* p) const { return std::tuple>>...>( Slice(p)...); } @@ -582,7 +614,7 @@ class LayoutImpl, absl::index_sequence, constexpr size_t AllocSize() const { static_assert(NumTypes == NumSizes, "You must specify sizes of all fields"); return Offset() + - SizeOf>::value * size_[NumTypes - 1]; + SizeOf>::value * Size(); } // If built with --config=asan, poisons padding bytes (if any) in the @@ -606,7 +638,7 @@ class LayoutImpl, absl::index_sequence, // The `if` is an optimization. It doesn't affect the observable behaviour. if (ElementAlignment::value % ElementAlignment::value) { size_t start = - Offset() + SizeOf>::value * size_[N - 1]; + Offset() + SizeOf>::value * Size(); ASAN_POISON_MEMORY_REGION(p + start, Offset() - start); } #endif @@ -635,47 +667,66 @@ class LayoutImpl, absl::index_sequence, adl_barrier::TypeName>()...}; std::string res = absl::StrCat("@0", types[0], "(", sizes[0], ")"); for (size_t i = 0; i != NumOffsets - 1; ++i) { - absl::StrAppend(&res, "[", size_[i], "]; @", offsets[i + 1], types[i + 1], - "(", sizes[i + 1], ")"); + absl::StrAppend(&res, "[", DebugSize(i), "]; @", offsets[i + 1], + types[i + 1], "(", sizes[i + 1], ")"); } // NumSizes is a constant that may be zero. Some compilers cannot see that // inside the if statement "size_[NumSizes - 1]" must be valid. int last = static_cast(NumSizes) - 1; if (NumTypes == NumSizes && last >= 0) { - absl::StrAppend(&res, "[", size_[last], "]"); + absl::StrAppend(&res, "[", DebugSize(static_cast(last)), "]"); } return res; } private: + size_t DebugSize(size_t n) const { + if (n < NumStaticSizes) { + return kStaticSizes[n]; + } else { + return size_[n - NumStaticSizes]; + } + } + // Arguments of `Layout::Partial()` or `Layout::Layout()`. - size_t size_[NumSizes > 0 ? NumSizes : 1]; + size_t size_[NumRuntimeSizes > 0 ? NumRuntimeSizes : 1]; }; -template +// Defining a constexpr static class member variable is redundant and deprecated +// in C++17, but required in C++14. +template +constexpr std::array LayoutImpl< + std::tuple, absl::index_sequence, + absl::index_sequence, absl::index_sequence, + absl::index_sequence>::kStaticSizes; + +template using LayoutType = LayoutImpl< - std::tuple, absl::make_index_sequence, - absl::make_index_sequence>; + std::tuple, StaticSizeSeq, + absl::make_index_sequence, + absl::make_index_sequence, + absl::make_index_sequence>; -} // namespace internal_layout +template +class LayoutWithStaticSizes + : public LayoutType { + private: + using Super = + LayoutType; -// Descriptor of arrays of various types and sizes laid out in memory one after -// another. See the top of the file for documentation. -// -// Check out the public API of internal_layout::LayoutImpl above. The type is -// internal to the library but its methods are public, and they are inherited -// by `Layout`. -template -class Layout : public internal_layout::LayoutType { public: - static_assert(sizeof...(Ts) > 0, "At least one field is required"); - static_assert( - absl::conjunction...>::value, - "Invalid element type (see IsLegalElementType)"); - // The result type of `Partial()` with `NumSizes` arguments. template - using PartialType = internal_layout::LayoutType; + using PartialType = + internal_layout::LayoutType; // `Layout` knows the element types of the arrays we want to lay out in // memory but not the number of elements in each array. @@ -701,14 +752,18 @@ class Layout : public internal_layout::LayoutType { // Note: The sizes of the arrays must be specified in number of elements, // not in bytes. // - // Requires: `sizeof...(Sizes) <= sizeof...(Ts)`. + // Requires: `sizeof...(Sizes) + NumStaticSizes <= sizeof...(Ts)`. // Requires: all arguments are convertible to `size_t`. template static constexpr PartialType Partial(Sizes&&... sizes) { - static_assert(sizeof...(Sizes) <= sizeof...(Ts), ""); - return PartialType(std::forward(sizes)...); + static_assert(sizeof...(Sizes) + StaticSizeSeq::size() <= sizeof...(Ts), + ""); + return PartialType( + static_cast(std::forward(sizes))...); } + // Inherit LayoutType's constructor. + // // Creates a layout with the sizes of all arrays specified. If you know // only the sizes of the first N arrays (where N can be zero), you can use // `Partial()` defined above. The constructor is essentially equivalent to @@ -717,8 +772,69 @@ class Layout : public internal_layout::LayoutType { // // Note: The sizes of the arrays must be specified in number of elements, // not in bytes. - constexpr explicit Layout(internal_layout::TypeToSize... sizes) - : internal_layout::LayoutType(sizes...) {} + // + // Implementation note: we do this via a `using` declaration instead of + // defining our own explicit constructor because the signature of LayoutType's + // constructor depends on RuntimeSizeSeq, which we don't have access to here. + // If we defined our own constructor here, it would have to use a parameter + // pack and then cast the arguments to size_t when calling the superclass + // constructor, similar to what Partial() does. But that would suffer from the + // same problem that Partial() has, which is that the parameter types are + // inferred from the arguments, which may be signed types, which must then be + // cast to size_t. This can lead to negative values being silently (i.e. with + // no compiler warnings) cast to an unsigned type. Having a constructor with + // size_t parameters helps the compiler generate better warnings about + // potential bad casts, while avoiding false warnings when positive literal + // arguments are used. If an argument is a positive literal integer (e.g. + // `1`), the compiler will understand that it can be safely converted to + // size_t, and hence not generate a warning. But if a negative literal (e.g. + // `-1`) or a variable with signed type is used, then it can generate a + // warning about a potentially unsafe implicit cast. It would be great if we + // could do this for Partial() too, but unfortunately as of C++23 there seems + // to be no way to define a function with a variable number of parameters of a + // certain type, a.k.a. homogeneous function parameter packs. So we're forced + // to choose between explicitly casting the arguments to size_t, which + // suppresses all warnings, even potentially valid ones, or implicitly casting + // them to size_t, which generates bogus warnings whenever literal arguments + // are used, even if they're positive. + using Super::Super; +}; + +} // namespace internal_layout + +// Descriptor of arrays of various types and sizes laid out in memory one after +// another. See the top of the file for documentation. +// +// Check out the public API of internal_layout::LayoutWithStaticSizes and +// internal_layout::LayoutImpl above. Those types are internal to the library +// but their methods are public, and they are inherited by `Layout`. +template +class Layout : public internal_layout::LayoutWithStaticSizes< + absl::make_index_sequence<0>, Ts...> { + private: + using Super = + internal_layout::LayoutWithStaticSizes, + Ts...>; + + public: + // If you know the sizes of some or all of the arrays at compile time, you can + // use `WithStaticSizes` or `WithStaticSizeSequence` to create a `Layout` type + // with those sizes baked in. This can help the compiler generate optimal code + // for calculating array offsets and AllocSize(). + // + // Like `Partial()`, the N sizes you specify are for the first N arrays, and + // they specify the number of elements in each array, not the number of bytes. + template + using WithStaticSizeSequence = + internal_layout::LayoutWithStaticSizes; + + template + using WithStaticSizes = + WithStaticSizeSequence>; + + // Inherit LayoutWithStaticSizes's constructor, which requires you to specify + // all the array sizes. + using Super::Super; }; } // namespace container_internal diff --git a/yass/third_party/abseil-cpp/absl/container/internal/layout_benchmark.cc b/yass/third_party/abseil-cpp/absl/container/internal/layout_benchmark.cc index 3af35e33ed..d6f2669760 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/layout_benchmark.cc +++ b/yass/third_party/abseil-cpp/absl/container/internal/layout_benchmark.cc @@ -15,6 +15,9 @@ // Every benchmark should have the same performance as the corresponding // headroom benchmark. +#include +#include + #include "absl/base/internal/raw_logging.h" #include "absl/container/internal/layout.h" #include "benchmark/benchmark.h" @@ -28,6 +31,8 @@ using ::benchmark::DoNotOptimize; using Int128 = int64_t[2]; +constexpr size_t MyAlign(size_t n, size_t m) { return (n + m - 1) & ~(m - 1); } + // This benchmark provides the upper bound on performance for BM_OffsetConstant. template void BM_OffsetConstantHeadroom(benchmark::State& state) { @@ -36,6 +41,15 @@ void BM_OffsetConstantHeadroom(benchmark::State& state) { } } +template +void BM_OffsetConstantStatic(benchmark::State& state) { + using L = typename Layout::template WithStaticSizes<3, 5, 7>; + ABSL_RAW_CHECK(L::Partial().template Offset<3>() == Offset, "Invalid offset"); + for (auto _ : state) { + DoNotOptimize(L::Partial().template Offset<3>()); + } +} + template void BM_OffsetConstant(benchmark::State& state) { using L = Layout; @@ -46,14 +60,74 @@ void BM_OffsetConstant(benchmark::State& state) { } } +template +void BM_OffsetConstantIndirect(benchmark::State& state) { + using L = Layout; + auto p = L::Partial(3, 5, 7); + ABSL_RAW_CHECK(p.template Offset<3>() == Offset, "Invalid offset"); + for (auto _ : state) { + DoNotOptimize(p); + DoNotOptimize(p.template Offset<3>()); + } +} + +template +size_t PartialOffset(size_t k); + +template <> +size_t PartialOffset(size_t k) { + constexpr size_t o = MyAlign(MyAlign(3 * 1, 2) + 5 * 2, 4); + return MyAlign(o + k * 4, 8); +} + +template <> +size_t PartialOffset(size_t k) { + // No alignment is necessary. + return 3 * 16 + 5 * 4 + k * 2; +} + +// This benchmark provides the upper bound on performance for BM_OffsetVariable. +template +void BM_OffsetPartialHeadroom(benchmark::State& state) { + size_t k = 7; + ABSL_RAW_CHECK(PartialOffset(k) == Offset, "Invalid offset"); + for (auto _ : state) { + DoNotOptimize(k); + DoNotOptimize(PartialOffset(k)); + } +} + +template +void BM_OffsetPartialStatic(benchmark::State& state) { + using L = typename Layout::template WithStaticSizes<3, 5>; + size_t k = 7; + ABSL_RAW_CHECK(L::Partial(k).template Offset<3>() == Offset, + "Invalid offset"); + for (auto _ : state) { + DoNotOptimize(k); + DoNotOptimize(L::Partial(k).template Offset<3>()); + } +} + +template +void BM_OffsetPartial(benchmark::State& state) { + using L = Layout; + size_t k = 7; + ABSL_RAW_CHECK(L::Partial(3, 5, k).template Offset<3>() == Offset, + "Invalid offset"); + for (auto _ : state) { + DoNotOptimize(k); + DoNotOptimize(L::Partial(3, 5, k).template Offset<3>()); + } +} + template size_t VariableOffset(size_t n, size_t m, size_t k); template <> size_t VariableOffset(size_t n, size_t m, size_t k) { - auto Align = [](size_t n, size_t m) { return (n + m - 1) & ~(m - 1); }; - return Align(Align(Align(n * 1, 2) + m * 2, 4) + k * 4, 8); + return MyAlign(MyAlign(MyAlign(n * 1, 2) + m * 2, 4) + k * 4, 8); } template <> @@ -94,6 +168,75 @@ void BM_OffsetVariable(benchmark::State& state) { } } +template +size_t AllocSize(size_t x); + +template <> +size_t AllocSize(size_t x) { + constexpr size_t o = + Layout::Partial(3, 5, 7) + .template Offset(); + return o + sizeof(Int128) * x; +} + +template <> +size_t AllocSize(size_t x) { + constexpr size_t o = + Layout::Partial(3, 5, 7) + .template Offset(); + return o + sizeof(int8_t) * x; +} + +// This benchmark provides the upper bound on performance for BM_AllocSize +template +void BM_AllocSizeHeadroom(benchmark::State& state) { + size_t x = 9; + ABSL_RAW_CHECK(AllocSize(x) == Size, "Invalid size"); + for (auto _ : state) { + DoNotOptimize(x); + DoNotOptimize(AllocSize(x)); + } +} + +template +void BM_AllocSizeStatic(benchmark::State& state) { + using L = typename Layout::template WithStaticSizes<3, 5, 7>; + size_t x = 9; + ABSL_RAW_CHECK(L(x).AllocSize() == Size, "Invalid offset"); + for (auto _ : state) { + DoNotOptimize(x); + DoNotOptimize(L(x).AllocSize()); + } +} + +template +void BM_AllocSize(benchmark::State& state) { + using L = Layout; + size_t n = 3; + size_t m = 5; + size_t k = 7; + size_t x = 9; + ABSL_RAW_CHECK(L(n, m, k, x).AllocSize() == Size, "Invalid offset"); + for (auto _ : state) { + DoNotOptimize(n); + DoNotOptimize(m); + DoNotOptimize(k); + DoNotOptimize(x); + DoNotOptimize(L(n, m, k, x).AllocSize()); + } +} + +template +void BM_AllocSizeIndirect(benchmark::State& state) { + using L = Layout; + auto l = L(3, 5, 7, 9); + ABSL_RAW_CHECK(l.AllocSize() == Size, "Invalid offset"); + for (auto _ : state) { + DoNotOptimize(l); + DoNotOptimize(l.AllocSize()); + } +} + // Run all benchmarks in two modes: // // Layout with padding: int8_t[3], int16_t[5], int32_t[7], Int128[?]. @@ -106,16 +249,46 @@ void BM_OffsetVariable(benchmark::State& state) { OFFSET_BENCHMARK(BM_OffsetConstantHeadroom, 48, int8_t, int16_t, int32_t, Int128); +OFFSET_BENCHMARK(BM_OffsetConstantStatic, 48, int8_t, int16_t, int32_t, Int128); OFFSET_BENCHMARK(BM_OffsetConstant, 48, int8_t, int16_t, int32_t, Int128); +OFFSET_BENCHMARK(BM_OffsetConstantIndirect, 48, int8_t, int16_t, int32_t, + Int128); + OFFSET_BENCHMARK(BM_OffsetConstantHeadroom, 82, Int128, int32_t, int16_t, int8_t); +OFFSET_BENCHMARK(BM_OffsetConstantStatic, 82, Int128, int32_t, int16_t, int8_t); OFFSET_BENCHMARK(BM_OffsetConstant, 82, Int128, int32_t, int16_t, int8_t); +OFFSET_BENCHMARK(BM_OffsetConstantIndirect, 82, Int128, int32_t, int16_t, + int8_t); + +OFFSET_BENCHMARK(BM_OffsetPartialHeadroom, 48, int8_t, int16_t, int32_t, + Int128); +OFFSET_BENCHMARK(BM_OffsetPartialStatic, 48, int8_t, int16_t, int32_t, Int128); +OFFSET_BENCHMARK(BM_OffsetPartial, 48, int8_t, int16_t, int32_t, Int128); + +OFFSET_BENCHMARK(BM_OffsetPartialHeadroom, 82, Int128, int32_t, int16_t, + int8_t); +OFFSET_BENCHMARK(BM_OffsetPartialStatic, 82, Int128, int32_t, int16_t, int8_t); +OFFSET_BENCHMARK(BM_OffsetPartial, 82, Int128, int32_t, int16_t, int8_t); + OFFSET_BENCHMARK(BM_OffsetVariableHeadroom, 48, int8_t, int16_t, int32_t, Int128); OFFSET_BENCHMARK(BM_OffsetVariable, 48, int8_t, int16_t, int32_t, Int128); + OFFSET_BENCHMARK(BM_OffsetVariableHeadroom, 82, Int128, int32_t, int16_t, int8_t); OFFSET_BENCHMARK(BM_OffsetVariable, 82, Int128, int32_t, int16_t, int8_t); + +OFFSET_BENCHMARK(BM_AllocSizeHeadroom, 192, int8_t, int16_t, int32_t, Int128); +OFFSET_BENCHMARK(BM_AllocSizeStatic, 192, int8_t, int16_t, int32_t, Int128); +OFFSET_BENCHMARK(BM_AllocSize, 192, int8_t, int16_t, int32_t, Int128); +OFFSET_BENCHMARK(BM_AllocSizeIndirect, 192, int8_t, int16_t, int32_t, Int128); + +OFFSET_BENCHMARK(BM_AllocSizeHeadroom, 91, Int128, int32_t, int16_t, int8_t); +OFFSET_BENCHMARK(BM_AllocSizeStatic, 91, Int128, int32_t, int16_t, int8_t); +OFFSET_BENCHMARK(BM_AllocSize, 91, Int128, int32_t, int16_t, int8_t); +OFFSET_BENCHMARK(BM_AllocSizeIndirect, 91, Int128, int32_t, int16_t, int8_t); + } // namespace } // namespace container_internal ABSL_NAMESPACE_END diff --git a/yass/third_party/abseil-cpp/absl/container/internal/layout_test.cc b/yass/third_party/abseil-cpp/absl/container/internal/layout_test.cc index ae55cf7e51..47fc9f3305 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/layout_test.cc +++ b/yass/third_party/abseil-cpp/absl/container/internal/layout_test.cc @@ -68,9 +68,7 @@ struct alignas(8) Int128 { // int64_t is *not* 8-byte aligned on all platforms! struct alignas(8) Int64 { int64_t a; - friend bool operator==(Int64 lhs, Int64 rhs) { - return lhs.a == rhs.a; - } + friend bool operator==(Int64 lhs, Int64 rhs) { return lhs.a == rhs.a; } }; // Properties of types that this test relies on. @@ -271,6 +269,35 @@ TEST(Layout, Offsets) { } } +TEST(Layout, StaticOffsets) { + using L = Layout; + { + using SL = L::WithStaticSizes<>; + EXPECT_THAT(SL::Partial().Offsets(), ElementsAre(0)); + EXPECT_THAT(SL::Partial(5).Offsets(), ElementsAre(0, 8)); + EXPECT_THAT(SL::Partial(5, 3, 1).Offsets(), ElementsAre(0, 8, 24)); + EXPECT_THAT(SL(5, 3, 1).Offsets(), ElementsAre(0, 8, 24)); + } + { + using SL = L::WithStaticSizes<5>; + EXPECT_THAT(SL::Partial().Offsets(), ElementsAre(0, 8)); + EXPECT_THAT(SL::Partial(3).Offsets(), ElementsAre(0, 8, 24)); + EXPECT_THAT(SL::Partial(3, 1).Offsets(), ElementsAre(0, 8, 24)); + EXPECT_THAT(SL(3, 1).Offsets(), ElementsAre(0, 8, 24)); + } + { + using SL = L::WithStaticSizes<5, 3>; + EXPECT_THAT(SL::Partial().Offsets(), ElementsAre(0, 8, 24)); + EXPECT_THAT(SL::Partial(1).Offsets(), ElementsAre(0, 8, 24)); + EXPECT_THAT(SL(1).Offsets(), ElementsAre(0, 8, 24)); + } + { + using SL = L::WithStaticSizes<5, 3, 1>; + EXPECT_THAT(SL::Partial().Offsets(), ElementsAre(0, 8, 24)); + EXPECT_THAT(SL().Offsets(), ElementsAre(0, 8, 24)); + } +} + TEST(Layout, AllocSize) { { using L = Layout; @@ -295,6 +322,30 @@ TEST(Layout, AllocSize) { } } +TEST(Layout, StaticAllocSize) { + using L = Layout; + { + using SL = L::WithStaticSizes<>; + EXPECT_EQ(136, SL::Partial(3, 5, 7).AllocSize()); + EXPECT_EQ(136, SL(3, 5, 7).AllocSize()); + } + { + using SL = L::WithStaticSizes<3>; + EXPECT_EQ(136, SL::Partial(5, 7).AllocSize()); + EXPECT_EQ(136, SL(5, 7).AllocSize()); + } + { + using SL = L::WithStaticSizes<3, 5>; + EXPECT_EQ(136, SL::Partial(7).AllocSize()); + EXPECT_EQ(136, SL(7).AllocSize()); + } + { + using SL = L::WithStaticSizes<3, 5, 7>; + EXPECT_EQ(136, SL::Partial().AllocSize()); + EXPECT_EQ(136, SL().AllocSize()); + } +} + TEST(Layout, SizeByIndex) { { using L = Layout; @@ -370,6 +421,27 @@ TEST(Layout, Sizes) { } } +TEST(Layout, StaticSize) { + using L = Layout; + { + using SL = L::WithStaticSizes<>; + EXPECT_THAT(SL::Partial().Sizes(), ElementsAre()); + EXPECT_THAT(SL::Partial(3).Size<0>(), 3); + EXPECT_THAT(SL::Partial(3).Size(), 3); + EXPECT_THAT(SL::Partial(3).Sizes(), ElementsAre(3)); + EXPECT_THAT(SL::Partial(3, 5, 7).Size<0>(), 3); + EXPECT_THAT(SL::Partial(3, 5, 7).Size(), 3); + EXPECT_THAT(SL::Partial(3, 5, 7).Size<2>(), 7); + EXPECT_THAT(SL::Partial(3, 5, 7).Size(), 7); + EXPECT_THAT(SL::Partial(3, 5, 7).Sizes(), ElementsAre(3, 5, 7)); + EXPECT_THAT(SL(3, 5, 7).Size<0>(), 3); + EXPECT_THAT(SL(3, 5, 7).Size(), 3); + EXPECT_THAT(SL(3, 5, 7).Size<2>(), 7); + EXPECT_THAT(SL(3, 5, 7).Size(), 7); + EXPECT_THAT(SL(3, 5, 7).Sizes(), ElementsAre(3, 5, 7)); + } +} + TEST(Layout, PointerByIndex) { alignas(max_align_t) const unsigned char p[100] = {0}; { @@ -720,6 +792,61 @@ TEST(Layout, MutablePointers) { } } +TEST(Layout, StaticPointers) { + alignas(max_align_t) const unsigned char p[100] = {0}; + using L = Layout; + { + const auto x = L::WithStaticSizes<>::Partial(); + EXPECT_EQ(std::make_tuple(x.Pointer<0>(p)), + Type>(x.Pointers(p))); + } + { + const auto x = L::WithStaticSizes<>::Partial(1); + EXPECT_EQ(std::make_tuple(x.Pointer<0>(p), x.Pointer<1>(p)), + (Type>(x.Pointers(p)))); + } + { + const auto x = L::WithStaticSizes<1>::Partial(); + EXPECT_EQ(std::make_tuple(x.Pointer<0>(p), x.Pointer<1>(p)), + (Type>(x.Pointers(p)))); + } + { + const auto x = L::WithStaticSizes<>::Partial(1, 2, 3); + EXPECT_EQ( + std::make_tuple(x.Pointer<0>(p), x.Pointer<1>(p), x.Pointer<2>(p)), + (Type>( + x.Pointers(p)))); + } + { + const auto x = L::WithStaticSizes<1>::Partial(2, 3); + EXPECT_EQ( + std::make_tuple(x.Pointer<0>(p), x.Pointer<1>(p), x.Pointer<2>(p)), + (Type>( + x.Pointers(p)))); + } + { + const auto x = L::WithStaticSizes<1, 2>::Partial(3); + EXPECT_EQ( + std::make_tuple(x.Pointer<0>(p), x.Pointer<1>(p), x.Pointer<2>(p)), + (Type>( + x.Pointers(p)))); + } + { + const auto x = L::WithStaticSizes<1, 2, 3>::Partial(); + EXPECT_EQ( + std::make_tuple(x.Pointer<0>(p), x.Pointer<1>(p), x.Pointer<2>(p)), + (Type>( + x.Pointers(p)))); + } + { + const L::WithStaticSizes<1, 2, 3> x; + EXPECT_EQ( + std::make_tuple(x.Pointer<0>(p), x.Pointer<1>(p), x.Pointer<2>(p)), + (Type>( + x.Pointers(p)))); + } +} + TEST(Layout, SliceByIndexSize) { alignas(max_align_t) const unsigned char p[100] = {0}; { @@ -769,7 +896,6 @@ TEST(Layout, SliceByTypeSize) { EXPECT_EQ(7, L(3, 5, 7).Slice(p).size()); } } - TEST(Layout, MutableSliceByIndexSize) { alignas(max_align_t) unsigned char p[100] = {0}; { @@ -820,6 +946,39 @@ TEST(Layout, MutableSliceByTypeSize) { } } +TEST(Layout, StaticSliceSize) { + alignas(max_align_t) const unsigned char cp[100] = {0}; + alignas(max_align_t) unsigned char p[100] = {0}; + using L = Layout; + using SL = L::WithStaticSizes<3, 5>; + + EXPECT_EQ(3, SL::Partial().Slice<0>(cp).size()); + EXPECT_EQ(3, SL::Partial().Slice(cp).size()); + EXPECT_EQ(3, SL::Partial(7).Slice<0>(cp).size()); + EXPECT_EQ(3, SL::Partial(7).Slice(cp).size()); + + EXPECT_EQ(5, SL::Partial().Slice<1>(cp).size()); + EXPECT_EQ(5, SL::Partial().Slice(cp).size()); + EXPECT_EQ(5, SL::Partial(7).Slice<1>(cp).size()); + EXPECT_EQ(5, SL::Partial(7).Slice(cp).size()); + + EXPECT_EQ(7, SL::Partial(7).Slice<2>(cp).size()); + EXPECT_EQ(7, SL::Partial(7).Slice(cp).size()); + + EXPECT_EQ(3, SL::Partial().Slice<0>(p).size()); + EXPECT_EQ(3, SL::Partial().Slice(p).size()); + EXPECT_EQ(3, SL::Partial(7).Slice<0>(p).size()); + EXPECT_EQ(3, SL::Partial(7).Slice(p).size()); + + EXPECT_EQ(5, SL::Partial().Slice<1>(p).size()); + EXPECT_EQ(5, SL::Partial().Slice(p).size()); + EXPECT_EQ(5, SL::Partial(7).Slice<1>(p).size()); + EXPECT_EQ(5, SL::Partial(7).Slice(p).size()); + + EXPECT_EQ(7, SL::Partial(7).Slice<2>(p).size()); + EXPECT_EQ(7, SL::Partial(7).Slice(p).size()); +} + TEST(Layout, SliceByIndexData) { alignas(max_align_t) const unsigned char p[100] = {0}; { @@ -1230,6 +1389,39 @@ TEST(Layout, MutableSliceByTypeData) { } } +TEST(Layout, StaticSliceData) { + alignas(max_align_t) const unsigned char cp[100] = {0}; + alignas(max_align_t) unsigned char p[100] = {0}; + using L = Layout; + using SL = L::WithStaticSizes<3, 5>; + + EXPECT_EQ(0, Distance(cp, SL::Partial().Slice<0>(cp).data())); + EXPECT_EQ(0, Distance(cp, SL::Partial().Slice(cp).data())); + EXPECT_EQ(0, Distance(cp, SL::Partial(7).Slice<0>(cp).data())); + EXPECT_EQ(0, Distance(cp, SL::Partial(7).Slice(cp).data())); + + EXPECT_EQ(4, Distance(cp, SL::Partial().Slice<1>(cp).data())); + EXPECT_EQ(4, Distance(cp, SL::Partial().Slice(cp).data())); + EXPECT_EQ(4, Distance(cp, SL::Partial(7).Slice<1>(cp).data())); + EXPECT_EQ(4, Distance(cp, SL::Partial(7).Slice(cp).data())); + + EXPECT_EQ(24, Distance(cp, SL::Partial(7).Slice<2>(cp).data())); + EXPECT_EQ(24, Distance(cp, SL::Partial(7).Slice(cp).data())); + + EXPECT_EQ(0, Distance(p, SL::Partial().Slice<0>(p).data())); + EXPECT_EQ(0, Distance(p, SL::Partial().Slice(p).data())); + EXPECT_EQ(0, Distance(p, SL::Partial(7).Slice<0>(p).data())); + EXPECT_EQ(0, Distance(p, SL::Partial(7).Slice(p).data())); + + EXPECT_EQ(4, Distance(p, SL::Partial().Slice<1>(p).data())); + EXPECT_EQ(4, Distance(p, SL::Partial().Slice(p).data())); + EXPECT_EQ(4, Distance(p, SL::Partial(7).Slice<1>(p).data())); + EXPECT_EQ(4, Distance(p, SL::Partial(7).Slice(p).data())); + + EXPECT_EQ(24, Distance(p, SL::Partial(7).Slice<2>(p).data())); + EXPECT_EQ(24, Distance(p, SL::Partial(7).Slice(p).data())); +} + MATCHER_P(IsSameSlice, slice, "") { return arg.size() == slice.size() && arg.data() == slice.data(); } @@ -1339,6 +1531,43 @@ TEST(Layout, MutableSlices) { } } +TEST(Layout, StaticSlices) { + alignas(max_align_t) const unsigned char cp[100] = {0}; + alignas(max_align_t) unsigned char p[100] = {0}; + using SL = Layout::WithStaticSizes<1, 2>; + { + const auto x = SL::Partial(); + EXPECT_THAT( + (Type, Span>>( + x.Slices(cp))), + Tuple(IsSameSlice(x.Slice<0>(cp)), IsSameSlice(x.Slice<1>(cp)))); + EXPECT_THAT((Type, Span>>(x.Slices(p))), + Tuple(IsSameSlice(x.Slice<0>(p)), IsSameSlice(x.Slice<1>(p)))); + } + { + const auto x = SL::Partial(3); + EXPECT_THAT((Type, Span, + Span>>(x.Slices(cp))), + Tuple(IsSameSlice(x.Slice<0>(cp)), IsSameSlice(x.Slice<1>(cp)), + IsSameSlice(x.Slice<2>(cp)))); + EXPECT_THAT((Type, Span, Span>>( + x.Slices(p))), + Tuple(IsSameSlice(x.Slice<0>(p)), IsSameSlice(x.Slice<1>(p)), + IsSameSlice(x.Slice<2>(p)))); + } + { + const SL x(3); + EXPECT_THAT((Type, Span, + Span>>(x.Slices(cp))), + Tuple(IsSameSlice(x.Slice<0>(cp)), IsSameSlice(x.Slice<1>(cp)), + IsSameSlice(x.Slice<2>(cp)))); + EXPECT_THAT((Type, Span, Span>>( + x.Slices(p))), + Tuple(IsSameSlice(x.Slice<0>(p)), IsSameSlice(x.Slice<1>(p)), + IsSameSlice(x.Slice<2>(p)))); + } +} + TEST(Layout, UnalignedTypes) { constexpr Layout x(1, 2, 3); alignas(max_align_t) unsigned char p[x.AllocSize() + 1]; @@ -1377,6 +1606,36 @@ TEST(Layout, Alignment) { static_assert(Layout::Alignment() == 8, ""); static_assert(Layout::Alignment() == 8, ""); static_assert(Layout::Alignment() == 8, ""); + static_assert(Layout::Alignment() == 8, ""); + static_assert( + Layout>::WithStaticSizes<>::Alignment() == 64, ""); + static_assert( + Layout>::WithStaticSizes<2>::Alignment() == 64, ""); +} + +TEST(Layout, StaticAlignment) { + static_assert(Layout::WithStaticSizes<>::Alignment() == 1, ""); + static_assert(Layout::WithStaticSizes<0>::Alignment() == 1, ""); + static_assert(Layout::WithStaticSizes<7>::Alignment() == 1, ""); + static_assert(Layout::WithStaticSizes<>::Alignment() == 4, ""); + static_assert(Layout::WithStaticSizes<0>::Alignment() == 4, ""); + static_assert(Layout::WithStaticSizes<3>::Alignment() == 4, ""); + static_assert( + Layout>::WithStaticSizes<>::Alignment() == 64, ""); + static_assert( + Layout>::WithStaticSizes<0>::Alignment() == 64, ""); + static_assert( + Layout>::WithStaticSizes<2>::Alignment() == 64, ""); + static_assert( + Layout::WithStaticSizes<>::Alignment() == 8, ""); + static_assert( + Layout::WithStaticSizes<0, 0, 0>::Alignment() == + 8, + ""); + static_assert( + Layout::WithStaticSizes<1, 1, 1>::Alignment() == + 8, + ""); } TEST(Layout, ConstexprPartial) { @@ -1384,6 +1643,15 @@ TEST(Layout, ConstexprPartial) { constexpr Layout> x(1, 3); static_assert(x.Partial(1).template Offset<1>() == 2 * M, ""); } + +TEST(Layout, StaticConstexpr) { + constexpr size_t M = alignof(max_align_t); + using L = Layout>; + using SL = L::WithStaticSizes<1, 3>; + constexpr SL x; + static_assert(x.Offset<1>() == 2 * M, ""); +} + // [from, to) struct Region { size_t from; @@ -1458,6 +1726,41 @@ TEST(Layout, PoisonPadding) { } } +TEST(Layout, StaticPoisonPadding) { + using L = Layout; + using SL = L::WithStaticSizes<1, 2>; + + constexpr size_t n = L::Partial(1, 2, 3, 4).AllocSize(); + { + constexpr auto x = SL::Partial(); + alignas(max_align_t) const unsigned char c[n] = {}; + x.PoisonPadding(c); + EXPECT_EQ(x.Slices(c), x.Slices(c)); + ExpectPoisoned(c, {{1, 8}}); + } + { + constexpr auto x = SL::Partial(3); + alignas(max_align_t) const unsigned char c[n] = {}; + x.PoisonPadding(c); + EXPECT_EQ(x.Slices(c), x.Slices(c)); + ExpectPoisoned(c, {{1, 8}, {36, 40}}); + } + { + constexpr auto x = SL::Partial(3, 4); + alignas(max_align_t) const unsigned char c[n] = {}; + x.PoisonPadding(c); + EXPECT_EQ(x.Slices(c), x.Slices(c)); + ExpectPoisoned(c, {{1, 8}, {36, 40}}); + } + { + constexpr SL x(3, 4); + alignas(max_align_t) const unsigned char c[n] = {}; + x.PoisonPadding(c); + EXPECT_EQ(x.Slices(c), x.Slices(c)); + ExpectPoisoned(c, {{1, 8}, {36, 40}}); + } +} + TEST(Layout, DebugString) { { constexpr auto x = Layout::Partial(); @@ -1500,6 +1803,62 @@ TEST(Layout, DebugString) { } } +TEST(Layout, StaticDebugString) { + { + constexpr auto x = + Layout::WithStaticSizes<>::Partial(); + EXPECT_EQ("@0(1)", x.DebugString()); + } + { + constexpr auto x = + Layout::WithStaticSizes<>::Partial(1); + EXPECT_EQ("@0(1)[1]; @4(4)", x.DebugString()); + } + { + constexpr auto x = + Layout::WithStaticSizes<1>::Partial(); + EXPECT_EQ("@0(1)[1]; @4(4)", x.DebugString()); + } + { + constexpr auto x = + Layout::WithStaticSizes<>::Partial(1, + 2); + EXPECT_EQ("@0(1)[1]; @4(4)[2]; @12(1)", + x.DebugString()); + } + { + constexpr auto x = + Layout::WithStaticSizes<1>::Partial(2); + EXPECT_EQ("@0(1)[1]; @4(4)[2]; @12(1)", + x.DebugString()); + } + { + constexpr auto x = Layout::WithStaticSizes<1, 2>::Partial(); + EXPECT_EQ("@0(1)[1]; @4(4)[2]; @12(1)", + x.DebugString()); + } + { + constexpr auto x = Layout::WithStaticSizes<1, 2, 3, 4>::Partial(); + EXPECT_EQ( + "@0(1)[1]; @4(4)[2]; @12(1)[3]; " + "@16" + + Int128::Name() + "(16)[4]", + x.DebugString()); + } + { + constexpr Layout::WithStaticSizes<1, 2, 3, + 4> + x; + EXPECT_EQ( + "@0(1)[1]; @4(4)[2]; @12(1)[3]; " + "@16" + + Int128::Name() + "(16)[4]", + x.DebugString()); + } +} + TEST(Layout, CharTypes) { constexpr Layout x(1); alignas(max_align_t) char c[x.AllocSize()] = {}; @@ -1638,6 +1997,35 @@ TEST(CompactString, Works) { EXPECT_STREQ("hello", s.c_str()); } +// Same as the previous CompactString example, except we set the first array +// size to 1 statically, since we know it is always 1. This allows us to compute +// the offset of the character array at compile time. +class StaticCompactString { + public: + StaticCompactString(const char* s = "") { // NOLINT + const size_t size = strlen(s); + const SL layout(size + 1); + p_.reset(new unsigned char[layout.AllocSize()]); + layout.PoisonPadding(p_.get()); + *layout.Pointer(p_.get()) = size; + memcpy(layout.Pointer(p_.get()), s, size + 1); + } + + size_t size() const { return *SL::Partial().Pointer(p_.get()); } + + const char* c_str() const { return SL::Partial().Pointer(p_.get()); } + + private: + using SL = Layout::WithStaticSizes<1>; + std::unique_ptr p_; +}; + +TEST(StaticCompactString, Works) { + StaticCompactString s = "hello"; + EXPECT_EQ(5, s.size()); + EXPECT_STREQ("hello", s.c_str()); +} + } // namespace example } // namespace diff --git a/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set.cc b/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set.cc index f0f840d1d5..c23c1f3bb2 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set.cc +++ b/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set.cc @@ -33,11 +33,11 @@ namespace container_internal { // Represents a control byte corresponding to a full slot with arbitrary hash. constexpr ctrl_t ZeroCtrlT() { return static_cast(0); } -// We have space for `growth_left` before a single block of control bytes. A +// We have space for `growth_info` before a single block of control bytes. A // single block of empty control bytes for tables without any slots allocated. // This enables removing a branch in the hot path of find(). In order to ensure // that the control bytes are aligned to 16, we have 16 bytes before the control -// bytes even though growth_left only needs 8. +// bytes even though growth_info only needs 8. alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = { ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), @@ -133,7 +133,7 @@ size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size, assert(HashSetResizeHelper::SooSlotIndex() == 1); PrepareInsertCommon(common); const size_t offset = H1(hash, common.control()) & 2; - common.set_growth_left(common.growth_left() - 1); + common.growth_info().OverwriteEmptyAsFull(); SetCtrlInSingleGroupTable(common, offset, H2(hash), slot_size); common.infoz().RecordInsert(hash, /*distance_from_desired=*/0); return offset; @@ -274,10 +274,11 @@ void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { if (WasNeverFull(c, index)) { SetCtrl(c, index, ctrl_t::kEmpty, slot_size); - c.set_growth_left(c.growth_left() + 1); + c.growth_info().OverwriteFullAsEmpty(); return; } + c.growth_info().OverwriteFullAsDeleted(); SetCtrl(c, index, ctrl_t::kDeleted, slot_size); } diff --git a/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set.h b/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set.h index 575491c79b..1d2e2d14a9 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set.h +++ b/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set.h @@ -240,9 +240,10 @@ namespace container_internal { #ifdef ABSL_SWISSTABLE_ENABLE_GENERATIONS #error ABSL_SWISSTABLE_ENABLE_GENERATIONS cannot be directly set -#elif defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ - defined(ABSL_HAVE_HWADDRESS_SANITIZER) || \ - defined(ABSL_HAVE_MEMORY_SANITIZER) +#elif (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_HWADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER)) && \ + !defined(NDEBUG_SANITIZER) // If defined, performance is important. // When compiled in sanitizer mode, we add generation integers to the backing // array and iterators. In the backing array, we store the generation between // the control bytes and the slots. When iterators are dereferenced, we assert @@ -570,6 +571,16 @@ inline bool IsEmptyGeneration(const GenerationType* generation) { bool ShouldInsertBackwardsForDebug(size_t capacity, size_t hash, const ctrl_t* ctrl); +ABSL_ATTRIBUTE_ALWAYS_INLINE inline bool ShouldInsertBackwards( + ABSL_ATTRIBUTE_UNUSED size_t capacity, ABSL_ATTRIBUTE_UNUSED size_t hash, + ABSL_ATTRIBUTE_UNUSED const ctrl_t* ctrl) { +#if defined(NDEBUG) + return false; +#else + return ShouldInsertBackwardsForDebug(capacity, hash, ctrl); +#endif +} + // Returns insert position for the given mask. // We want to add entropy even when ASLR is not enabled. // In debug build we will randomly insert in either the front or back of @@ -1053,6 +1064,88 @@ using CommonFieldsGenerationInfo = CommonFieldsGenerationInfoDisabled; using HashSetIteratorGenerationInfo = HashSetIteratorGenerationInfoDisabled; #endif +// Stored the information regarding number of slots we can still fill +// without needing to rehash. +// +// We want to ensure sufficient number of empty slots in the table in order +// to keep probe sequences relatively short. Empty slot in the probe group +// is required to stop probing. +// +// Tombstones (kDeleted slots) are not included in the growth capacity, +// because we'd like to rehash when the table is filled with tombstones and/or +// full slots. +// +// GrowthInfo also stores a bit that encodes whether table may have any +// deleted slots. +// Most of the tables (>95%) have no deleted slots, so some functions can +// be more efficient with this information. +// +// Callers can also force a rehash via the standard `rehash(0)`, +// which will recompute this value as a side-effect. +// +// See also `CapacityToGrowth()`. +class GrowthInfo { + public: + // Leaves data member uninitialized. + GrowthInfo() = default; + + // Initializes the GrowthInfo assuming we can grow `growth_left` elements + // and there are no kDeleted slots in the table. + void InitGrowthLeftNoDeleted(size_t growth_left) { + growth_left_info_ = growth_left; + } + + // Overwrites single full slot with an empty slot. + void OverwriteFullAsEmpty() { ++growth_left_info_; } + + // Overwrites single empty slot with a full slot. + void OverwriteEmptyAsFull() { + assert(GetGrowthLeft() > 0); + --growth_left_info_; + } + + // Overwrites several empty slots with full slots. + void OverwriteManyEmptyAsFull(size_t cnt) { + assert(GetGrowthLeft() >= cnt); + growth_left_info_ -= cnt; + } + + // Overwrites specified control element with full slot. + void OverwriteControlAsFull(ctrl_t ctrl) { + assert(GetGrowthLeft() >= static_cast(IsEmpty(ctrl))); + growth_left_info_ -= static_cast(IsEmpty(ctrl)); + } + + // Overwrites single full slot with a deleted slot. + void OverwriteFullAsDeleted() { growth_left_info_ |= kDeletedBit; } + + // Returns true if table satisfies two properties: + // 1. Guaranteed to have no kDeleted slots. + // 2. There is a place for at least one element to grow. + bool HasNoDeletedAndGrowthLeft() const { + return static_cast>(growth_left_info_) > 0; + } + + // Returns true if table guaranteed to have no k + bool HasNoDeleted() const { + return static_cast>(growth_left_info_) >= 0; + } + + // Returns the number of elements left to grow. + size_t GetGrowthLeft() const { + return growth_left_info_ & kGrowthLeftMask; + } + + private: + static constexpr size_t kGrowthLeftMask = ((~size_t{}) >> 1); + static constexpr size_t kDeletedBit = ~kGrowthLeftMask; + // Topmost bit signal whenever there are deleted slots. + size_t growth_left_info_; +}; + +static_assert(sizeof(GrowthInfo) == sizeof(size_t), ""); +static_assert(alignof(GrowthInfo) == alignof(size_t), ""); + // Returns whether `n` is a valid capacity (i.e., number of slots). // // A valid capacity is a non-zero integer `2^m - 1`. @@ -1071,9 +1164,9 @@ constexpr size_t NumControlBytes(size_t capacity) { } // Computes the offset from the start of the backing allocation of control. -// infoz and growth_left are stored at the beginning of the backing array. +// infoz and growth_info are stored at the beginning of the backing array. inline static size_t ControlOffset(bool has_infoz) { - return (has_infoz ? sizeof(HashtablezInfoHandle) : 0) + sizeof(size_t); + return (has_infoz ? sizeof(HashtablezInfoHandle) : 0) + sizeof(GrowthInfo); } // Helper class for computing offsets and allocation size of hash set fields. @@ -1130,24 +1223,27 @@ struct soo_tag_t {}; // Sentinel type to indicate SOO CommonFields construction with full size. struct full_soo_tag_t {}; +// Suppress erroneous uninitialized memory errors on GCC. For example, GCC +// thinks that the call to slot_array() in find_or_prepare_insert() is reading +// uninitialized memory, but slot_array is only called there when the table is +// non-empty and this memory is initialized when the table is non-empty. +#if !defined(__clang__) && defined(__GNUC__) +#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") \ + _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") x; \ + _Pragma("GCC diagnostic pop") +#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) \ + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(return x) +#else +#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) x +#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) return x +#endif + // This allows us to work around an uninitialized memory warning when // constructing begin() iterators in empty hashtables. union MaybeInitializedPtr { - void* get() const { - // Suppress erroneous uninitialized memory errors on GCC. GCC thinks that - // the call to slot_array() in find_or_prepare_insert() is reading - // uninitialized memory, but slot_array is only called there when the table - // is non-empty and this memory is initialized when the table is non-empty. -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#pragma GCC diagnostic ignored "-Wuninitialized" -#endif - return p; -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic pop -#endif - } + void* get() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(p); } void set(void* ptr) { p = ptr; } void* p; @@ -1163,7 +1259,7 @@ struct HeapPtrs { // This contains `capacity + 1 + NumClonedBytes()` entries, even // when the table is empty (hence EmptyGroup). // - // Note that growth_left is stored immediately before this pointer. + // Note that growth_info is stored immediately before this pointer. // May be uninitialized for SOO tables. ctrl_t* control; @@ -1179,6 +1275,25 @@ union HeapOrSoo { HeapOrSoo() = default; explicit HeapOrSoo(ctrl_t* c) : heap(c) {} + ctrl_t*& control() { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.control); + } + ctrl_t* control() const { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.control); + } + MaybeInitializedPtr& slot_array() { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.slot_array); + } + MaybeInitializedPtr slot_array() const { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.slot_array); + } + void* get_soo_data() { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(soo_data); + } + const void* get_soo_data() const { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(soo_data); + } + HeapPtrs heap; unsigned char soo_data[sizeof(HeapPtrs)]; }; @@ -1207,34 +1322,24 @@ class CommonFields : public CommonFieldsGenerationInfo { } // The inline data for SOO is written on top of control_/slots_. - const void* soo_data() const { return heap_or_soo_.soo_data; } - void* soo_data() { return heap_or_soo_.soo_data; } + const void* soo_data() const { return heap_or_soo_.get_soo_data(); } + void* soo_data() { return heap_or_soo_.get_soo_data(); } HeapOrSoo heap_or_soo() const { return heap_or_soo_; } const HeapOrSoo& heap_or_soo_ref() const { return heap_or_soo_; } - ctrl_t* control() const { return heap_or_soo_.heap.control; } - void set_control(ctrl_t* c) { heap_or_soo_.heap.control = c; } + ctrl_t* control() const { return heap_or_soo_.control(); } + void set_control(ctrl_t* c) { heap_or_soo_.control() = c; } void* backing_array_start() const { - // growth_left (and maybe infoz) is stored before control bytes. + // growth_info (and maybe infoz) is stored before control bytes. assert(reinterpret_cast(control()) % alignof(size_t) == 0); return control() - ControlOffset(has_infoz()); } // Note: we can't use slots() because Qt defines "slots" as a macro. - void* slot_array() const { return heap_or_soo_.heap.slot_array.get(); } - MaybeInitializedPtr slots_union() const { - // Suppress erroneous uninitialized memory errors on GCC. -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif - return heap_or_soo_.heap.slot_array; -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic pop -#endif - } - void set_slots(void* s) { heap_or_soo_.heap.slot_array.set(s); } + void* slot_array() const { return heap_or_soo_.slot_array().get(); } + MaybeInitializedPtr slots_union() const { return heap_or_soo_.slot_array(); } + void set_slots(void* s) { heap_or_soo_.slot_array().set(s); } // The number of filled slots. size_t size() const { return size_ >> HasInfozShift(); } @@ -1267,17 +1372,15 @@ class CommonFields : public CommonFieldsGenerationInfo { // The number of slots we can still fill without needing to rehash. // This is stored in the heap allocation before the control bytes. - // TODO(b/289225379): experiment with moving growth_left back inline to + // TODO(b/289225379): experiment with moving growth_info back inline to // increase room for SOO. - size_t growth_left() const { - const size_t* gl_ptr = reinterpret_cast(control()) - 1; - assert(reinterpret_cast(gl_ptr) % alignof(size_t) == 0); + GrowthInfo& growth_info() { + auto* gl_ptr = reinterpret_cast(control()) - 1; + assert(reinterpret_cast(gl_ptr) % alignof(GrowthInfo) == 0); return *gl_ptr; } - void set_growth_left(size_t gl) { - size_t* gl_ptr = reinterpret_cast(control()) - 1; - assert(reinterpret_cast(gl_ptr) % alignof(size_t) == 0); - *gl_ptr = gl; + GrowthInfo growth_info() const { + return const_cast(this)->growth_info(); } bool has_infoz() const { @@ -1641,6 +1744,10 @@ template inline FindInfo find_first_non_full(const CommonFields& common, size_t hash) { auto seq = probe(common, hash); const ctrl_t* ctrl = common.control(); + if (IsEmptyOrDeleted(ctrl[seq.offset()]) && + !ShouldInsertBackwards(common.capacity(), hash, ctrl)) { + return {seq.offset(), /*probe_length=*/0}; + } while (true) { GroupFullEmptyOrDeleted g{ctrl + seq.offset()}; auto mask = g.MaskEmptyOrDeleted(); @@ -1664,7 +1771,8 @@ extern template FindInfo find_first_non_full(const CommonFields&, size_t); FindInfo find_first_non_full_outofline(const CommonFields&, size_t); inline void ResetGrowthLeft(CommonFields& common) { - common.set_growth_left(CapacityToGrowth(common.capacity()) - common.size()); + common.growth_info().InitGrowthLeftNoDeleted( + CapacityToGrowth(common.capacity()) - common.size()); } // Sets `ctrl` to `{kEmpty, kSentinel, ..., kEmpty}`, marking the entire @@ -1723,9 +1831,9 @@ inline void SetCtrlInSingleGroupTable(const CommonFields& c, size_t i, h2_t h, SetCtrlInSingleGroupTable(c, i, static_cast(h), slot_size); } -// growth_left (which is a size_t) is stored with the backing array. +// growth_info (which is a size_t) is stored with the backing array. constexpr size_t BackingArrayAlignment(size_t align_of_slot) { - return (std::max)(align_of_slot, alignof(size_t)); + return (std::max)(align_of_slot, alignof(GrowthInfo)); } // Returns the address of the ith slot in slots where each slot occupies @@ -1775,17 +1883,49 @@ ABSL_ATTRIBUTE_ALWAYS_INLINE inline void IterateOverFullSlots( } } +template +constexpr bool ShouldSampleHashtablezInfo() { + // Folks with custom allocators often make unwarranted assumptions about the + // behavior of their classes vis-a-vis trivial destructability and what + // calls they will or won't make. Avoid sampling for people with custom + // allocators to get us out of this mess. This is not a hard guarantee but + // a workaround while we plan the exact guarantee we want to provide. + return std::is_same>::value; +} + +template +HashtablezInfoHandle SampleHashtablezInfo(size_t sizeof_slot, size_t sizeof_key, + size_t sizeof_value, + size_t old_capacity, bool was_soo, + HashtablezInfoHandle forced_infoz, + CommonFields& c) { + if (forced_infoz.IsSampled()) return forced_infoz; + // In SOO, we sample on the first insertion so if this is an empty SOO case + // (e.g. when reserve is called), then we still need to sample. + if (kSooEnabled && was_soo && c.size() == 0) { + return Sample(sizeof_slot, sizeof_key, sizeof_value, SooCapacity()); + } + // For non-SOO cases, we sample whenever the capacity is increasing from zero + // to non-zero. + if (!kSooEnabled && old_capacity == 0) { + return Sample(sizeof_slot, sizeof_key, sizeof_value, 0); + } + return c.infoz(); +} + // Helper class to perform resize of the hash set. // // It contains special optimizations for small group resizes. // See GrowIntoSingleGroupShuffleControlBytes for details. class HashSetResizeHelper { public: - explicit HashSetResizeHelper(CommonFields& c, bool was_soo, bool had_soo_slot) + explicit HashSetResizeHelper(CommonFields& c, bool was_soo, bool had_soo_slot, + HashtablezInfoHandle forced_infoz) : old_capacity_(c.capacity()), had_infoz_(c.has_infoz()), was_soo_(was_soo), - had_soo_slot_(had_soo_slot) {} + had_soo_slot_(had_soo_slot), + forced_infoz_(forced_infoz) {} // Optimized for small groups version of `find_first_non_full`. // Beneficial only right after calling `raw_hash_set::resize`. @@ -1817,14 +1957,14 @@ class HashSetResizeHelper { } HeapOrSoo& old_heap_or_soo() { return old_heap_or_soo_; } - void* old_soo_data() { return old_heap_or_soo_.soo_data; } + void* old_soo_data() { return old_heap_or_soo_.get_soo_data(); } ctrl_t* old_ctrl() const { assert(!was_soo_); - return old_heap_or_soo_.heap.control; + return old_heap_or_soo_.control(); } void* old_slots() const { assert(!was_soo_); - return old_heap_or_soo_.heap.slot_array.get(); + return old_heap_or_soo_.slot_array().get(); } size_t old_capacity() const { return old_capacity_; } @@ -1869,26 +2009,18 @@ class HashSetResizeHelper { // // Returns IsGrowingIntoSingleGroupApplicable result to avoid recomputation. template + bool SooEnabled, size_t AlignOfSlot> ABSL_ATTRIBUTE_NOINLINE bool InitializeSlots(CommonFields& c, Alloc alloc, - ctrl_t soo_slot_h2) { + ctrl_t soo_slot_h2, + size_t key_size, + size_t value_size) { assert(c.capacity()); - // Folks with custom allocators often make unwarranted assumptions about the - // behavior of their classes vis-a-vis trivial destructability and what - // calls they will or won't make. Avoid sampling for people with custom - // allocators to get us out of this mess. This is not a hard guarantee but - // a workaround while we plan the exact guarantee we want to provide. - const size_t sample_size = - (std::is_same>::value && - (was_soo_ || old_capacity_ == 0)) - ? SizeOfSlot - : 0; - // TODO(b/289225379): when SOO is enabled, we should still sample on first - // insertion and if Sample is non-null, then we should force a heap - // allocation. Note that we'll also have to force capacity of 3 so that - // is_soo() still works. HashtablezInfoHandle infoz = - sample_size > 0 ? Sample(sample_size) : c.infoz(); + ShouldSampleHashtablezInfo() + ? SampleHashtablezInfo(SizeOfSlot, key_size, value_size, + old_capacity_, was_soo_, + forced_infoz_, c) + : HashtablezInfoHandle{}; const bool has_infoz = infoz.IsSampled(); RawHashSetLayout layout(c.capacity(), AlignOfSlot, has_infoz); @@ -1904,12 +2036,13 @@ class HashSetResizeHelper { const bool grow_single_group = IsGrowingIntoSingleGroupApplicable(old_capacity_, layout.capacity()); - if (was_soo_ && grow_single_group) { + if (SooEnabled && was_soo_ && grow_single_group) { InitControlBytesAfterSoo(c.control(), soo_slot_h2, layout.capacity()); if (TransferUsesMemcpy && had_soo_slot_) { TransferSlotAfterSoo(c, SizeOfSlot); } - } else if (old_capacity_ != 0 && grow_single_group) { + // SooEnabled implies that old_capacity_ != 0. + } else if ((SooEnabled || old_capacity_ != 0) && grow_single_group) { if (TransferUsesMemcpy) { GrowSizeIntoSingleGroupTransferable(c, SizeOfSlot); DeallocateOld(alloc, SizeOfSlot); @@ -1923,7 +2056,7 @@ class HashSetResizeHelper { c.set_has_infoz(has_infoz); if (has_infoz) { infoz.RecordStorageChanged(c.size(), layout.capacity()); - if (was_soo_ || grow_single_group || old_capacity_ == 0) { + if ((SooEnabled && was_soo_) || grow_single_group || old_capacity_ == 0) { infoz.RecordRehash(0); } c.set_infoz(infoz); @@ -2063,6 +2196,8 @@ class HashSetResizeHelper { bool had_infoz_; bool was_soo_; bool had_soo_slot_; + // Either null infoz or a pre-sampled forced infoz for SOO tables. + HashtablezInfoHandle forced_infoz_; }; inline void PrepareInsertCommon(CommonFields& common) { @@ -2204,14 +2339,9 @@ class raw_hash_set { bool is_soo() const { return fits_in_soo(capacity()); } bool is_full_soo() const { return is_soo() && !empty(); } - // Give an early error when key_type is not hashable/eq. Definitions are - // provided because otherwise explicit template instantiation fails on MSVC. - auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)) { - ABSL_UNREACHABLE(); - } - auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)) { - ABSL_UNREACHABLE(); - } + // Give an early error when key_type is not hashable/eq. + auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)); + auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)); using AllocTraits = absl::allocator_traits; using SlotAlloc = typename absl::allocator_traits< @@ -2400,18 +2530,7 @@ class raw_hash_set { const_iterator operator++(int) { return inner_++; } friend bool operator==(const const_iterator& a, const const_iterator& b) { - // Suppress erroneous uninitialized memory errors on GCC. This seems to be - // because the slot pointer in the inner_ iterator is uninitialized, even - // though that pointer is not used when uninitialized. - // Similar bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112637. -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif return a.inner_ == b.inner_; -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic pop -#endif } friend bool operator!=(const const_iterator& a, const const_iterator& b) { return !(a == b); @@ -2561,6 +2680,8 @@ class raw_hash_set { assert(size == 1); common().set_full_soo(); emplace_at(soo_iterator(), *that.begin()); + const HashtablezInfoHandle infoz = try_sample_soo(); + if (infoz.IsSampled()) resize_with_soo_infoz(infoz); return; } assert(!that.is_soo()); @@ -2609,7 +2730,7 @@ class raw_hash_set { infoz().RecordStorageChanged(size, cap); } common().set_size(size); - set_growth_left(growth_left() - size); + growth_info().OverwriteManyEmptyAsFull(size); } ABSL_ATTRIBUTE_NOINLINE raw_hash_set(raw_hash_set&& that) noexcept( @@ -2698,8 +2819,7 @@ class raw_hash_set { size_t size() const { return common().size(); } size_t capacity() const { const size_t cap = common().capacity(); - // Use local variables because compiler complains about using functions in - // assume. + // Compiler complains when using functions in assume so use local variables. ABSL_ATTRIBUTE_UNUSED static constexpr bool kEnabled = SooEnabled(); ABSL_ATTRIBUTE_UNUSED static constexpr size_t kCapacity = SooCapacity(); ABSL_ASSUME(!kEnabled || cap >= kCapacity); @@ -3067,7 +3187,17 @@ class raw_hash_set { SooEnabled()); return; } - if (SooEnabled() && size() <= SooCapacity()) { + if (fits_in_soo(size())) { + // When the table is already sampled, we keep it sampled. + if (infoz().IsSampled()) { + const size_t kInitialSampledCapacity = NextCapacity(SooCapacity()); + if (capacity() > kInitialSampledCapacity) { + resize(kInitialSampledCapacity); + } + // This asserts that we didn't lose sampling coverage in `resize`. + assert(infoz().IsSampled()); + return; + } alignas(slot_type) unsigned char slot_space[sizeof(slot_type)]; slot_type* tmp_slot = to_slot(slot_space); transfer(tmp_slot, begin().slot()); @@ -3338,6 +3468,16 @@ class raw_hash_set { } } + // Conditionally samples hashtablez for SOO tables. This should be called on + // insertion into an empty SOO table and in copy construction when the size + // can fit in SOO capacity. + inline HashtablezInfoHandle try_sample_soo() { + assert(is_soo()); + if (!ShouldSampleHashtablezInfo()) return HashtablezInfoHandle{}; + return Sample(sizeof(slot_type), sizeof(key_type), sizeof(value_type), + SooCapacity()); + } + inline void destroy_slots() { assert(!is_soo()); if (PolicyTraits::template destroy_is_trivial()) return; @@ -3360,7 +3500,9 @@ class raw_hash_set { inline void destructor_impl() { if (capacity() == 0) return; if (is_soo()) { - if (!empty()) destroy(soo_slot()); + if (!empty()) { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(destroy(soo_slot())); + } return; } destroy_slots(); @@ -3390,7 +3532,20 @@ class raw_hash_set { // HashSetResizeHelper::FindFirstNonFullAfterResize( // common(), old_capacity, hash) // can be called right after `resize`. - ABSL_ATTRIBUTE_NOINLINE void resize(size_t new_capacity) { + void resize(size_t new_capacity) { + resize_impl(new_capacity, HashtablezInfoHandle{}); + } + + // As above, except that we also accept a pre-sampled, forced infoz for + // SOO tables, since they need to switch from SOO to heap in order to + // store the infoz. + void resize_with_soo_infoz(HashtablezInfoHandle forced_infoz) { + assert(forced_infoz.IsSampled()); + resize_impl(NextCapacity(SooCapacity()), forced_infoz); + } + + ABSL_ATTRIBUTE_NOINLINE void resize_impl( + size_t new_capacity, HashtablezInfoHandle forced_infoz) { assert(IsValidCapacity(new_capacity)); assert(!fits_in_soo(new_capacity)); const bool was_soo = is_soo(); @@ -3398,7 +3553,8 @@ class raw_hash_set { const ctrl_t soo_slot_h2 = had_soo_slot ? static_cast(H2(hash_of(soo_slot()))) : ctrl_t::kEmpty; - HashSetResizeHelper resize_helper(common(), was_soo, had_soo_slot); + HashSetResizeHelper resize_helper(common(), was_soo, had_soo_slot, + forced_infoz); // Initialize HashSetResizeHelper::old_heap_or_soo_. We can't do this in // HashSetResizeHelper constructor because it can't transfer slots when // transfer_uses_memcpy is false. @@ -3416,8 +3572,9 @@ class raw_hash_set { const bool grow_single_group = resize_helper.InitializeSlots( - common(), CharAlloc(alloc_ref()), soo_slot_h2); + SooEnabled(), alignof(slot_type)>( + common(), CharAlloc(alloc_ref()), soo_slot_h2, sizeof(key_type), + sizeof(value_type)); // In the SooEnabled() case, capacity is never 0 so we don't check. if (!SooEnabled() && resize_helper.old_capacity() == 0) { @@ -3637,26 +3794,29 @@ class raw_hash_set { return move_elements_allocs_unequal(std::move(that)); } - protected: - // Attempts to find `key` in the table; if it isn't found, returns an iterator - // where the value can be inserted into, with the control byte already set to - // `key`'s H2. Returns a bool indicating whether an insertion can take place. template - std::pair find_or_prepare_insert(const K& key) { - if (is_soo()) { - if (empty()) { + std::pair find_or_prepare_insert_soo(const K& key) { + if (empty()) { + const HashtablezInfoHandle infoz = try_sample_soo(); + if (infoz.IsSampled()) { + resize_with_soo_infoz(infoz); + } else { common().set_full_soo(); return {soo_iterator(), true}; } - if (PolicyTraits::apply(EqualElement{key, eq_ref()}, - PolicyTraits::element(soo_slot()))) { - return {soo_iterator(), false}; - } + } else if (PolicyTraits::apply(EqualElement{key, eq_ref()}, + PolicyTraits::element(soo_slot()))) { + return {soo_iterator(), false}; + } else { resize(NextCapacity(SooCapacity())); - const size_t index = - PrepareInsertAfterSoo(hash_ref()(key), sizeof(slot_type), common()); - return {iterator_at(index), true}; } + const size_t index = + PrepareInsertAfterSoo(hash_ref()(key), sizeof(slot_type), common()); + return {iterator_at(index), true}; + } + + template + std::pair find_or_prepare_insert_non_soo(const K& key) { prefetch_heap_block(); auto hash = hash_ref()(key); auto seq = probe(common(), hash); @@ -3669,44 +3829,81 @@ class raw_hash_set { PolicyTraits::element(slot_array() + seq.offset(i))))) return {iterator_at(seq.offset(i)), false}; } - if (ABSL_PREDICT_TRUE(g.MaskEmpty())) break; + auto mask_empty = g.MaskEmpty(); + if (ABSL_PREDICT_TRUE(mask_empty)) { + size_t target = seq.offset( + GetInsertionOffset(mask_empty, capacity(), hash, control())); + return { + iterator_at(prepare_insert(hash, FindInfo{target, seq.index()})), + true}; + } seq.next(); assert(seq.index() <= capacity() && "full table!"); } - return {iterator_at(prepare_insert(hash)), true}; } - // Given the hash of a value not currently in the table, finds the next - // viable slot index to insert it at. + protected: + // Attempts to find `key` in the table; if it isn't found, returns an iterator + // where the value can be inserted into, with the control byte already set to + // `key`'s H2. Returns a bool indicating whether an insertion can take place. + template + std::pair find_or_prepare_insert(const K& key) { + if (is_soo()) return find_or_prepare_insert_soo(key); + return find_or_prepare_insert_non_soo(key); + } + + // Given the hash of a value not currently in the table and the first empty + // slot in the probe sequence, finds the viable slot index to insert it at. + // + // In case there's no space left, the table can be resized or rehashed + // (see rehash_and_grow_if_necessary). + // + // In case of absence of deleted slots and positive growth_left, element can + // be inserted in the provided `target` position. + // + // When table has deleted slots (according to GrowthInfo), target position + // will be searched one more time using `find_first_non_full`. // // REQUIRES: At least one non-full slot available. - size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE { + // REQUIRES: `target` is a valid empty position to insert. + size_t prepare_insert(size_t hash, FindInfo target) ABSL_ATTRIBUTE_NOINLINE { assert(!is_soo()); - const bool rehash_for_bug_detection = - common().should_rehash_for_bug_detection_on_insert(); - if (rehash_for_bug_detection) { - // Move to a different heap allocation in order to detect bugs. - const size_t cap = capacity(); - resize(growth_left() > 0 ? cap : NextCapacity(cap)); - } - FindInfo target; - if (!rehash_for_bug_detection && ABSL_PREDICT_FALSE(growth_left() == 0)) { - const size_t old_capacity = capacity(); - rehash_and_grow_if_necessary(); - // NOTE: It is safe to use `FindFirstNonFullAfterResize` after - // `rehash_and_grow_if_necessary`, whether capacity changes or not. - // `rehash_and_grow_if_necessary` may *not* call `resize` - // and perform `drop_deletes_without_resize` instead. But this - // could happen only on big tables and will not change capacity. - // For big tables `FindFirstNonFullAfterResize` will always - // fallback to normal `find_first_non_full`. - target = HashSetResizeHelper::FindFirstNonFullAfterResize( - common(), old_capacity, hash); - } else { - target = find_first_non_full(common(), hash); + // When there are no deleted slots in the table + // and growth_left is positive, we can insert at the first + // empty slot in the probe sequence (target). + bool use_target_hint = false; + // Optimization is disabled on enabled generations. + // We have to rehash even sparse tables randomly in such mode. +#ifndef ABSL_SWISSTABLE_ENABLE_GENERATIONS + use_target_hint = growth_info().HasNoDeletedAndGrowthLeft(); +#endif + if (ABSL_PREDICT_FALSE(!use_target_hint)) { + const bool rehash_for_bug_detection = + common().should_rehash_for_bug_detection_on_insert(); + if (rehash_for_bug_detection) { + // Move to a different heap allocation in order to detect bugs. + const size_t cap = capacity(); + resize(growth_left() > 0 ? cap : NextCapacity(cap)); + } + if (!rehash_for_bug_detection && + ABSL_PREDICT_FALSE(growth_left() == 0)) { + const size_t old_capacity = capacity(); + rehash_and_grow_if_necessary(); + // NOTE: It is safe to use `FindFirstNonFullAfterResize` after + // `rehash_and_grow_if_necessary`, whether capacity changes or not. + // `rehash_and_grow_if_necessary` may *not* call `resize` + // and perform `drop_deletes_without_resize` instead. But this + // could happen only on big tables and will not change capacity. + // For big tables `FindFirstNonFullAfterResize` will always + // fallback to normal `find_first_non_full`. + target = HashSetResizeHelper::FindFirstNonFullAfterResize( + common(), old_capacity, hash); + } else { + target = find_first_non_full(common(), hash); + } } PrepareInsertCommon(common()); - set_growth_left(growth_left() - IsEmpty(control()[target.offset])); + growth_info().OverwriteControlAsFull(control()[target.offset]); SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type)); infoz().RecordInsert(hash, target.probe_length); return target.offset; @@ -3735,9 +3932,7 @@ class raw_hash_set { return const_cast(this)->iterator_at(i); } - reference unchecked_deref(iterator it) { - return const_cast(it.unchecked_deref()); - } + reference unchecked_deref(iterator it) { return it.unchecked_deref(); } private: friend struct RawHashSetTestOnlyAccess; @@ -3754,11 +3949,16 @@ class raw_hash_set { // See `CapacityToGrowth()`. size_t growth_left() const { assert(!is_soo()); - return common().growth_left(); + return growth_info().GetGrowthLeft(); } - void set_growth_left(size_t gl) { + + GrowthInfo& growth_info() { assert(!is_soo()); - return common().set_growth_left(gl); + return common().growth_info(); + } + GrowthInfo growth_info() const { + assert(!is_soo()); + return common().growth_info(); } // Prefetch the heap-allocated memory region to resolve potential TLB and @@ -3918,5 +4118,7 @@ ABSL_NAMESPACE_END } // namespace absl #undef ABSL_SWISSTABLE_ENABLE_GENERATIONS +#undef ABSL_SWISSTABLE_IGNORE_UNINITIALIZED +#undef ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN #endif // ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ diff --git a/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set_benchmark.cc b/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set_benchmark.cc index 0fa9c71279..424b72cffc 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set_benchmark.cc +++ b/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set_benchmark.cc @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include #include +#include #include #include #include @@ -27,6 +29,7 @@ #include "absl/container/internal/container_memory.h" #include "absl/container/internal/hash_function_defaults.h" #include "absl/container/internal/raw_hash_set.h" +#include "absl/random/random.h" #include "absl/strings/str_format.h" #include "benchmark/benchmark.h" @@ -575,6 +578,67 @@ void BM_Resize(benchmark::State& state) { } BENCHMARK(BM_Resize); +void BM_EraseIf(benchmark::State& state) { + int64_t num_elements = state.range(0); + size_t num_erased = static_cast(state.range(1)); + + constexpr size_t kRepetitions = 64; + + absl::BitGen rng; + + std::vector> keys(kRepetitions); + std::vector tables; + std::vector threshold; + for (auto& k : keys) { + tables.push_back(IntTable()); + auto& table = tables.back(); + for (int64_t i = 0; i < num_elements; i++) { + // We use random keys to reduce noise. + k.push_back( + absl::Uniform(rng, 0, std::numeric_limits::max())); + if (!table.insert(k.back()).second) { + k.pop_back(); + --i; // duplicated value, retrying + } + } + std::sort(k.begin(), k.end()); + threshold.push_back(static_cast(num_erased) < num_elements + ? k[num_erased] + : std::numeric_limits::max()); + } + + while (state.KeepRunningBatch(static_cast(kRepetitions) * + std::max(num_elements, int64_t{1}))) { + benchmark::DoNotOptimize(tables); + for (size_t t_id = 0; t_id < kRepetitions; t_id++) { + auto& table = tables[t_id]; + benchmark::DoNotOptimize(num_erased); + auto pred = [t = threshold[t_id]](int64_t key) { return key < t; }; + benchmark::DoNotOptimize(pred); + benchmark::DoNotOptimize(table); + absl::container_internal::EraseIf(pred, &table); + } + state.PauseTiming(); + for (size_t t_id = 0; t_id < kRepetitions; t_id++) { + auto& k = keys[t_id]; + auto& table = tables[t_id]; + for (size_t i = 0; i < num_erased; i++) { + table.insert(k[i]); + } + } + state.ResumeTiming(); + } +} + +BENCHMARK(BM_EraseIf) + ->ArgNames({"num_elements", "num_erased"}) + ->ArgPair(10, 0) + ->ArgPair(1000, 0) + ->ArgPair(10, 5) + ->ArgPair(1000, 500) + ->ArgPair(10, 10) + ->ArgPair(1000, 1000); + } // namespace } // namespace container_internal ABSL_NAMESPACE_END diff --git a/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set_test.cc b/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set_test.cc index 9180db5937..c4e05d6019 100644 --- a/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set_test.cc +++ b/yass/third_party/abseil-cpp/absl/container/internal/raw_hash_set_test.cc @@ -52,7 +52,9 @@ #include "absl/container/internal/hashtable_debug.h" #include "absl/container/internal/hashtablez_sampler.h" #include "absl/container/internal/test_allocator.h" +#include "absl/functional/function_ref.h" #include "absl/hash/hash.h" +#include "absl/log/check.h" #include "absl/log/log.h" #include "absl/memory/memory.h" #include "absl/meta/type_traits.h" @@ -90,6 +92,78 @@ using ::testing::UnorderedElementsAre; // Convenience function to static cast to ctrl_t. ctrl_t CtrlT(int i) { return static_cast(i); } +TEST(GrowthInfoTest, GetGrowthLeft) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + EXPECT_EQ(gi.GetGrowthLeft(), 5); + gi.OverwriteFullAsDeleted(); + EXPECT_EQ(gi.GetGrowthLeft(), 5); +} + +TEST(GrowthInfoTest, HasNoDeleted) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + EXPECT_TRUE(gi.HasNoDeleted()); + gi.OverwriteFullAsDeleted(); + EXPECT_FALSE(gi.HasNoDeleted()); + // After reinitialization we have no deleted slots. + gi.InitGrowthLeftNoDeleted(5); + EXPECT_TRUE(gi.HasNoDeleted()); +} + +TEST(GrowthInfoTest, HasNoDeletedAndGrowthLeft) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + EXPECT_TRUE(gi.HasNoDeletedAndGrowthLeft()); + gi.OverwriteFullAsDeleted(); + EXPECT_FALSE(gi.HasNoDeletedAndGrowthLeft()); + gi.InitGrowthLeftNoDeleted(0); + EXPECT_FALSE(gi.HasNoDeletedAndGrowthLeft()); + gi.OverwriteFullAsDeleted(); + EXPECT_FALSE(gi.HasNoDeletedAndGrowthLeft()); + // After reinitialization we have no deleted slots. + gi.InitGrowthLeftNoDeleted(5); + EXPECT_TRUE(gi.HasNoDeletedAndGrowthLeft()); +} + +TEST(GrowthInfoTest, OverwriteFullAsEmpty) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + gi.OverwriteFullAsEmpty(); + EXPECT_EQ(gi.GetGrowthLeft(), 6); + gi.OverwriteFullAsDeleted(); + EXPECT_EQ(gi.GetGrowthLeft(), 6); + gi.OverwriteFullAsEmpty(); + EXPECT_EQ(gi.GetGrowthLeft(), 7); + EXPECT_FALSE(gi.HasNoDeleted()); +} + +TEST(GrowthInfoTest, OverwriteEmptyAsFull) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + gi.OverwriteEmptyAsFull(); + EXPECT_EQ(gi.GetGrowthLeft(), 4); + gi.OverwriteFullAsDeleted(); + EXPECT_EQ(gi.GetGrowthLeft(), 4); + gi.OverwriteEmptyAsFull(); + EXPECT_EQ(gi.GetGrowthLeft(), 3); + EXPECT_FALSE(gi.HasNoDeleted()); +} + +TEST(GrowthInfoTest, OverwriteControlAsFull) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + gi.OverwriteControlAsFull(ctrl_t::kEmpty); + EXPECT_EQ(gi.GetGrowthLeft(), 4); + gi.OverwriteControlAsFull(ctrl_t::kDeleted); + EXPECT_EQ(gi.GetGrowthLeft(), 4); + gi.OverwriteFullAsDeleted(); + gi.OverwriteControlAsFull(ctrl_t::kDeleted); + // We do not count number of deleted, so the bit sticks till the next rehash. + EXPECT_FALSE(gi.HasNoDeletedAndGrowthLeft()); + EXPECT_FALSE(gi.HasNoDeleted()); +} + TEST(Util, NormalizeCapacity) { EXPECT_EQ(1, NormalizeCapacity(0)); EXPECT_EQ(1, NormalizeCapacity(1)); @@ -885,12 +959,17 @@ TYPED_TEST_P(SmallTableResizeTest, ResizeReduceSmallTables) { for (size_t source_size = 0; source_size < 32; ++source_size) { for (size_t target_size = 0; target_size <= source_size; ++target_size) { TypeParam t; - t.reserve(source_size); size_t inserted_count = std::min(source_size, 5); for (size_t i = 0; i < inserted_count; ++i) { t.insert(static_cast(i)); } + const size_t minimum_capacity = t.capacity(); + t.reserve(source_size); t.rehash(target_size); + if (target_size == 0) { + EXPECT_EQ(t.capacity(), minimum_capacity) + << "rehash(0) must resize to the minimum capacity"; + } for (size_t i = 0; i < inserted_count; ++i) { EXPECT_TRUE(t.find(static_cast(i)) != t.end()); EXPECT_EQ(*t.find(static_cast(i)), static_cast(i)); @@ -1746,6 +1825,27 @@ TEST(Table, EraseInsertProbing) { EXPECT_THAT(t, UnorderedElementsAre(1, 10, 3, 11, 12)); } +TEST(Table, GrowthInfoDeletedBit) { + BadTable t; + EXPECT_TRUE( + RawHashSetTestOnlyAccess::GetCommon(t).growth_info().HasNoDeleted()); + int64_t init_count = static_cast( + CapacityToGrowth(NormalizeCapacity(Group::kWidth + 1))); + for (int64_t i = 0; i < init_count; ++i) { + t.insert(i); + } + EXPECT_TRUE( + RawHashSetTestOnlyAccess::GetCommon(t).growth_info().HasNoDeleted()); + t.erase(0); + EXPECT_EQ(RawHashSetTestOnlyAccess::CountTombstones(t), 1); + EXPECT_FALSE( + RawHashSetTestOnlyAccess::GetCommon(t).growth_info().HasNoDeleted()); + t.rehash(0); + EXPECT_EQ(RawHashSetTestOnlyAccess::CountTombstones(t), 0); + EXPECT_TRUE( + RawHashSetTestOnlyAccess::GetCommon(t).growth_info().HasNoDeleted()); +} + TYPED_TEST_P(SooTest, Clear) { TypeParam t; EXPECT_TRUE(t.find(0) == t.end()); @@ -2030,6 +2130,9 @@ TYPED_TEST_P(SooTest, FindFullDeletedRegression) { } TYPED_TEST_P(SooTest, ReplacingDeletedSlotDoesNotRehash) { + // We need to disable hashtablez to avoid issues related to SOO and sampling. + SetHashtablezEnabled(false); + size_t n; { // Compute n such that n is the maximum number of elements before rehash. @@ -2383,6 +2486,9 @@ TYPED_TEST_P(SooTest, IterationOrderChangesOnRehash) { // Verify that pointers are invalidated as soon as a second element is inserted. // This prevents dependency on pointer stability on small tables. TYPED_TEST_P(SooTest, UnstablePointers) { + // We need to disable hashtablez to avoid issues related to SOO and sampling. + SetHashtablezEnabled(false); + TypeParam table; const auto addr = [&](int i) { @@ -2518,7 +2624,7 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) { constexpr bool soo_enabled = std::is_same::value; // Enable the feature even if the prod default is off. SetHashtablezEnabled(true); - SetHashtablezSampleParameter(100); + SetHashtablezSampleParameter(100); // Sample ~1% of tables. auto& sampler = GlobalHashtablezSampler(); size_t start_size = 0; @@ -2552,25 +2658,28 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) { absl::flat_hash_map observed_checksums; absl::flat_hash_map reservations; end_size += sampler.Iterate([&](const HashtablezInfo& info) { - if (preexisting_info.count(&info) == 0) { - observed_checksums[info.hashes_bitwise_xor.load( - std::memory_order_relaxed)]++; - reservations[info.max_reserve.load(std::memory_order_relaxed)]++; - } - EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type)); ++end_size; + if (preexisting_info.contains(&info)) return; + observed_checksums[info.hashes_bitwise_xor.load( + std::memory_order_relaxed)]++; + reservations[info.max_reserve.load(std::memory_order_relaxed)]++; + EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type)); + EXPECT_EQ(info.key_size, sizeof(typename TypeParam::key_type)); + EXPECT_EQ(info.value_size, sizeof(typename TypeParam::value_type)); + + if (soo_enabled) { + EXPECT_EQ(info.soo_capacity, SooCapacity()); + } else { + EXPECT_EQ(info.soo_capacity, 0); + } }); + // Expect that we sampled at the requested sampling rate of ~1%. EXPECT_NEAR((end_size - start_size) / static_cast(tables.size()), 0.01, 0.005); - if (soo_enabled) { - EXPECT_EQ(observed_checksums.size(), 9); - } else { - EXPECT_EQ(observed_checksums.size(), 5); - for (const auto& [_, count] : observed_checksums) { - EXPECT_NEAR((100 * count) / static_cast(tables.size()), 0.2, - 0.05); - } + EXPECT_EQ(observed_checksums.size(), 5); + for (const auto& [_, count] : observed_checksums) { + EXPECT_NEAR((100 * count) / static_cast(tables.size()), 0.2, 0.05); } EXPECT_EQ(reservations.size(), 10); @@ -2586,12 +2695,141 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) { REGISTER_TYPED_TEST_SUITE_P(RawHashSamplerTest, Sample); using RawHashSamplerTestTypes = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(My, RawHashSamplerTest, RawHashSamplerTestTypes); + +std::vector SampleSooMutation( + absl::FunctionRef mutate_table) { + // Enable the feature even if the prod default is off. + SetHashtablezEnabled(true); + SetHashtablezSampleParameter(100); // Sample ~1% of tables. + + auto& sampler = GlobalHashtablezSampler(); + size_t start_size = 0; + absl::flat_hash_set preexisting_info; + start_size += sampler.Iterate([&](const HashtablezInfo& info) { + preexisting_info.insert(&info); + ++start_size; + }); + + std::vector tables; + for (int i = 0; i < 1000000; ++i) { + tables.emplace_back(); + mutate_table(tables.back()); + } + size_t end_size = 0; + std::vector infos; + end_size += sampler.Iterate([&](const HashtablezInfo& info) { + ++end_size; + if (preexisting_info.contains(&info)) return; + infos.push_back(&info); + }); + + // Expect that we sampled at the requested sampling rate of ~1%. + EXPECT_NEAR((end_size - start_size) / static_cast(tables.size()), + 0.01, 0.005); + return infos; +} + +TEST(RawHashSamplerTest, SooTableInsertToEmpty) { + if (SooIntTable().capacity() != SooCapacity()) { + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; + GTEST_SKIP() << "not SOO on this platform"; + } + std::vector infos = + SampleSooMutation([](SooIntTable& t) { t.insert(1); }); + + for (const HashtablezInfo* info : infos) { + ASSERT_EQ(info->inline_element_size, + sizeof(typename SooIntTable::value_type)); + ASSERT_EQ(info->soo_capacity, SooCapacity()); + ASSERT_EQ(info->capacity, NextCapacity(SooCapacity())); + ASSERT_EQ(info->size, 1); + ASSERT_EQ(info->max_reserve, 0); + ASSERT_EQ(info->num_erases, 0); + ASSERT_EQ(info->max_probe_length, 0); + ASSERT_EQ(info->total_probe_length, 0); + } +} + +TEST(RawHashSamplerTest, SooTableReserveToEmpty) { + if (SooIntTable().capacity() != SooCapacity()) { + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; + GTEST_SKIP() << "not SOO on this platform"; + } + std::vector infos = + SampleSooMutation([](SooIntTable& t) { t.reserve(100); }); + + for (const HashtablezInfo* info : infos) { + ASSERT_EQ(info->inline_element_size, + sizeof(typename SooIntTable::value_type)); + ASSERT_EQ(info->soo_capacity, SooCapacity()); + ASSERT_GE(info->capacity, 100); + ASSERT_EQ(info->size, 0); + ASSERT_EQ(info->max_reserve, 100); + ASSERT_EQ(info->num_erases, 0); + ASSERT_EQ(info->max_probe_length, 0); + ASSERT_EQ(info->total_probe_length, 0); + } +} + +// This tests that reserve on a full SOO table doesn't incorrectly result in new +// (over-)sampling. +TEST(RawHashSamplerTest, SooTableReserveToFullSoo) { + if (SooIntTable().capacity() != SooCapacity()) { + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; + GTEST_SKIP() << "not SOO on this platform"; + } + std::vector infos = + SampleSooMutation([](SooIntTable& t) { + t.insert(1); + t.reserve(100); + }); + + for (const HashtablezInfo* info : infos) { + ASSERT_EQ(info->inline_element_size, + sizeof(typename SooIntTable::value_type)); + ASSERT_EQ(info->soo_capacity, SooCapacity()); + ASSERT_GE(info->capacity, 100); + ASSERT_EQ(info->size, 1); + ASSERT_EQ(info->max_reserve, 100); + ASSERT_EQ(info->num_erases, 0); + ASSERT_EQ(info->max_probe_length, 0); + ASSERT_EQ(info->total_probe_length, 0); + } +} + +// This tests that rehash(0) on a sampled table with size that fits in SOO +// doesn't incorrectly result in losing sampling. +TEST(RawHashSamplerTest, SooTableRehashShrinkWhenSizeFitsInSoo) { + if (SooIntTable().capacity() != SooCapacity()) { + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; + GTEST_SKIP() << "not SOO on this platform"; + } + std::vector infos = + SampleSooMutation([](SooIntTable& t) { + t.reserve(100); + t.insert(1); + EXPECT_GE(t.capacity(), 100); + t.rehash(0); + }); + + for (const HashtablezInfo* info : infos) { + ASSERT_EQ(info->inline_element_size, + sizeof(typename SooIntTable::value_type)); + ASSERT_EQ(info->soo_capacity, SooCapacity()); + ASSERT_EQ(info->capacity, NextCapacity(SooCapacity())); + ASSERT_EQ(info->size, 1); + ASSERT_EQ(info->max_reserve, 100); + ASSERT_EQ(info->num_erases, 0); + ASSERT_EQ(info->max_probe_length, 0); + ASSERT_EQ(info->total_probe_length, 0); + } +} #endif // ABSL_INTERNAL_HASHTABLEZ_SAMPLE TEST(RawHashSamplerTest, DoNotSampleCustomAllocators) { // Enable the feature even if the prod default is off. SetHashtablezEnabled(true); - SetHashtablezSampleParameter(100); + SetHashtablezSampleParameter(100); // Sample ~1% of tables. auto& sampler = GlobalHashtablezSampler(); size_t start_size = 0; @@ -2969,7 +3207,7 @@ TYPED_TEST_P(SooTable, Basic) { bool frozen = true; TypeParam t{FreezableAlloc(&frozen)}; if (t.capacity() != SooCapacity()) { - EXPECT_LT(sizeof(void*), 8); + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; GTEST_SKIP() << "not SOO on this platform"; } @@ -3001,14 +3239,18 @@ using FreezableSooTableTypes = FreezableSizedValueSooTable<16>>; INSTANTIATE_TYPED_TEST_SUITE_P(My, SooTable, FreezableSooTableTypes); -TEST(Table, RehashToSoo) { +TEST(Table, RehashToSooUnsampled) { SooIntTable t; if (t.capacity() != SooCapacity()) { - EXPECT_LT(sizeof(void*), 8); + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; GTEST_SKIP() << "not SOO on this platform"; } - t.reserve(10); + // We disable hashtablez sampling for this test to ensure that the table isn't + // sampled. When the table is sampled, it won't rehash down to SOO. + SetHashtablezEnabled(false); + + t.reserve(100); t.insert(0); EXPECT_EQ(*t.begin(), 0); @@ -3021,7 +3263,7 @@ TEST(Table, RehashToSoo) { EXPECT_EQ(t.find(1), t.end()); } -TEST(Table, ResizeToNonSoo) { +TEST(Table, ReserveToNonSoo) { for (int reserve_capacity : {8, 100000}) { SooIntTable t; t.insert(0); diff --git a/yass/third_party/abseil-cpp/absl/container/node_hash_map.cc b/yass/third_party/abseil-cpp/absl/container/node_hash_map.cc deleted file mode 100644 index ccf8c59923..0000000000 --- a/yass/third_party/abseil-cpp/absl/container/node_hash_map.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2024 The Abseil Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/container/node_hash_map.h" - -#include -#include - -#include "absl/base/config.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN - -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, int32_t, int32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, std::string, int32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, int32_t, std::string); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, int64_t, int64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, std::string, int64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, int64_t, std::string); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, uint32_t, uint32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, std::string, uint32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, uint32_t, std::string); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, uint64_t, uint64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, std::string, uint64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, uint64_t, std::string); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(template, std::string, std::string); - -ABSL_NAMESPACE_END -} // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/container/node_hash_map.h b/yass/third_party/abseil-cpp/absl/container/node_hash_map.h index acc2154268..cb41543c93 100644 --- a/yass/third_party/abseil-cpp/absl/container/node_hash_map.h +++ b/yass/third_party/abseil-cpp/absl/container/node_hash_map.h @@ -37,15 +37,11 @@ #define ABSL_CONTAINER_NODE_HASH_MAP_H_ #include -#include -#include -#include #include #include #include #include "absl/algorithm/container.h" -#include "absl/base/config.h" #include "absl/base/macros.h" #include "absl/container/internal/container_memory.h" #include "absl/container/internal/hash_function_defaults.h" // IWYU pragma: export @@ -627,42 +623,6 @@ struct IsUnorderedContainer< } // namespace container_algorithm_internal -// Explicit template instantiations for common map types in order to decrease -// linker input size. Note that explicitly instantiating node_hash_map itself -// doesn't help because it has no non-alias members. If we need to decrease -// linker input size more, we could potentially (a) add more key/value types, -// e.g. string_view/Cord, (b) instantiate some template member functions, e.g. -// operator[]/find. The EXTERN argument is `extern` for the declaration and -// empty for the definition. -#define ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(TEMPLATE, KEY, VALUE) \ - TEMPLATE class absl::container_internal::raw_hash_map< \ - absl::container_internal::NodeHashMapPolicy, \ - absl::container_internal::hash_default_hash, \ - absl::container_internal::hash_default_eq, \ - std::allocator>>; \ - TEMPLATE class absl::container_internal::raw_hash_set< \ - absl::container_internal::NodeHashMapPolicy, \ - absl::container_internal::hash_default_hash, \ - absl::container_internal::hash_default_eq, \ - std::allocator>>; - -// We use exact-width integer types rather than `int`/`long`/`long long` because -// these are the types recommended in the Google C++ style guide and which are -// commonly used in Google code. -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, int32_t, int32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, std::string, int32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, int32_t, std::string); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, int64_t, int64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, std::string, int64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, int64_t, std::string); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, uint32_t, uint32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, std::string, uint32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, uint32_t, std::string); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, uint64_t, uint64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, std::string, uint64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, uint64_t, std::string); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_MAP(extern template, std::string, std::string); - ABSL_NAMESPACE_END } // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/container/node_hash_set.cc b/yass/third_party/abseil-cpp/absl/container/node_hash_set.cc deleted file mode 100644 index 39226c616b..0000000000 --- a/yass/third_party/abseil-cpp/absl/container/node_hash_set.cc +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2024 The Abseil Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/container/node_hash_set.h" - -#include -#include - -#include "absl/base/config.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN - -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, int8_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, int16_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, int32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, int64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, uint8_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, uint16_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, uint32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, uint64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(template, std::string); - -ABSL_NAMESPACE_END -} // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/container/node_hash_set.h b/yass/third_party/abseil-cpp/absl/container/node_hash_set.h index 77b49f9b14..8cc4b62407 100644 --- a/yass/third_party/abseil-cpp/absl/container/node_hash_set.h +++ b/yass/third_party/abseil-cpp/absl/container/node_hash_set.h @@ -36,13 +36,9 @@ #define ABSL_CONTAINER_NODE_HASH_SET_H_ #include -#include -#include -#include #include #include "absl/algorithm/container.h" -#include "absl/base/config.h" #include "absl/base/macros.h" #include "absl/container/internal/container_memory.h" #include "absl/container/internal/hash_function_defaults.h" // IWYU pragma: export @@ -522,32 +518,6 @@ struct IsUnorderedContainer> : std::true_type {}; } // namespace container_algorithm_internal - -// Explicit template instantiations for common set types in order to decrease -// linker input size. Note that explicitly instantiating node_hash_set itself -// doesn't help because it has no non-alias members. If we need to decrease -// linker input size more, we could potentially (a) add more key types, e.g. -// string_view/Cord, (b) instantiate some template member functions, e.g. -// find/insert/emplace. -#define ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(TEMPLATE, KEY) \ - TEMPLATE class absl::container_internal::raw_hash_set< \ - absl::container_internal::NodeHashSetPolicy, \ - absl::container_internal::hash_default_hash, \ - absl::container_internal::hash_default_eq, std::allocator>; - -// We use exact-width integer types rather than `int`/`long`/`long long` because -// these are the types recommended in the Google C++ style guide and which are -// commonly used in Google code. -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, int8_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, int16_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, int32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, int64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, uint8_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, uint16_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, uint32_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, uint64_t); -ABSL_INTERNAL_TEMPLATE_NODE_HASH_SET(extern template, std::string); - ABSL_NAMESPACE_END } // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/copts/GENERATED_AbseilCopts.cmake b/yass/third_party/abseil-cpp/absl/copts/GENERATED_AbseilCopts.cmake index f90bff799b..d96bd8394b 100644 --- a/yass/third_party/abseil-cpp/absl/copts/GENERATED_AbseilCopts.cmake +++ b/yass/third_party/abseil-cpp/absl/copts/GENERATED_AbseilCopts.cmake @@ -44,6 +44,7 @@ list(APPEND ABSL_GCC_FLAGS "-Wconversion-null" "-Wformat-security" "-Wmissing-declarations" + "-Wnon-virtual-dtor" "-Woverlength-strings" "-Wpointer-arith" "-Wundef" @@ -61,6 +62,7 @@ list(APPEND ABSL_GCC_TEST_FLAGS "-Wcast-qual" "-Wconversion-null" "-Wformat-security" + "-Wnon-virtual-dtor" "-Woverlength-strings" "-Wpointer-arith" "-Wundef" @@ -82,9 +84,11 @@ list(APPEND ABSL_GCC_TEST_FLAGS list(APPEND ABSL_LLVM_FLAGS "-Wall" "-Wextra" + "-Wc++98-compat-extra-semi" "-Wcast-qual" "-Wconversion" "-Wdead-code-aggressive" + "-Wdeprecated-pragma" "-Wfloat-overflow-conversion" "-Wfloat-zero-conversion" "-Wfor-loop-analysis" @@ -121,9 +125,11 @@ list(APPEND ABSL_LLVM_FLAGS list(APPEND ABSL_LLVM_TEST_FLAGS "-Wall" "-Wextra" + "-Wc++98-compat-extra-semi" "-Wcast-qual" "-Wconversion" "-Wdead-code-aggressive" + "-Wdeprecated-pragma" "-Wfloat-overflow-conversion" "-Wfloat-zero-conversion" "-Wfor-loop-analysis" diff --git a/yass/third_party/abseil-cpp/absl/copts/GENERATED_copts.bzl b/yass/third_party/abseil-cpp/absl/copts/GENERATED_copts.bzl index 3a659529fd..0a34423668 100644 --- a/yass/third_party/abseil-cpp/absl/copts/GENERATED_copts.bzl +++ b/yass/third_party/abseil-cpp/absl/copts/GENERATED_copts.bzl @@ -45,6 +45,7 @@ ABSL_GCC_FLAGS = [ "-Wconversion-null", "-Wformat-security", "-Wmissing-declarations", + "-Wnon-virtual-dtor", "-Woverlength-strings", "-Wpointer-arith", "-Wundef", @@ -62,6 +63,7 @@ ABSL_GCC_TEST_FLAGS = [ "-Wcast-qual", "-Wconversion-null", "-Wformat-security", + "-Wnon-virtual-dtor", "-Woverlength-strings", "-Wpointer-arith", "-Wundef", @@ -83,9 +85,11 @@ ABSL_GCC_TEST_FLAGS = [ ABSL_LLVM_FLAGS = [ "-Wall", "-Wextra", + "-Wc++98-compat-extra-semi", "-Wcast-qual", "-Wconversion", "-Wdead-code-aggressive", + "-Wdeprecated-pragma", "-Wfloat-overflow-conversion", "-Wfloat-zero-conversion", "-Wfor-loop-analysis", @@ -122,9 +126,11 @@ ABSL_LLVM_FLAGS = [ ABSL_LLVM_TEST_FLAGS = [ "-Wall", "-Wextra", + "-Wc++98-compat-extra-semi", "-Wcast-qual", "-Wconversion", "-Wdead-code-aggressive", + "-Wdeprecated-pragma", "-Wfloat-overflow-conversion", "-Wfloat-zero-conversion", "-Wfor-loop-analysis", diff --git a/yass/third_party/abseil-cpp/absl/copts/copts.py b/yass/third_party/abseil-cpp/absl/copts/copts.py index 946ceb86c1..1aa0924922 100644 --- a/yass/third_party/abseil-cpp/absl/copts/copts.py +++ b/yass/third_party/abseil-cpp/absl/copts/copts.py @@ -18,6 +18,7 @@ ABSL_GCC_FLAGS = [ "-Wconversion-null", "-Wformat-security", "-Wmissing-declarations", + "-Wnon-virtual-dtor", "-Woverlength-strings", "-Wpointer-arith", "-Wundef", @@ -43,9 +44,11 @@ ABSL_GCC_TEST_ADDITIONAL_FLAGS = [ ABSL_LLVM_FLAGS = [ "-Wall", "-Wextra", + "-Wc++98-compat-extra-semi", "-Wcast-qual", "-Wconversion", "-Wdead-code-aggressive", + "-Wdeprecated-pragma", "-Wfloat-overflow-conversion", "-Wfloat-zero-conversion", "-Wfor-loop-analysis", diff --git a/yass/third_party/abseil-cpp/absl/crc/BUILD.bazel b/yass/third_party/abseil-cpp/absl/crc/BUILD.bazel index 9dc81819d3..890d637c5f 100644 --- a/yass/third_party/abseil-cpp/absl/crc/BUILD.bazel +++ b/yass/third_party/abseil-cpp/absl/crc/BUILD.bazel @@ -121,7 +121,9 @@ cc_library( hdrs = ["internal/non_temporal_arm_intrinsics.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = ["//visibility:private"], + visibility = [ + ":__pkg__", + ], deps = [ "//absl/base:config", ], @@ -132,7 +134,9 @@ cc_library( hdrs = ["internal/non_temporal_memcpy.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = ["//visibility:private"], + visibility = [ + ":__pkg__", + ], deps = [ ":non_temporal_arm_intrinsics", "//absl/base:config", diff --git a/yass/third_party/abseil-cpp/absl/crc/internal/crc32_x86_arm_combined_simd.h b/yass/third_party/abseil-cpp/absl/crc/internal/crc32_x86_arm_combined_simd.h index 1938a9b2c9..aa6a65954f 100644 --- a/yass/third_party/abseil-cpp/absl/crc/internal/crc32_x86_arm_combined_simd.h +++ b/yass/third_party/abseil-cpp/absl/crc/internal/crc32_x86_arm_combined_simd.h @@ -102,10 +102,11 @@ V128 V128_Xor(const V128 l, const V128 r); // Produces an AND operation of |l| and |r|. V128 V128_And(const V128 l, const V128 r); -// Sets two 64 bit integers to one 128 bit vector. The order is reverse. +// Sets the lower half of a 128 bit register to the given 64-bit value and +// zeroes the upper half. // dst[63:0] := |r| -// dst[127:64] := |l| -V128 V128_From2x64(const uint64_t l, const uint64_t r); +// dst[127:64] := |0| +V128 V128_From64WithZeroFill(const uint64_t r); // Shift |l| right by |imm| bytes while shifting in zeros. template @@ -171,8 +172,8 @@ inline V128 V128_Xor(const V128 l, const V128 r) { return _mm_xor_si128(l, r); } inline V128 V128_And(const V128 l, const V128 r) { return _mm_and_si128(l, r); } -inline V128 V128_From2x64(const uint64_t l, const uint64_t r) { - return _mm_set_epi64x(static_cast(l), static_cast(r)); +inline V128 V128_From64WithZeroFill(const uint64_t r) { + return _mm_set_epi64x(static_cast(0), static_cast(r)); } template @@ -262,10 +263,12 @@ inline V128 V128_Xor(const V128 l, const V128 r) { return veorq_u64(l, r); } inline V128 V128_And(const V128 l, const V128 r) { return vandq_u64(l, r); } -inline V128 V128_From2x64(const uint64_t l, const uint64_t r) { - return vcombine_u64(vcreate_u64(r), vcreate_u64(l)); +inline V128 V128_From64WithZeroFill(const uint64_t r){ + constexpr uint64x2_t kZero = {0, 0}; + return vsetq_lane_u64(r, kZero, 0); } + template inline V128 V128_ShiftRight(const V128 l) { return vreinterpretq_u64_s8( diff --git a/yass/third_party/abseil-cpp/absl/crc/internal/crc_memcpy_fallback.cc b/yass/third_party/abseil-cpp/absl/crc/internal/crc_memcpy_fallback.cc index 07795504e3..e34249f07d 100644 --- a/yass/third_party/abseil-cpp/absl/crc/internal/crc_memcpy_fallback.cc +++ b/yass/third_party/abseil-cpp/absl/crc/internal/crc_memcpy_fallback.cc @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include #include "absl/base/config.h" #include "absl/crc/crc32c.h" #include "absl/crc/internal/crc_memcpy.h" +#include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN diff --git a/yass/third_party/abseil-cpp/absl/crc/internal/crc_non_temporal_memcpy.cc b/yass/third_party/abseil-cpp/absl/crc/internal/crc_non_temporal_memcpy.cc index adc867f6b7..a56f1eb0e0 100644 --- a/yass/third_party/abseil-cpp/absl/crc/internal/crc_non_temporal_memcpy.cc +++ b/yass/third_party/abseil-cpp/absl/crc/internal/crc_non_temporal_memcpy.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include "absl/base/config.h" #include "absl/crc/crc32c.h" diff --git a/yass/third_party/abseil-cpp/absl/crc/internal/crc_x86_arm_combined.cc b/yass/third_party/abseil-cpp/absl/crc/internal/crc_x86_arm_combined.cc index 51eff4eddc..20dd3e017a 100644 --- a/yass/third_party/abseil-cpp/absl/crc/internal/crc_x86_arm_combined.cc +++ b/yass/third_party/abseil-cpp/absl/crc/internal/crc_x86_arm_combined.cc @@ -101,9 +101,9 @@ constexpr size_t kMediumCutoff = 2048; namespace { uint32_t multiply(uint32_t a, uint32_t b) { - V128 shifts = V128_From2x64(0, 1); - V128 power = V128_From2x64(0, a); - V128 crc = V128_From2x64(0, b); + V128 shifts = V128_From64WithZeroFill(1); + V128 power = V128_From64WithZeroFill(a); + V128 crc = V128_From64WithZeroFill(b); V128 res = V128_PMulLow(power, crc); // Combine crc values @@ -444,11 +444,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams V128 magic = *(reinterpret_cast(kClmulConstants) + bs - 1); - V128 tmp = V128_From2x64(0, l64); + V128 tmp = V128_From64WithZeroFill(l64); V128 res1 = V128_PMulLow(tmp, magic); - tmp = V128_From2x64(0, l641); + tmp = V128_From64WithZeroFill(l641); V128 res2 = V128_PMul10(tmp, magic); V128 x = V128_Xor(res1, res2); diff --git a/yass/third_party/abseil-cpp/absl/crc/internal/non_temporal_memcpy.h b/yass/third_party/abseil-cpp/absl/crc/internal/non_temporal_memcpy.h index b3d94badfc..7ae83bdcbd 100644 --- a/yass/third_party/abseil-cpp/absl/crc/internal/non_temporal_memcpy.h +++ b/yass/third_party/abseil-cpp/absl/crc/internal/non_temporal_memcpy.h @@ -19,19 +19,8 @@ #include #endif -#ifdef __SSE__ -#include -#endif - -#ifdef __SSE2__ -#include -#endif - -#ifdef __SSE3__ -#include -#endif - -#ifdef __AVX__ +#if defined(__SSE__) || defined(__AVX__) +// Pulls in both SSE and AVX intrinsics. #include #endif @@ -44,6 +33,7 @@ #include #include +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/optimization.h" @@ -57,7 +47,9 @@ namespace crc_internal { // memcpy can save 1 DRAM load of the destination cacheline. constexpr size_t kCacheLineSize = ABSL_CACHELINE_SIZE; -// If the objects overlap, the behavior is undefined. +// If the objects overlap, the behavior is undefined. Uses regular memcpy +// instead of non-temporal memcpy if the required CPU intrinsics are unavailable +// at compile time. inline void *non_temporal_store_memcpy(void *__restrict dst, const void *__restrict src, size_t len) { #if defined(__SSE3__) || defined(__aarch64__) || \ @@ -119,10 +111,20 @@ inline void *non_temporal_store_memcpy(void *__restrict dst, #endif // __SSE3__ || __aarch64__ || (_MSC_VER && __AVX__) } +// If the objects overlap, the behavior is undefined. Uses regular memcpy +// instead of non-temporal memcpy if the required CPU intrinsics are unavailable +// at compile time. +#if ABSL_HAVE_CPP_ATTRIBUTE(gnu::target) && \ + (defined(__x86_64__) || defined(__i386__)) +[[gnu::target("avx")]] +#endif inline void *non_temporal_store_memcpy_avx(void *__restrict dst, const void *__restrict src, size_t len) { -#ifdef __AVX__ + // This function requires AVX. For clang and gcc we compile it with AVX even + // if the translation unit isn't built with AVX support. This works because we + // only select this implementation at runtime if the CPU supports AVX. +#if defined(__SSE3__) || (defined(_MSC_VER) && defined(__AVX__)) uint8_t *d = reinterpret_cast(dst); const uint8_t *s = reinterpret_cast(src); @@ -168,9 +170,8 @@ inline void *non_temporal_store_memcpy_avx(void *__restrict dst, } return dst; #else - // Fallback to regular memcpy when AVX is not available. return memcpy(dst, src, len); -#endif // __AVX__ +#endif // __SSE3__ || (_MSC_VER && __AVX__) } } // namespace crc_internal diff --git a/yass/third_party/abseil-cpp/absl/debugging/internal/stacktrace_aarch64-inl.inc b/yass/third_party/abseil-cpp/absl/debugging/internal/stacktrace_aarch64-inl.inc index 1e07e042b0..b123479b1a 100644 --- a/yass/third_party/abseil-cpp/absl/debugging/internal/stacktrace_aarch64-inl.inc +++ b/yass/third_party/abseil-cpp/absl/debugging/internal/stacktrace_aarch64-inl.inc @@ -89,6 +89,8 @@ struct StackInfo { static bool InsideSignalStack(void** ptr, const StackInfo* stack_info) { uintptr_t comparable_ptr = reinterpret_cast(ptr); + if (stack_info->sig_stack_high == kUnknownStackEnd) + return false; return (comparable_ptr >= stack_info->sig_stack_low && comparable_ptr < stack_info->sig_stack_high); } diff --git a/yass/third_party/abseil-cpp/absl/flags/commandlineflag.h b/yass/third_party/abseil-cpp/absl/flags/commandlineflag.h index c30aa6094f..26ec0e7d84 100644 --- a/yass/third_party/abseil-cpp/absl/flags/commandlineflag.h +++ b/yass/third_party/abseil-cpp/absl/flags/commandlineflag.h @@ -59,6 +59,14 @@ class PrivateHandleAccessor; // // Now you can get flag info from that reflection handle. // std::string flag_location = my_flag_data->Filename(); // ... + +// These are only used as constexpr global objects. +// They do not use a virtual destructor to simplify their implementation. +// They are not destroyed except at program exit, so leaks do not matter. +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#endif class CommandLineFlag { public: constexpr CommandLineFlag() = default; @@ -193,6 +201,9 @@ class CommandLineFlag { // flag's value type. virtual void CheckDefaultValueParsingRoundtrip() const = 0; }; +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif ABSL_NAMESPACE_END } // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/flags/flag_benchmark.cc b/yass/third_party/abseil-cpp/absl/flags/flag_benchmark.cc index 758a6a554e..88cc0c5657 100644 --- a/yass/third_party/abseil-cpp/absl/flags/flag_benchmark.cc +++ b/yass/third_party/abseil-cpp/absl/flags/flag_benchmark.cc @@ -143,7 +143,7 @@ constexpr size_t kNumFlags = 0 REPLICATE(COUNT, _, _); #pragma clang section data = ".benchmark_flags" #endif #define DEFINE_FLAG(T, name, index) ABSL_FLAG(T, name##_##index, {}, ""); -#define FLAG_DEF(T) REPLICATE(DEFINE_FLAG, T, T##_flag); +#define FLAG_DEF(T) REPLICATE(DEFINE_FLAG, T, T##_flag) BENCHMARKED_TYPES(FLAG_DEF) #if defined(__clang__) && defined(__linux__) #pragma clang section data = "" diff --git a/yass/third_party/abseil-cpp/absl/flags/internal/flag.h b/yass/third_party/abseil-cpp/absl/flags/internal/flag.h index 7b0563827e..602531b1b3 100644 --- a/yass/third_party/abseil-cpp/absl/flags/internal/flag.h +++ b/yass/third_party/abseil-cpp/absl/flags/internal/flag.h @@ -424,6 +424,13 @@ struct DynValueDeleter { class FlagState; +// These are only used as constexpr global objects. +// They do not use a virtual destructor to simplify their implementation. +// They are not destroyed except at program exit, so leaks do not matter. +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#endif class FlagImpl final : public CommandLineFlag { public: constexpr FlagImpl(const char* name, const char* filename, FlagOpFn op, @@ -623,6 +630,9 @@ class FlagImpl final : public CommandLineFlag { // problems. alignas(absl::Mutex) mutable char data_guard_[sizeof(absl::Mutex)]; }; +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif /////////////////////////////////////////////////////////////////////////////// // The Flag object parameterized by the flag's value type. This class implements @@ -753,8 +763,7 @@ void* FlagOps(FlagOp op, const void* v1, void* v2, void* v3) { // Round sizeof(FlagImp) to a multiple of alignof(FlagValue) to get the // offset of the data. size_t round_to = alignof(FlagValue); - size_t offset = - (sizeof(FlagImpl) + round_to - 1) / round_to * round_to; + size_t offset = (sizeof(FlagImpl) + round_to - 1) / round_to * round_to; return reinterpret_cast(offset); } } diff --git a/yass/third_party/abseil-cpp/absl/flags/parse_test.cc b/yass/third_party/abseil-cpp/absl/flags/parse_test.cc index b3d4ce4e8e..9997069280 100644 --- a/yass/third_party/abseil-cpp/absl/flags/parse_test.cc +++ b/yass/third_party/abseil-cpp/absl/flags/parse_test.cc @@ -44,31 +44,31 @@ #define FLAG_MULT(x) F3(x) #define TEST_FLAG_HEADER FLAG_HEADER_ -#define F(name) ABSL_FLAG(int, name, 0, ""); +#define F(name) ABSL_FLAG(int, name, 0, "") #define F1(name) \ F(name##1); \ F(name##2); \ F(name##3); \ F(name##4); \ - F(name##5); + F(name##5) /**/ #define F2(name) \ F1(name##1); \ F1(name##2); \ F1(name##3); \ F1(name##4); \ - F1(name##5); + F1(name##5) /**/ #define F3(name) \ F2(name##1); \ F2(name##2); \ F2(name##3); \ F2(name##4); \ - F2(name##5); + F2(name##5) /**/ -FLAG_MULT(TEST_FLAG_HEADER) +FLAG_MULT(TEST_FLAG_HEADER); namespace { diff --git a/yass/third_party/abseil-cpp/absl/flags/reflection.cc b/yass/third_party/abseil-cpp/absl/flags/reflection.cc index 841921a926..ea856ff9c1 100644 --- a/yass/third_party/abseil-cpp/absl/flags/reflection.cc +++ b/yass/third_party/abseil-cpp/absl/flags/reflection.cc @@ -217,6 +217,13 @@ void FinalizeRegistry() { namespace { +// These are only used as constexpr global objects. +// They do not use a virtual destructor to simplify their implementation. +// They are not destroyed except at program exit, so leaks do not matter. +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#endif class RetiredFlagObj final : public CommandLineFlag { public: constexpr RetiredFlagObj(const char* name, FlagFastTypeId type_id) @@ -276,6 +283,9 @@ class RetiredFlagObj final : public CommandLineFlag { const char* const name_; const FlagFastTypeId type_id_; }; +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif } // namespace diff --git a/yass/third_party/abseil-cpp/absl/hash/hash_benchmark.cc b/yass/third_party/abseil-cpp/absl/hash/hash_benchmark.cc index d18ea694aa..9b73f46135 100644 --- a/yass/third_party/abseil-cpp/absl/hash/hash_benchmark.cc +++ b/yass/third_party/abseil-cpp/absl/hash/hash_benchmark.cc @@ -160,7 +160,7 @@ absl::flat_hash_set FlatHashSet(size_t count) { return hash{}(arg); \ } \ bool absl_hash_test_odr_use##hash##name = \ - (benchmark::DoNotOptimize(&Codegen##hash##name), false); + (benchmark::DoNotOptimize(&Codegen##hash##name), false) MAKE_BENCHMARK(AbslHash, Int32, int32_t{}); MAKE_BENCHMARK(AbslHash, Int64, int64_t{}); @@ -315,9 +315,9 @@ struct StringRand { BENCHMARK(BM_latency_##hash##_##name); \ } // namespace -MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand); -MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand); -MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>); -MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>); -MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>); -MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>); +MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand) +MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand) +MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>) +MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>) +MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>) +MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>) diff --git a/yass/third_party/abseil-cpp/absl/hash/internal/low_level_hash.cc b/yass/third_party/abseil-cpp/absl/hash/internal/low_level_hash.cc index 43de67299d..6dc71cf7cd 100644 --- a/yass/third_party/abseil-cpp/absl/hash/internal/low_level_hash.cc +++ b/yass/third_party/abseil-cpp/absl/hash/internal/low_level_hash.cc @@ -37,6 +37,7 @@ uint64_t LowLevelHashLenGt16(const void* data, size_t len, uint64_t seed, PrefetchToLocalCache(data); const uint8_t* ptr = static_cast(data); uint64_t starting_length = static_cast(len); + const uint8_t* last_16_ptr = ptr + starting_length - 16; uint64_t current_state = seed ^ salt[0]; if (len > 64) { @@ -97,15 +98,12 @@ uint64_t LowLevelHashLenGt16(const void* data, size_t len, uint64_t seed, uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); current_state = Mix(a ^ salt[1], b ^ current_state); - - ptr += 16; - len -= 16; } // We now have a data `ptr` with at least 1 and at most 16 bytes. But we can // safely read from `ptr + len - 16`. - uint64_t a = absl::base_internal::UnalignedLoad64(ptr + len - 16); - uint64_t b = absl::base_internal::UnalignedLoad64(ptr + len - 8); + uint64_t a = absl::base_internal::UnalignedLoad64(last_16_ptr); + uint64_t b = absl::base_internal::UnalignedLoad64(last_16_ptr + 8); return Mix(a ^ salt[1] ^ starting_length, b ^ current_state); } diff --git a/yass/third_party/abseil-cpp/absl/log/BUILD.bazel b/yass/third_party/abseil-cpp/absl/log/BUILD.bazel index 7ff5143ddd..0f17239804 100644 --- a/yass/third_party/abseil-cpp/absl/log/BUILD.bazel +++ b/yass/third_party/abseil-cpp/absl/log/BUILD.bazel @@ -243,9 +243,6 @@ cc_library( hdrs = ["absl_vlog_is_on.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = [ - "//absl/log:__subpackages__", - ], deps = [ "//absl/base:config", "//absl/base:core_headers", @@ -259,9 +256,6 @@ cc_library( hdrs = ["vlog_is_on.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = [ - "//absl/log:__subpackages__", - ], deps = [ ":absl_vlog_is_on", ], @@ -452,6 +446,7 @@ cc_library( "//absl/log:globals", "//absl/log:log_entry", "//absl/log:scoped_mock_log", + "//absl/log/internal:globals", "//absl/log/internal:test_actions", "//absl/log/internal:test_helpers", "//absl/log/internal:test_matchers", diff --git a/yass/third_party/abseil-cpp/absl/log/CMakeLists.txt b/yass/third_party/abseil-cpp/absl/log/CMakeLists.txt index a7d8b69031..acf89720b1 100644 --- a/yass/third_party/abseil-cpp/absl/log/CMakeLists.txt +++ b/yass/third_party/abseil-cpp/absl/log/CMakeLists.txt @@ -807,6 +807,7 @@ absl_cc_test( absl::log_entry absl::log_globals absl::log_severity + absl::log_internal_globals absl::log_internal_test_actions absl::log_internal_test_helpers absl::log_internal_test_matchers diff --git a/yass/third_party/abseil-cpp/absl/log/absl_log_basic_test.cc b/yass/third_party/abseil-cpp/absl/log/absl_log_basic_test.cc index 3a4b83c15a..7378f5a802 100644 --- a/yass/third_party/abseil-cpp/absl/log/absl_log_basic_test.cc +++ b/yass/third_party/abseil-cpp/absl/log/absl_log_basic_test.cc @@ -16,6 +16,7 @@ #include "absl/log/absl_log.h" #define ABSL_TEST_LOG ABSL_LOG +#define ABSL_TEST_DLOG ABSL_DLOG #include "gtest/gtest.h" #include "absl/log/log_basic_test_impl.inc" diff --git a/yass/third_party/abseil-cpp/absl/log/absl_vlog_is_on.h b/yass/third_party/abseil-cpp/absl/log/absl_vlog_is_on.h index 29096b4857..6bf6c41339 100644 --- a/yass/third_party/abseil-cpp/absl/log/absl_vlog_is_on.h +++ b/yass/third_party/abseil-cpp/absl/log/absl_vlog_is_on.h @@ -46,12 +46,12 @@ // Files which do not match any pattern in `--vmodule` use the value of `--v` as // their effective verbosity level. The default is 0. // -// SetVLOGLevel helper function is provided to do limited dynamic control over +// SetVLogLevel helper function is provided to do limited dynamic control over // V-logging by appending to `--vmodule`. Because these go at the beginning of // the list, they take priority over any globs previously added. // // Resetting --vmodule will override all previous modifications to `--vmodule`, -// including via SetVLOGLevel. +// including via SetVLogLevel. #ifndef ABSL_LOG_ABSL_VLOG_IS_ON_H_ #define ABSL_LOG_ABSL_VLOG_IS_ON_H_ @@ -77,7 +77,7 @@ // Each ABSL_VLOG_IS_ON call site gets its own VLogSite that registers with the // global linked list of sites to asynchronously update its verbosity level on // changes to --v or --vmodule. The verbosity can also be set by manually -// calling SetVLOGLevel. +// calling SetVLogLevel. // // ABSL_VLOG_IS_ON is not async signal safe, but it is guaranteed not to // allocate new memory. diff --git a/yass/third_party/abseil-cpp/absl/log/internal/BUILD.bazel b/yass/third_party/abseil-cpp/absl/log/internal/BUILD.bazel index 2a8c1a4780..21958f7e07 100644 --- a/yass/third_party/abseil-cpp/absl/log/internal/BUILD.bazel +++ b/yass/third_party/abseil-cpp/absl/log/internal/BUILD.bazel @@ -384,7 +384,9 @@ cc_library( hdrs = ["vlog_config.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = ["//absl/log:__subpackages__"], + visibility = [ + "//absl/log:__subpackages__", + ], deps = [ "//absl/base", "//absl/base:config", diff --git a/yass/third_party/abseil-cpp/absl/log/internal/check_op.h b/yass/third_party/abseil-cpp/absl/log/internal/check_op.h index 11f0f4078b..53760fd527 100644 --- a/yass/third_party/abseil-cpp/absl/log/internal/check_op.h +++ b/yass/third_party/abseil-cpp/absl/log/internal/check_op.h @@ -58,13 +58,12 @@ #endif #define ABSL_LOG_INTERNAL_CHECK_OP(name, op, val1, val1_text, val2, val2_text) \ - while ( \ - ::std::string* absl_log_internal_check_op_result ABSL_ATTRIBUTE_UNUSED = \ - ::absl::log_internal::name##Impl( \ - ::absl::log_internal::GetReferenceableValue(val1), \ - ::absl::log_internal::GetReferenceableValue(val2), \ - ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL(val1_text \ - " " #op " " val2_text))) \ + while (::std::string* absl_log_internal_check_op_result = \ + ::absl::log_internal::name##Impl( \ + ::absl::log_internal::GetReferenceableValue(val1), \ + ::absl::log_internal::GetReferenceableValue(val2), \ + ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL( \ + val1_text " " #op " " val2_text))) \ ABSL_LOG_INTERNAL_CONDITION_FATAL(STATELESS, true) \ ABSL_LOG_INTERNAL_CHECK(*absl_log_internal_check_op_result).InternalStream() #define ABSL_LOG_INTERNAL_QCHECK_OP(name, op, val1, val1_text, val2, \ diff --git a/yass/third_party/abseil-cpp/absl/log/internal/log_impl.h b/yass/third_party/abseil-cpp/absl/log/internal/log_impl.h index 99de6dbb24..b44ed06819 100644 --- a/yass/third_party/abseil-cpp/absl/log/internal/log_impl.h +++ b/yass/third_party/abseil-cpp/absl/log/internal/log_impl.h @@ -35,11 +35,11 @@ #ifndef NDEBUG #define ABSL_LOG_INTERNAL_DLOG_IMPL(severity) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATELESS, true) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #else #define ABSL_LOG_INTERNAL_DLOG_IMPL(severity) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATELESS, false) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #endif // The `switch` ensures that this expansion is the begnning of a statement (as @@ -58,7 +58,7 @@ switch (const int absl_logging_internal_verbose_level = (verbose_level)) \ case 0: \ default: \ - ABSL_LOG_INTERNAL_LOG_IF_IMPL( \ + ABSL_LOG_INTERNAL_DLOG_IF_IMPL( \ _INFO, ABSL_VLOG_IS_ON(absl_logging_internal_verbose_level)) \ .WithVerbosity(absl_logging_internal_verbose_level) #else @@ -66,7 +66,7 @@ switch (const int absl_logging_internal_verbose_level = (verbose_level)) \ case 0: \ default: \ - ABSL_LOG_INTERNAL_LOG_IF_IMPL( \ + ABSL_LOG_INTERNAL_DLOG_IF_IMPL( \ _INFO, false && ABSL_VLOG_IS_ON(absl_logging_internal_verbose_level)) \ .WithVerbosity(absl_logging_internal_verbose_level) #endif @@ -82,11 +82,11 @@ #ifndef NDEBUG #define ABSL_LOG_INTERNAL_DLOG_IF_IMPL(severity, condition) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATELESS, condition) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #else #define ABSL_LOG_INTERNAL_DLOG_IF_IMPL(severity, condition) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATELESS, false && (condition)) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #endif // ABSL_LOG_EVERY_N @@ -132,36 +132,36 @@ #ifndef NDEBUG #define ABSL_LOG_INTERNAL_DLOG_EVERY_N_IMPL(severity, n) \ ABSL_LOG_INTERNAL_CONDITION_INFO(STATEFUL, true) \ - (EveryN, n) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + (EveryN, n) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_FIRST_N_IMPL(severity, n) \ ABSL_LOG_INTERNAL_CONDITION_INFO(STATEFUL, true) \ - (FirstN, n) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + (FirstN, n) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_EVERY_POW_2_IMPL(severity) \ ABSL_LOG_INTERNAL_CONDITION_INFO(STATEFUL, true) \ - (EveryPow2) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + (EveryPow2) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_EVERY_N_SEC_IMPL(severity, n_seconds) \ ABSL_LOG_INTERNAL_CONDITION_INFO(STATEFUL, true) \ - (EveryNSec, n_seconds) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + (EveryNSec, n_seconds) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #else // def NDEBUG #define ABSL_LOG_INTERNAL_DLOG_EVERY_N_IMPL(severity, n) \ ABSL_LOG_INTERNAL_CONDITION_INFO(STATEFUL, false) \ - (EveryN, n) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + (EveryN, n) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_FIRST_N_IMPL(severity, n) \ ABSL_LOG_INTERNAL_CONDITION_INFO(STATEFUL, false) \ - (FirstN, n) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + (FirstN, n) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_EVERY_POW_2_IMPL(severity) \ ABSL_LOG_INTERNAL_CONDITION_INFO(STATEFUL, false) \ - (EveryPow2) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + (EveryPow2) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_EVERY_N_SEC_IMPL(severity, n_seconds) \ ABSL_LOG_INTERNAL_CONDITION_INFO(STATEFUL, false) \ - (EveryNSec, n_seconds) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + (EveryNSec, n_seconds) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #endif // def NDEBUG #define ABSL_LOG_INTERNAL_VLOG_EVERY_N_IMPL(verbose_level, n) \ @@ -243,40 +243,40 @@ #ifndef NDEBUG #define ABSL_LOG_INTERNAL_DLOG_IF_EVERY_N_IMPL(severity, condition, n) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATEFUL, condition)(EveryN, n) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_IF_FIRST_N_IMPL(severity, condition, n) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATEFUL, condition)(FirstN, n) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_IF_EVERY_POW_2_IMPL(severity, condition) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATEFUL, condition)(EveryPow2) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_IF_EVERY_N_SEC_IMPL(severity, condition, \ n_seconds) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATEFUL, condition)(EveryNSec, \ n_seconds) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #else // def NDEBUG #define ABSL_LOG_INTERNAL_DLOG_IF_EVERY_N_IMPL(severity, condition, n) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATEFUL, false && (condition))( \ - EveryN, n) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + EveryN, n) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_IF_FIRST_N_IMPL(severity, condition, n) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATEFUL, false && (condition))( \ - FirstN, n) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + FirstN, n) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_IF_EVERY_POW_2_IMPL(severity, condition) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATEFUL, false && (condition))( \ - EveryPow2) ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + EveryPow2) ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #define ABSL_LOG_INTERNAL_DLOG_IF_EVERY_N_SEC_IMPL(severity, condition, \ n_seconds) \ ABSL_LOG_INTERNAL_CONDITION##severity(STATEFUL, false && (condition))( \ EveryNSec, n_seconds) \ - ABSL_LOGGING_INTERNAL_LOG##severity.InternalStream() + ABSL_LOGGING_INTERNAL_DLOG##severity.InternalStream() #endif // def NDEBUG #endif // ABSL_LOG_INTERNAL_LOG_IMPL_H_ diff --git a/yass/third_party/abseil-cpp/absl/log/internal/log_message.cc b/yass/third_party/abseil-cpp/absl/log/internal/log_message.cc index 10ac245373..58505267f0 100644 --- a/yass/third_party/abseil-cpp/absl/log/internal/log_message.cc +++ b/yass/third_party/abseil-cpp/absl/log/internal/log_message.cc @@ -578,6 +578,13 @@ template void LogMessage::CopyToEncodedBuffer( template void LogMessage::CopyToEncodedBuffer< LogMessage::StringType::kNotLiteral>(char ch, size_t num); +// We intentionally don't return from these destructors. Disable MSVC's warning +// about the destructor never returning as we do so intentionally here. +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +#pragma warning(disable : 4722) +#endif + LogMessageFatal::LogMessageFatal(const char* file, int line) : LogMessage(file, line, absl::LogSeverity::kFatal) {} @@ -587,19 +594,29 @@ LogMessageFatal::LogMessageFatal(const char* file, int line, *this << "Check failed: " << failure_msg << " "; } -// ABSL_ATTRIBUTE_NORETURN doesn't seem to work on destructors with msvc, so -// disable msvc's warning about the d'tor never returning. -#if defined(_MSC_VER) && !defined(__clang__) -#pragma warning(push) -#pragma warning(disable : 4722) -#endif LogMessageFatal::~LogMessageFatal() { Flush(); FailWithoutStackTrace(); } -#if defined(_MSC_VER) && !defined(__clang__) -#pragma warning(pop) -#endif + +LogMessageDebugFatal::LogMessageDebugFatal(const char* file, int line) + : LogMessage(file, line, absl::LogSeverity::kFatal) {} + +LogMessageDebugFatal::~LogMessageDebugFatal() { + Flush(); + FailWithoutStackTrace(); +} + +LogMessageQuietlyDebugFatal::LogMessageQuietlyDebugFatal(const char* file, + int line) + : LogMessage(file, line, absl::LogSeverity::kFatal) { + SetFailQuietly(); +} + +LogMessageQuietlyDebugFatal::~LogMessageQuietlyDebugFatal() { + Flush(); + FailQuietly(); +} LogMessageQuietlyFatal::LogMessageQuietlyFatal(const char* file, int line) : LogMessage(file, line, absl::LogSeverity::kFatal) { @@ -608,17 +625,10 @@ LogMessageQuietlyFatal::LogMessageQuietlyFatal(const char* file, int line) LogMessageQuietlyFatal::LogMessageQuietlyFatal(const char* file, int line, absl::string_view failure_msg) - : LogMessage(file, line, absl::LogSeverity::kFatal) { - SetFailQuietly(); - *this << "Check failed: " << failure_msg << " "; + : LogMessageQuietlyFatal(file, line) { + *this << "Check failed: " << failure_msg << " "; } -// ABSL_ATTRIBUTE_NORETURN doesn't seem to work on destructors with msvc, so -// disable msvc's warning about the d'tor never returning. -#if defined(_MSC_VER) && !defined(__clang__) -#pragma warning(push) -#pragma warning(disable : 4722) -#endif LogMessageQuietlyFatal::~LogMessageQuietlyFatal() { Flush(); FailQuietly(); diff --git a/yass/third_party/abseil-cpp/absl/log/internal/log_message.h b/yass/third_party/abseil-cpp/absl/log/internal/log_message.h index 4ecb8a1498..fa72121414 100644 --- a/yass/third_party/abseil-cpp/absl/log/internal/log_message.h +++ b/yass/third_party/abseil-cpp/absl/log/internal/log_message.h @@ -357,6 +357,25 @@ class LogMessageFatal final : public LogMessage { ABSL_ATTRIBUTE_NORETURN ~LogMessageFatal(); }; +// `LogMessageDebugFatal` ensures the process will exit in failure after logging +// this message. It matches LogMessageFatal but is not [[noreturn]] as it's used +// for DLOG(FATAL) variants. +class LogMessageDebugFatal final : public LogMessage { + public: + LogMessageDebugFatal(const char* file, int line) ABSL_ATTRIBUTE_COLD; + ~LogMessageDebugFatal(); +}; + +class LogMessageQuietlyDebugFatal final : public LogMessage { + public: + // DLOG(QFATAL) calls this instead of LogMessageQuietlyFatal to make sure the + // destructor is not [[noreturn]] even if this is always FATAL as this is only + // invoked when DLOG() is enabled. + LogMessageQuietlyDebugFatal(const char* file, int line) ABSL_ATTRIBUTE_COLD; + ~LogMessageQuietlyDebugFatal(); +}; + +// Used for LOG(QFATAL) to make sure it's properly understood as [[noreturn]]. class LogMessageQuietlyFatal final : public LogMessage { public: LogMessageQuietlyFatal(const char* file, int line) ABSL_ATTRIBUTE_COLD; diff --git a/yass/third_party/abseil-cpp/absl/log/internal/strip.h b/yass/third_party/abseil-cpp/absl/log/internal/strip.h index f8d2786939..dfde578667 100644 --- a/yass/third_party/abseil-cpp/absl/log/internal/strip.h +++ b/yass/third_party/abseil-cpp/absl/log/internal/strip.h @@ -38,6 +38,13 @@ ::absl::log_internal::NullStreamMaybeFatal(::absl::kLogDebugFatal) #define ABSL_LOGGING_INTERNAL_LOG_LEVEL(severity) \ ::absl::log_internal::NullStreamMaybeFatal(absl_log_internal_severity) + +// Fatal `DLOG`s expand a little differently to avoid being `[[noreturn]]`. +#define ABSL_LOGGING_INTERNAL_DLOG_FATAL \ + ::absl::log_internal::NullStreamMaybeFatal(::absl::LogSeverity::kFatal) +#define ABSL_LOGGING_INTERNAL_DLOG_QFATAL \ + ::absl::log_internal::NullStreamMaybeFatal(::absl::LogSeverity::kFatal) + #define ABSL_LOG_INTERNAL_CHECK(failure_message) ABSL_LOGGING_INTERNAL_LOG_FATAL #define ABSL_LOG_INTERNAL_QCHECK(failure_message) \ ABSL_LOGGING_INTERNAL_LOG_QFATAL @@ -60,6 +67,13 @@ #define ABSL_LOGGING_INTERNAL_LOG_LEVEL(severity) \ ::absl::log_internal::LogMessage(__FILE__, __LINE__, \ absl_log_internal_severity) + +// Fatal `DLOG`s expand a little differently to avoid being `[[noreturn]]`. +#define ABSL_LOGGING_INTERNAL_DLOG_FATAL \ + ::absl::log_internal::LogMessageDebugFatal(__FILE__, __LINE__) +#define ABSL_LOGGING_INTERNAL_DLOG_QFATAL \ + ::absl::log_internal::LogMessageQuietlyDebugFatal(__FILE__, __LINE__) + // These special cases dispatch to special-case constructors that allow us to // avoid an extra function call and shrink non-LTO binaries by a percent or so. #define ABSL_LOG_INTERNAL_CHECK(failure_message) \ @@ -69,4 +83,11 @@ failure_message) #endif // !defined(STRIP_LOG) || !STRIP_LOG +// This part of a non-fatal `DLOG`s expands the same as `LOG`. +#define ABSL_LOGGING_INTERNAL_DLOG_INFO ABSL_LOGGING_INTERNAL_LOG_INFO +#define ABSL_LOGGING_INTERNAL_DLOG_WARNING ABSL_LOGGING_INTERNAL_LOG_WARNING +#define ABSL_LOGGING_INTERNAL_DLOG_ERROR ABSL_LOGGING_INTERNAL_LOG_ERROR +#define ABSL_LOGGING_INTERNAL_DLOG_DFATAL ABSL_LOGGING_INTERNAL_LOG_DFATAL +#define ABSL_LOGGING_INTERNAL_DLOG_LEVEL ABSL_LOGGING_INTERNAL_LOG_LEVEL + #endif // ABSL_LOG_INTERNAL_STRIP_H_ diff --git a/yass/third_party/abseil-cpp/absl/log/log.h b/yass/third_party/abseil-cpp/absl/log/log.h index b721b087cc..a4e1d1fe81 100644 --- a/yass/third_party/abseil-cpp/absl/log/log.h +++ b/yass/third_party/abseil-cpp/absl/log/log.h @@ -198,7 +198,6 @@ #define ABSL_LOG_LOG_H_ #include "absl/log/internal/log_impl.h" -#include "absl/log/vlog_is_on.h" // IWYU pragma: export // LOG() // @@ -234,6 +233,11 @@ // // See vlog_is_on.h for further documentation, including the usage of the // --vmodule flag to log at different levels in different source files. +// +// `VLOG` does not produce any output when verbose logging is not enabled. +// However, simply testing whether verbose logging is enabled can be expensive. +// If you don't intend to enable verbose logging in non-debug builds, consider +// using `DVLOG` instead. #define VLOG(severity) ABSL_LOG_INTERNAL_VLOG_IMPL(severity) // `DVLOG` behaves like `VLOG` in debug mode (i.e. `#ifndef NDEBUG`). diff --git a/yass/third_party/abseil-cpp/absl/log/log_basic_test.cc b/yass/third_party/abseil-cpp/absl/log/log_basic_test.cc index 7fc7111de6..ef8967af3d 100644 --- a/yass/third_party/abseil-cpp/absl/log/log_basic_test.cc +++ b/yass/third_party/abseil-cpp/absl/log/log_basic_test.cc @@ -16,6 +16,7 @@ #include "absl/log/log.h" #define ABSL_TEST_LOG LOG +#define ABSL_TEST_DLOG DLOG #include "gtest/gtest.h" #include "absl/log/log_basic_test_impl.inc" diff --git a/yass/third_party/abseil-cpp/absl/log/log_basic_test_impl.inc b/yass/third_party/abseil-cpp/absl/log/log_basic_test_impl.inc index e2f33566b8..8083ff2c7e 100644 --- a/yass/third_party/abseil-cpp/absl/log/log_basic_test_impl.inc +++ b/yass/third_party/abseil-cpp/absl/log/log_basic_test_impl.inc @@ -25,6 +25,10 @@ #error ABSL_TEST_LOG must be defined for these tests to work. #endif +#ifndef ABSL_TEST_DLOG +#error ABSL_TEST_DLOG must be defined for these tests to work. +#endif + #include #include #include @@ -34,6 +38,7 @@ #include "absl/base/internal/sysinfo.h" #include "absl/base/log_severity.h" #include "absl/log/globals.h" +#include "absl/log/internal/globals.h" #include "absl/log/internal/test_actions.h" #include "absl/log/internal/test_helpers.h" #include "absl/log/internal/test_matchers.h" @@ -367,6 +372,27 @@ TEST_P(BasicLogDeathTest, DFatal) { } #endif +#ifndef NDEBUG +TEST_P(BasicLogTest, DFatalIsCancellable) { + // LOG(DFATAL) does not die when DFATAL death is disabled. + absl::log_internal::SetExitOnDFatal(false); + ABSL_TEST_LOG(DFATAL) << "hello world"; + absl::log_internal::SetExitOnDFatal(true); +} + +#if GTEST_HAS_DEATH_TEST +TEST_P(BasicLogDeathTest, DLogFatalIsNotCancellable) { + EXPECT_EXIT( + { + absl::log_internal::SetExitOnDFatal(false); + ABSL_TEST_DLOG(FATAL) << "hello world"; + absl::log_internal::SetExitOnDFatal(true); + }, + DiedOfFatal, ""); +} +#endif +#endif + TEST_P(BasicLogTest, Level) { absl::log_internal::ScopedMinLogLevel scoped_min_log_level(GetParam()); diff --git a/yass/third_party/abseil-cpp/absl/log/vlog_is_on.h b/yass/third_party/abseil-cpp/absl/log/vlog_is_on.h index 7898651380..f7539df4ea 100644 --- a/yass/third_party/abseil-cpp/absl/log/vlog_is_on.h +++ b/yass/third_party/abseil-cpp/absl/log/vlog_is_on.h @@ -46,12 +46,12 @@ // Files which do not match any pattern in `--vmodule` use the value of `--v` as // their effective verbosity level. The default is 0. // -// SetVLOGLevel helper function is provided to do limited dynamic control over +// SetVLogLevel helper function is provided to do limited dynamic control over // V-logging by appending to `--vmodule`. Because these go at the beginning of // the list, they take priority over any globs previously added. // // Resetting --vmodule will override all previous modifications to `--vmodule`, -// including via SetVLOGLevel. +// including via SetVLogLevel. #ifndef ABSL_LOG_VLOG_IS_ON_H_ #define ABSL_LOG_VLOG_IS_ON_H_ @@ -63,7 +63,7 @@ // Each VLOG_IS_ON call site gets its own VLogSite that registers with the // global linked list of sites to asynchronously update its verbosity level on // changes to --v or --vmodule. The verbosity can also be set by manually -// calling SetVLOGLevel. +// calling SetVLogLevel. // // VLOG_IS_ON is not async signal safe, but it is guaranteed not to allocate // new memory. diff --git a/yass/third_party/abseil-cpp/absl/meta/type_traits.h b/yass/third_party/abseil-cpp/absl/meta/type_traits.h index ed5e608074..ded55820a3 100644 --- a/yass/third_party/abseil-cpp/absl/meta/type_traits.h +++ b/yass/third_party/abseil-cpp/absl/meta/type_traits.h @@ -37,11 +37,21 @@ #include #include +#include #include +#include #include "absl/base/attributes.h" #include "absl/base/config.h" +#ifdef __cpp_lib_span +#include // NOLINT(build/c++20) +#endif + +#ifdef ABSL_HAVE_STD_STRING_VIEW +#include +#endif + // Defines the default alignment. `__STDCPP_DEFAULT_NEW_ALIGNMENT__` is a C++17 // feature. #if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) @@ -279,27 +289,6 @@ using remove_extent_t = typename std::remove_extent::type; template using remove_all_extents_t = typename std::remove_all_extents::type; -ABSL_INTERNAL_DISABLE_DEPRECATED_DECLARATION_WARNING -namespace type_traits_internal { -// This trick to retrieve a default alignment is necessary for our -// implementation of aligned_storage_t to be consistent with any -// implementation of std::aligned_storage. -template > -struct default_alignment_of_aligned_storage; - -template -struct default_alignment_of_aligned_storage< - Len, std::aligned_storage> { - static constexpr size_t value = Align; -}; -} // namespace type_traits_internal - -// TODO(b/260219225): std::aligned_storage(_t) is deprecated in C++23. -template ::value> -using aligned_storage_t = typename std::aligned_storage::type; -ABSL_INTERNAL_RESTORE_DEPRECATED_DECLARATION_WARNING - template using decay_t = typename std::decay::type; @@ -574,6 +563,97 @@ constexpr bool is_constant_evaluated() noexcept { #endif } #endif // ABSL_HAVE_CONSTANT_EVALUATED + +namespace type_traits_internal { + +// Detects if a class's definition has declared itself to be an owner by +// declaring +// using absl_internal_is_view = std::true_type; +// as a member. +// Types that don't want either must either omit this declaration entirely, or +// (if e.g. inheriting from a base class) define the member to something that +// isn't a Boolean trait class, such as `void`. +// Do not specialize or use this directly. It's an implementation detail. +template +struct IsOwnerImpl : std::false_type { + static_assert(std::is_same>::value, + "type must lack qualifiers"); +}; + +template +struct IsOwnerImpl< + T, + std::enable_if_t::value>> + : absl::negation {}; + +// A trait to determine whether a type is an owner. +// Do *not* depend on the correctness of this trait for correct code behavior. +// It is only a safety feature and its value may change in the future. +// Do not specialize this; instead, define the member trait inside your type so +// that it can be auto-detected, and to prevent ODR violations. +// If it ever becomes possible to detect [[gsl::Owner]], we should leverage it: +// https://wg21.link/p1179 +template +struct IsOwner : IsOwnerImpl {}; + +template +struct IsOwner> : std::true_type {}; + +template +struct IsOwner> : std::true_type {}; + +// Detects if a class's definition has declared itself to be a view by declaring +// using absl_internal_is_view = std::true_type; +// as a member. +// Do not specialize or use this directly. +template +struct IsViewImpl : std::false_type { + static_assert(std::is_same>::value, + "type must lack qualifiers"); +}; + +template +struct IsViewImpl< + T, + std::enable_if_t::value>> + : T::absl_internal_is_view {}; + +// A trait to determine whether a type is a view. +// Do *not* depend on the correctness of this trait for correct code behavior. +// It is only a safety feature, and its value may change in the future. +// Do not specialize this trait. Instead, define the member +// using absl_internal_is_view = std::true_type; +// in your class to allow its detection while preventing ODR violations. +// If it ever becomes possible to detect [[gsl::Pointer]], we should leverage +// it: https://wg21.link/p1179 +template +struct IsView : std::integral_constant::value || + IsViewImpl::value> {}; + +#ifdef ABSL_HAVE_STD_STRING_VIEW +template +struct IsView> : std::true_type {}; +#endif + +#ifdef __cpp_lib_span +template +struct IsView> : std::true_type {}; +#endif + +// Determines whether the assignment of the given types is lifetime-bound. +// Do *not* depend on the correctness of this trait for correct code behavior. +// It is only a safety feature and its value may change in the future. +// If it ever becomes possible to detect [[clang::lifetimebound]] directly, +// we should change the implementation to leverage that. +// Until then, we consider an assignment from an "owner" (such as std::string) +// to a "view" (such as std::string_view) to be a lifetime-bound assignment. +template +using IsLifetimeBoundAssignment = + std::integral_constant>::value && + IsOwner>::value>; + +} // namespace type_traits_internal + ABSL_NAMESPACE_END } // namespace absl diff --git a/yass/third_party/abseil-cpp/absl/meta/type_traits_test.cc b/yass/third_party/abseil-cpp/absl/meta/type_traits_test.cc index 25f5abbce7..1e056bb285 100644 --- a/yass/third_party/abseil-cpp/absl/meta/type_traits_test.cc +++ b/yass/third_party/abseil-cpp/absl/meta/type_traits_test.cc @@ -26,10 +26,32 @@ #include "absl/time/clock.h" #include "absl/time/time.h" +#ifdef ABSL_HAVE_STD_STRING_VIEW +#include +#endif + namespace { using ::testing::StaticAssertTypeEq; +template +using IsOwnerAndNotView = + absl::conjunction, + absl::negation>>; + +static_assert(IsOwnerAndNotView>::value, + "vector is an owner, not a view"); +static_assert(IsOwnerAndNotView::value, + "string is an owner, not a view"); +static_assert(IsOwnerAndNotView::value, + "wstring is an owner, not a view"); +#ifdef ABSL_HAVE_STD_STRING_VIEW +static_assert(!IsOwnerAndNotView::value, + "string_view is a view, not an owner"); +static_assert(!IsOwnerAndNotView::value, + "wstring_view is a view, not an owner"); +#endif + template struct simple_pair { T first; @@ -489,76 +511,6 @@ TEST(TypeTraitsTest, TestExtentAliases) { ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(remove_all_extents, int[][1]); } -TEST(TypeTraitsTest, TestAlignedStorageAlias) { - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 1); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 2); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 3); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 4); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 5); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 6); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 7); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 8); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 9); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 10); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 11); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 12); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 13); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 14); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 15); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 16); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 17); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 18); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 19); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 20); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 21); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 22); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 23); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 24); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 25); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 26); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 27); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 28); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 29); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 30); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 31); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 32); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 33); - - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 1, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 2, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 3, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 4, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 5, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 6, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 7, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 8, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 9, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 10, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 11, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 12, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 13, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 14, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 15, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 16, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 17, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 18, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 19, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 20, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 21, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 22, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 23, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 24, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 25, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 26, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 27, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 28, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 29, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 30, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 31, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 32, 128); - ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(aligned_storage, 33, 128); -} - TEST(TypeTraitsTest, TestDecay) { ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(decay, int); ABSL_INTERNAL_EXPECT_ALIAS_EQUIVALENCE(decay, const int); diff --git a/yass/third_party/abseil-cpp/absl/profiling/internal/periodic_sampler.h b/yass/third_party/abseil-cpp/absl/profiling/internal/periodic_sampler.h index 54f0af452b..f5d847ab25 100644 --- a/yass/third_party/abseil-cpp/absl/profiling/internal/periodic_sampler.h +++ b/yass/third_party/abseil-cpp/absl/profiling/internal/periodic_sampler.h @@ -172,7 +172,7 @@ inline bool PeriodicSamplerBase::Sample() noexcept { // Typical use case: // // struct HashTablezTag {}; -// thread_local PeriodicSampler sampler; +// thread_local PeriodicSampler sampler; // // void HashTableSamplingLogic(...) { // if (sampler.Sample()) { diff --git a/yass/third_party/abseil-cpp/absl/random/benchmarks.cc b/yass/third_party/abseil-cpp/absl/random/benchmarks.cc index 0900e81839..26bc95e846 100644 --- a/yass/third_party/abseil-cpp/absl/random/benchmarks.cc +++ b/yass/third_party/abseil-cpp/absl/random/benchmarks.cc @@ -291,7 +291,7 @@ void BM_Thread(benchmark::State& state) { BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 100)->ThreadPerCpu(); \ BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 1000)->ThreadPerCpu(); \ BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 100)->ThreadPerCpu(); \ - BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 1000)->ThreadPerCpu(); + BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 1000)->ThreadPerCpu() #define BM_EXTENDED(Engine) \ /* -------------- Extended Uniform -----------------------*/ \ @@ -355,7 +355,7 @@ void BM_Thread(benchmark::State& state) { BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution, 410, \ 580); \ BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution, 199); \ - BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution, 199); + BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution, 199) // ABSL Recommended interfaces. BM_BASIC(absl::InsecureBitGen); // === pcg64_2018_engine diff --git a/yass/third_party/abseil-cpp/absl/random/distributions.h b/yass/third_party/abseil-cpp/absl/random/distributions.h index 4e3b332e0f..16b76eeaaf 100644 --- a/yass/third_party/abseil-cpp/absl/random/distributions.h +++ b/yass/third_party/abseil-cpp/absl/random/distributions.h @@ -362,7 +362,7 @@ RealType Gaussian(URBG&& urbg, // NOLINT(runtime/references) // If `lo` is nonzero then this distribution is shifted to the desired interval, // so LogUniform(lo, hi, b) is equivalent to LogUniform(0, hi-lo, b)+lo. // -// See https://en.wikipedia.org/wiki/Log-normal_distribution +// See https://en.wikipedia.org/wiki/Reciprocal_distribution // // Example: // diff --git a/yass/third_party/abseil-cpp/absl/status/internal/statusor_internal.h b/yass/third_party/abseil-cpp/absl/status/internal/statusor_internal.h index 5be94903b7..414aa3002d 100644 --- a/yass/third_party/abseil-cpp/absl/status/internal/statusor_internal.h +++ b/yass/third_party/abseil-cpp/absl/status/internal/statusor_internal.h @@ -123,6 +123,65 @@ using IsForwardingAssignmentValid = absl::disjunction< std::is_same>, IsForwardingAssignmentAmbiguous>>>; +template +using Equality = std::conditional_t>; + +template +using IsConstructionValid = absl::conjunction< + Equality>, + IsDirectInitializationValid, std::is_constructible, + Equality>, + absl::disjunction< + std::is_same>, + absl::conjunction< + std::conditional_t< + Explicit, + absl::negation>, + absl::negation>>, + absl::negation< + internal_statusor::HasConversionOperatorToStatusOr>>>>; + +template +using IsAssignmentValid = absl::conjunction< + Equality>, + std::is_constructible, std::is_assignable, + absl::disjunction< + std::is_same>, + absl::conjunction< + absl::negation>, + absl::negation>>>, + IsForwardingAssignmentValid>; + +template +using IsConstructionFromStatusValid = absl::conjunction< + absl::negation, absl::remove_cvref_t>>, + absl::negation>>, + absl::negation>>, + Equality>, + std::is_constructible, + absl::negation>>; + +template +using IsConstructionFromStatusOrValid = absl::conjunction< + absl::negation>, + Equality>, + std::is_constructible, + Equality>, + absl::negation>>; + +template +using IsStatusOrAssignmentValid = absl::conjunction< + absl::negation>>, + Equality>, + std::is_constructible, std::is_assignable, + absl::negation>>>; + class Helper { public: // Move type-agnostic error handling to the .cc. diff --git a/yass/third_party/abseil-cpp/absl/status/status.cc b/yass/third_party/abseil-cpp/absl/status/status.cc index 4dd5ae06ce..745ab88922 100644 --- a/yass/third_party/abseil-cpp/absl/status/status.cc +++ b/yass/third_party/abseil-cpp/absl/status/status.cc @@ -273,14 +273,12 @@ StatusCode ErrnoToStatusCode(int error_number) { case EFAULT: // Bad address case EILSEQ: // Illegal byte sequence case ENOPROTOOPT: // Protocol not available - case ENOSTR: // Not a STREAM case ENOTSOCK: // Not a socket case ENOTTY: // Inappropriate I/O control operation case EPROTOTYPE: // Protocol wrong type for socket case ESPIPE: // Invalid seek return StatusCode::kInvalidArgument; case ETIMEDOUT: // Connection timed out - case ETIME: // Timer expired return StatusCode::kDeadlineExceeded; case ENODEV: // No such device case ENOENT: // No such file or directory @@ -339,9 +337,7 @@ StatusCode ErrnoToStatusCode(int error_number) { case EMLINK: // Too many links case ENFILE: // Too many open files in system case ENOBUFS: // No buffer space available - case ENODATA: // No message is available on the STREAM read queue case ENOMEM: // Not enough space - case ENOSR: // No STREAM resources #ifdef EUSERS case EUSERS: // Too many users #endif diff --git a/yass/third_party/abseil-cpp/absl/status/statusor.h b/yass/third_party/abseil-cpp/absl/status/statusor.h index cd35e5b436..b1da45e6f0 100644 --- a/yass/third_party/abseil-cpp/absl/status/statusor.h +++ b/yass/third_party/abseil-cpp/absl/status/statusor.h @@ -236,57 +236,55 @@ class StatusOr : private internal_statusor::StatusOrData, // is explicit if and only if the corresponding construction of `T` from `U` // is explicit. (This constructor inherits its explicitness from the // underlying constructor.) - template < - typename U, - absl::enable_if_t< - absl::conjunction< - absl::negation>, - std::is_constructible, - std::is_convertible, - absl::negation< - internal_statusor::IsConstructibleOrConvertibleFromStatusOr< - T, U>>>::value, - int> = 0> + template ::value, + int> = 0> StatusOr(const StatusOr& other) // NOLINT : Base(static_cast::Base&>(other)) {} - template < - typename U, - absl::enable_if_t< - absl::conjunction< - absl::negation>, - std::is_constructible, - absl::negation>, - absl::negation< - internal_statusor::IsConstructibleOrConvertibleFromStatusOr< - T, U>>>::value, - int> = 0> + template ::value, + int> = 0> + StatusOr(const StatusOr& other ABSL_ATTRIBUTE_LIFETIME_BOUND) // NOLINT + : Base(static_cast::Base&>(other)) {} + template ::value, + int> = 0> explicit StatusOr(const StatusOr& other) : Base(static_cast::Base&>(other)) {} + template ::value, + int> = 0> + explicit StatusOr(const StatusOr& other ABSL_ATTRIBUTE_LIFETIME_BOUND) + : Base(static_cast::Base&>(other)) {} - template < - typename U, - absl::enable_if_t< - absl::conjunction< - absl::negation>, std::is_constructible, - std::is_convertible, - absl::negation< - internal_statusor::IsConstructibleOrConvertibleFromStatusOr< - T, U>>>::value, - int> = 0> + template ::value, + int> = 0> StatusOr(StatusOr&& other) // NOLINT : Base(static_cast::Base&&>(other)) {} - template < - typename U, - absl::enable_if_t< - absl::conjunction< - absl::negation>, std::is_constructible, - absl::negation>, - absl::negation< - internal_statusor::IsConstructibleOrConvertibleFromStatusOr< - T, U>>>::value, - int> = 0> + template ::value, + int> = 0> + StatusOr(StatusOr&& other ABSL_ATTRIBUTE_LIFETIME_BOUND) // NOLINT + : Base(static_cast::Base&&>(other)) {} + template ::value, + int> = 0> explicit StatusOr(StatusOr&& other) : Base(static_cast::Base&&>(other)) {} + template ::value, + int> = 0> + explicit StatusOr(StatusOr&& other ABSL_ATTRIBUTE_LIFETIME_BOUND) + : Base(static_cast::Base&&>(other)) {} // Converting Assignment Operators @@ -307,37 +305,38 @@ class StatusOr : private internal_statusor::StatusOrData, // These overloads only apply if `absl::StatusOr` is constructible and // assignable from `absl::StatusOr` and `StatusOr` cannot be directly // assigned from `StatusOr`. - template < - typename U, - absl::enable_if_t< - absl::conjunction< - absl::negation>, - std::is_constructible, - std::is_assignable, - absl::negation< - internal_statusor:: - IsConstructibleOrConvertibleOrAssignableFromStatusOr< - T, U>>>::value, - int> = 0> + template ::value, + int> = 0> StatusOr& operator=(const StatusOr& other) { this->Assign(other); return *this; } - template < - typename U, - absl::enable_if_t< - absl::conjunction< - absl::negation>, std::is_constructible, - std::is_assignable, - absl::negation< - internal_statusor:: - IsConstructibleOrConvertibleOrAssignableFromStatusOr< - T, U>>>::value, - int> = 0> + template ::value, + int> = 0> + StatusOr& operator=(const StatusOr& other ABSL_ATTRIBUTE_LIFETIME_BOUND) { + this->Assign(other); + return *this; + } + template ::value, + int> = 0> StatusOr& operator=(StatusOr&& other) { this->Assign(std::move(other)); return *this; } + template ::value, + int> = 0> + StatusOr& operator=(StatusOr&& other ABSL_ATTRIBUTE_LIFETIME_BOUND) { + this->Assign(std::move(other)); + return *this; + } // Constructs a new `absl::StatusOr` with a non-ok status. After calling // this constructor, `this->ok()` will be `false` and calls to `value()` will @@ -350,46 +349,21 @@ class StatusOr : private internal_statusor::StatusOrData, // REQUIRES: !Status(std::forward(v)).ok(). This requirement is DCHECKed. // In optimized builds, passing absl::OkStatus() here will have the effect // of passing absl::StatusCode::kInternal as a fallback. - template < - typename U = absl::Status, - absl::enable_if_t< - absl::conjunction< - std::is_convertible, - std::is_constructible, - absl::negation, absl::StatusOr>>, - absl::negation, T>>, - absl::negation, absl::in_place_t>>, - absl::negation>>::value, - int> = 0> + template ::value, + int> = 0> StatusOr(U&& v) : Base(std::forward(v)) {} - template < - typename U = absl::Status, - absl::enable_if_t< - absl::conjunction< - absl::negation>, - std::is_constructible, - absl::negation, absl::StatusOr>>, - absl::negation, T>>, - absl::negation, absl::in_place_t>>, - absl::negation>>::value, - int> = 0> + template ::value, + int> = 0> explicit StatusOr(U&& v) : Base(std::forward(v)) {} - - template < - typename U = absl::Status, - absl::enable_if_t< - absl::conjunction< - std::is_convertible, - std::is_constructible, - absl::negation, absl::StatusOr>>, - absl::negation, T>>, - absl::negation, absl::in_place_t>>, - absl::negation>>::value, - int> = 0> + template ::value, + int> = 0> StatusOr& operator=(U&& v) { this->AssignStatus(std::forward(v)); return *this; @@ -411,21 +385,22 @@ class StatusOr : private internal_statusor::StatusOrData, // StatusOr s1 = true; // s1.ok() && *s1 == true // StatusOr s2 = false; // s2.ok() && *s2 == false // s1 = s2; // ambiguous, `s1 = *s2` or `s1 = bool(s2)`? - template < - typename U = T, - typename = typename std::enable_if, std::is_assignable, - absl::disjunction< - std::is_same, T>, - absl::conjunction< - absl::negation>, - absl::negation>>>, - internal_statusor::IsForwardingAssignmentValid>::value>::type> + template ::value, + int>::type = 0> StatusOr& operator=(U&& v) { this->Assign(std::forward(v)); return *this; } + template ::value, + int>::type = 0> + StatusOr& operator=(U&& v ABSL_ATTRIBUTE_LIFETIME_BOUND) { + this->Assign(std::forward(v)); + return *this; + } // Constructs the inner value `T` in-place using the provided args, using the // `T(args...)` constructor. @@ -442,40 +417,31 @@ class StatusOr : private internal_statusor::StatusOrData, // This constructor is explicit if `U` is not convertible to `T`. To avoid // ambiguity, this constructor is disabled if `U` is a `StatusOr`, where // `J` is convertible to `T`. - template < - typename U = T, - absl::enable_if_t< - absl::conjunction< - internal_statusor::IsDirectInitializationValid, - std::is_constructible, std::is_convertible, - absl::disjunction< - std::is_same, T>, - absl::conjunction< - absl::negation>, - absl::negation< - internal_statusor::HasConversionOperatorToStatusOr< - T, U&&>>>>>::value, - int> = 0> + template ::value, + int> = 0> StatusOr(U&& u) // NOLINT : StatusOr(absl::in_place, std::forward(u)) {} + template ::value, + int> = 0> + StatusOr(U&& u ABSL_ATTRIBUTE_LIFETIME_BOUND) // NOLINT + : StatusOr(absl::in_place, std::forward(u)) {} - template < - typename U = T, - absl::enable_if_t< - absl::conjunction< - internal_statusor::IsDirectInitializationValid, - absl::disjunction< - std::is_same, T>, - absl::conjunction< - absl::negation>, - absl::negation< - internal_statusor::HasConversionOperatorToStatusOr< - T, U&&>>>>, - std::is_constructible, - absl::negation>>::value, - int> = 0> + template ::value, + int> = 0> explicit StatusOr(U&& u) // NOLINT : StatusOr(absl::in_place, std::forward(u)) {} + template ::value, + int> = 0> + explicit StatusOr(U&& u ABSL_ATTRIBUTE_LIFETIME_BOUND) // NOLINT + : StatusOr(absl::in_place, std::forward(u)) {} // StatusOr::ok() // diff --git a/yass/third_party/abseil-cpp/absl/strings/BUILD.bazel b/yass/third_party/abseil-cpp/absl/strings/BUILD.bazel index 8b30783259..d93a78a962 100644 --- a/yass/third_party/abseil-cpp/absl/strings/BUILD.bazel +++ b/yass/third_party/abseil-cpp/absl/strings/BUILD.bazel @@ -359,6 +359,7 @@ cc_test( "//absl/base:config", "//absl/base:core_headers", "//absl/base:dynamic_annotations", + "//absl/meta:type_traits", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", ], diff --git a/yass/third_party/abseil-cpp/absl/strings/CMakeLists.txt b/yass/third_party/abseil-cpp/absl/strings/CMakeLists.txt index 9258e553ac..53e8518821 100644 --- a/yass/third_party/abseil-cpp/absl/strings/CMakeLists.txt +++ b/yass/third_party/abseil-cpp/absl/strings/CMakeLists.txt @@ -274,6 +274,7 @@ absl_cc_test( absl::config absl::core_headers absl::dynamic_annotations + absl::type_traits GTest::gmock_main ) diff --git a/yass/third_party/abseil-cpp/absl/strings/ascii.cc b/yass/third_party/abseil-cpp/absl/strings/ascii.cc index 5460b2c6dc..20a696a1f6 100644 --- a/yass/third_party/abseil-cpp/absl/strings/ascii.cc +++ b/yass/third_party/abseil-cpp/absl/strings/ascii.cc @@ -15,13 +15,14 @@ #include "absl/strings/ascii.h" #include -#include +#include #include #include -#include +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/nullability.h" +#include "absl/base/optimization.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -162,19 +163,6 @@ ABSL_DLL const char kToUpper[256] = { }; // clang-format on -template -static constexpr T BroadcastByte(unsigned char value) { - static_assert(std::is_integral::value && sizeof(T) <= sizeof(uint64_t) && - std::is_unsigned::value, - "only unsigned integers up to 64-bit allowed"); - T result = value; - constexpr size_t result_bit_width = sizeof(result) * CHAR_BIT; - result |= result << ((CHAR_BIT << 0) & (result_bit_width - 1)); - result |= result << ((CHAR_BIT << 1) & (result_bit_width - 1)); - result |= result << ((CHAR_BIT << 2) & (result_bit_width - 1)); - return result; -} - // Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`). // Implemented by: // 1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26). @@ -189,47 +177,10 @@ constexpr bool AsciiInAZRange(unsigned char c) { return static_cast(u) < threshold; } +// Force-inline so the compiler won't merge the short and long implementations. template -static constexpr char* PartialAsciiStrCaseFold(absl::Nonnull p, - absl::Nonnull end) { - using vec_t = size_t; - const size_t n = static_cast(end - p); - - // SWAR algorithm: http://0x80.pl/notesen/2016-01-06-swar-swap-case.html - constexpr char ch_a = ToUpper ? 'a' : 'A', ch_z = ToUpper ? 'z' : 'Z'; - char* const swar_end = p + (n / sizeof(vec_t)) * sizeof(vec_t); - while (p < swar_end) { - vec_t v = vec_t(); - - // memcpy the vector, but constexpr - for (size_t i = 0; i < sizeof(vec_t); ++i) { - v |= static_cast(static_cast(p[i])) - << (i * CHAR_BIT); - } - - constexpr unsigned int msb = 1u << (CHAR_BIT - 1); - const vec_t v_msb = v & BroadcastByte(msb); - const vec_t v_nonascii_mask = (v_msb << 1) - (v_msb >> (CHAR_BIT - 1)); - const vec_t v_nonascii = v & v_nonascii_mask; - const vec_t v_ascii = v & ~v_nonascii_mask; - const vec_t a = v_ascii + BroadcastByte(msb - ch_a - 0), - z = v_ascii + BroadcastByte(msb - ch_z - 1); - v = v_nonascii | (v_ascii ^ ((a ^ z) & BroadcastByte(msb)) >> 2); - - // memcpy the vector, but constexpr - for (size_t i = 0; i < sizeof(vec_t); ++i) { - p[i] = static_cast(v >> (i * CHAR_BIT)); - } - - p += sizeof(v); - } - - return p; -} - -template -static constexpr void AsciiStrCaseFold(absl::Nonnull p, - absl::Nonnull end) { +ABSL_ATTRIBUTE_ALWAYS_INLINE inline constexpr void AsciiStrCaseFoldImpl( + absl::Nonnull p, size_t size) { // The upper- and lowercase versions of ASCII characters differ by only 1 bit. // When we need to flip the case, we can xor with this bit to achieve the // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We @@ -237,20 +188,32 @@ static constexpr void AsciiStrCaseFold(absl::Nonnull p, // have the same single bit difference. constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A'; - using vec_t = size_t; - // TODO(b/316380338): When FDO becomes able to vectorize these, - // revert this manual optimization and just leave the naive loop. - if (static_cast(end - p) >= sizeof(vec_t)) { - p = ascii_internal::PartialAsciiStrCaseFold(p, end); - } - while (p < end) { - unsigned char v = static_cast(*p); + for (size_t i = 0; i < size; ++i) { + unsigned char v = static_cast(p[i]); v ^= AsciiInAZRange(v) ? kAsciiCaseBitFlip : 0; - *p = static_cast(v); - ++p; + p[i] = static_cast(v); } } +// The string size threshold for starting using the long string version. +constexpr size_t kCaseFoldThreshold = 16; + +// No-inline so the compiler won't merge the short and long implementations. +template +ABSL_ATTRIBUTE_NOINLINE constexpr void AsciiStrCaseFoldLong( + absl::Nonnull p, size_t size) { + ABSL_ASSUME(size >= kCaseFoldThreshold); + AsciiStrCaseFoldImpl(p, size); +} + +// Splitting to short and long strings to allow vectorization decisions +// to be made separately in the long and short cases. +template +constexpr void AsciiStrCaseFold(absl::Nonnull p, size_t size) { + size < kCaseFoldThreshold ? AsciiStrCaseFoldImpl(p, size) + : AsciiStrCaseFoldLong(p, size); +} + static constexpr size_t ValidateAsciiCasefold() { constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN; size_t incorrect_index = 0; @@ -259,8 +222,8 @@ static constexpr size_t ValidateAsciiCasefold() { for (unsigned int i = 0; i < num_chars; ++i) { uppered[i] = lowered[i] = static_cast(i); } - AsciiStrCaseFold(&lowered[0], &lowered[num_chars]); - AsciiStrCaseFold(&uppered[0], &uppered[num_chars]); + AsciiStrCaseFold(&lowered[0], num_chars); + AsciiStrCaseFold(&uppered[0], num_chars); for (size_t i = 0; i < num_chars; ++i) { const char ch = static_cast(i), ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch), @@ -278,13 +241,11 @@ static_assert(ValidateAsciiCasefold() == 0, "error in case conversion"); } // namespace ascii_internal void AsciiStrToLower(absl::Nonnull s) { - char* p = &(*s)[0]; // Guaranteed to be valid for empty strings - return ascii_internal::AsciiStrCaseFold(p, p + s->size()); + return ascii_internal::AsciiStrCaseFold(&(*s)[0], s->size()); } void AsciiStrToUpper(absl::Nonnull s) { - char* p = &(*s)[0]; // Guaranteed to be valid for empty strings - return ascii_internal::AsciiStrCaseFold(p, p + s->size()); + return ascii_internal::AsciiStrCaseFold(&(*s)[0], s->size()); } void RemoveExtraAsciiWhitespace(absl::Nonnull str) { diff --git a/yass/third_party/abseil-cpp/absl/strings/ascii_test.cc b/yass/third_party/abseil-cpp/absl/strings/ascii_test.cc index 117140c174..8885bb15d4 100644 --- a/yass/third_party/abseil-cpp/absl/strings/ascii_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/ascii_test.cc @@ -190,11 +190,13 @@ TEST(AsciiStrTo, Lower) { const std::string str("GHIJKL"); const std::string str2("MNOPQR"); const absl::string_view sp(str2); + const std::string long_str("ABCDEFGHIJKLMNOPQRSTUVWXYZ1!a"); std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ"); EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); + EXPECT_EQ("abcdefghijklmnopqrstuvwxyz1!a", absl::AsciiStrToLower(long_str)); absl::AsciiStrToLower(&mutable_str); EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str); @@ -210,10 +212,12 @@ TEST(AsciiStrTo, Upper) { const std::string str("ghijkl"); const std::string str2("_`?@[{amnopqrstuvwxyz"); const absl::string_view sp(str2); + const std::string long_str("abcdefghijklmnopqrstuvwxyz1!A"); EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf)); EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str)); EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp)); + EXPECT_EQ("ABCDEFGHIJKLMNOPQRSTUVWXYZ1!A", absl::AsciiStrToUpper(long_str)); char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), diff --git a/yass/third_party/abseil-cpp/absl/strings/cord.cc b/yass/third_party/abseil-cpp/absl/strings/cord.cc index f67326fdef..ea84e44618 100644 --- a/yass/third_party/abseil-cpp/absl/strings/cord.cc +++ b/yass/third_party/abseil-cpp/absl/strings/cord.cc @@ -425,8 +425,8 @@ Cord& Cord::operator=(absl::string_view src) { // we keep it here to make diffs easier. void Cord::InlineRep::AppendArray(absl::string_view src, MethodIdentifier method) { - MaybeRemoveEmptyCrcNode(); if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + MaybeRemoveEmptyCrcNode(); size_t appended = 0; CordRep* rep = tree(); @@ -1451,8 +1451,6 @@ absl::string_view Cord::FlattenSlowPath() { static void DumpNode(absl::Nonnull rep, bool include_data, absl::Nonnull os, int indent) { const int kIndentStep = 1; - absl::InlinedVector stack; - absl::InlinedVector indents; for (;;) { *os << std::setw(3) << rep->refcount.Get(); *os << " " << std::setw(7) << rep->length; @@ -1477,26 +1475,23 @@ static void DumpNode(absl::Nonnull rep, bool include_data, if (rep->IsExternal()) { *os << "EXTERNAL ["; if (include_data) - *os << absl::CEscape(std::string(rep->external()->base, rep->length)); + *os << absl::CEscape( + absl::string_view(rep->external()->base, rep->length)); *os << "]\n"; } else if (rep->IsFlat()) { *os << "FLAT cap=" << rep->flat()->Capacity() << " ["; if (include_data) - *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length)); + *os << absl::CEscape( + absl::string_view(rep->flat()->Data(), rep->length)); *os << "]\n"; } else { CordRepBtree::Dump(rep, /*label=*/"", include_data, *os); } } if (leaf) { - if (stack.empty()) break; - rep = stack.back(); - stack.pop_back(); - indent = indents.back(); - indents.pop_back(); + break; } } - ABSL_INTERNAL_CHECK(indents.empty(), ""); } static std::string ReportError(absl::Nonnull root, diff --git a/yass/third_party/abseil-cpp/absl/strings/cord.h b/yass/third_party/abseil-cpp/absl/strings/cord.h index 2583aa8ac4..c9f6767b4c 100644 --- a/yass/third_party/abseil-cpp/absl/strings/cord.h +++ b/yass/third_party/abseil-cpp/absl/strings/cord.h @@ -75,6 +75,7 @@ #include "absl/base/internal/per_thread_tls.h" #include "absl/base/macros.h" #include "absl/base/nullability.h" +#include "absl/base/optimization.h" #include "absl/base/port.h" #include "absl/container/inlined_vector.h" #include "absl/crc/internal/crc_cord_state.h" @@ -1353,7 +1354,8 @@ inline size_t Cord::EstimatedMemoryUsage( return result; } -inline absl::optional Cord::TryFlat() const { +inline absl::optional Cord::TryFlat() const + ABSL_ATTRIBUTE_LIFETIME_BOUND { absl::cord_internal::CordRep* rep = contents_.tree(); if (rep == nullptr) { return absl::string_view(contents_.data(), contents_.size()); @@ -1365,7 +1367,7 @@ inline absl::optional Cord::TryFlat() const { return absl::nullopt; } -inline absl::string_view Cord::Flatten() { +inline absl::string_view Cord::Flatten() ABSL_ATTRIBUTE_LIFETIME_BOUND { absl::cord_internal::CordRep* rep = contents_.tree(); if (rep == nullptr) { return absl::string_view(contents_.data(), contents_.size()); @@ -1388,6 +1390,7 @@ inline void Cord::Prepend(absl::string_view src) { inline void Cord::Append(CordBuffer buffer) { if (ABSL_PREDICT_FALSE(buffer.length() == 0)) return; + contents_.MaybeRemoveEmptyCrcNode(); absl::string_view short_value; if (CordRep* rep = buffer.ConsumeValue(short_value)) { contents_.AppendTree(rep, CordzUpdateTracker::kAppendCordBuffer); @@ -1398,6 +1401,7 @@ inline void Cord::Append(CordBuffer buffer) { inline void Cord::Prepend(CordBuffer buffer) { if (ABSL_PREDICT_FALSE(buffer.length() == 0)) return; + contents_.MaybeRemoveEmptyCrcNode(); absl::string_view short_value; if (CordRep* rep = buffer.ConsumeValue(short_value)) { contents_.PrependTree(rep, CordzUpdateTracker::kPrependCordBuffer); diff --git a/yass/third_party/abseil-cpp/absl/strings/escaping.cc b/yass/third_party/abseil-cpp/absl/strings/escaping.cc index af50faebfb..4ffef94b63 100644 --- a/yass/third_party/abseil-cpp/absl/strings/escaping.cc +++ b/yass/third_party/abseil-cpp/absl/strings/escaping.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" @@ -969,25 +970,27 @@ std::string WebSafeBase64Escape(absl::string_view src) { bool HexStringToBytes(absl::string_view hex, absl::Nonnull bytes) { + std::string output; + size_t num_bytes = hex.size() / 2; - bytes->clear(); if (hex.size() != num_bytes * 2) { return false; } - absl::strings_internal::STLStringResizeUninitialized(bytes, num_bytes); + absl::strings_internal::STLStringResizeUninitialized(&output, num_bytes); auto hex_p = hex.cbegin(); - for (std::string::iterator bin_p = bytes->begin(); - bin_p != bytes->end(); ++bin_p) { + for (std::string::iterator bin_p = output.begin(); bin_p != output.end(); + ++bin_p) { int h1 = absl::kHexValueStrict[static_cast(*hex_p++)]; int h2 = absl::kHexValueStrict[static_cast(*hex_p++)]; if (h1 == -1 || h2 == -1) { - bytes->resize(static_cast(bin_p - bytes->begin())); + output.resize(static_cast(bin_p - output.begin())); return false; } *bin_p = static_cast((h1 << 4) + h2); } + *bytes = std::move(output); return true; } diff --git a/yass/third_party/abseil-cpp/absl/strings/escaping_test.cc b/yass/third_party/abseil-cpp/absl/strings/escaping_test.cc index ae3466a7d1..25cb685bef 100644 --- a/yass/third_party/abseil-cpp/absl/strings/escaping_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/escaping_test.cc @@ -706,6 +706,13 @@ TEST(Escaping, HexStringToBytesBackToHex) { hex = absl::BytesToHexString(kTestBytes); EXPECT_EQ(hex, kTestHexLower); + // Same buffer. + // We do not care if this works since we do not promise it in the contract. + // The purpose of this test is to to see if the program will crash or if + // sanitizers will catch anything. + bytes = std::string(kTestHexUpper); + (void)absl::HexStringToBytes(bytes, &bytes); + // Length not a multiple of two. EXPECT_FALSE(absl::HexStringToBytes("1c2f003", &bytes)); diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc index 20d314f03c..6033d04696 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc @@ -40,13 +40,15 @@ std::atomic g_cordz_mean_interval(50000); // Special negative 'not initialized' per thread value for cordz_next_sample. static constexpr int64_t kInitCordzNextSample = -1; -ABSL_CONST_INIT thread_local int64_t cordz_next_sample = kInitCordzNextSample; +ABSL_CONST_INIT thread_local SamplingState cordz_next_sample = { + kInitCordzNextSample, 1}; // kIntervalIfDisabled is the number of profile-eligible events need to occur // before the code will confirm that cordz is still disabled. constexpr int64_t kIntervalIfDisabled = 1 << 16; -ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() { +ABSL_ATTRIBUTE_NOINLINE int64_t +cordz_should_profile_slow(SamplingState& state) { thread_local absl::profiling_internal::ExponentialBiased exponential_biased_generator; @@ -55,30 +57,34 @@ ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() { // Check if we disabled profiling. If so, set the next sample to a "large" // number to minimize the overhead of the should_profile codepath. if (mean_interval <= 0) { - cordz_next_sample = kIntervalIfDisabled; - return false; + state = {kIntervalIfDisabled, kIntervalIfDisabled}; + return 0; } // Check if we're always sampling. if (mean_interval == 1) { - cordz_next_sample = 1; - return true; + state = {1, 1}; + return 1; } - if (cordz_next_sample <= 0) { + if (cordz_next_sample.next_sample <= 0) { // If first check on current thread, check cordz_should_profile() // again using the created (initial) stride in cordz_next_sample. - const bool initialized = cordz_next_sample != kInitCordzNextSample; - cordz_next_sample = exponential_biased_generator.GetStride(mean_interval); - return initialized || cordz_should_profile(); + const bool initialized = + cordz_next_sample.next_sample != kInitCordzNextSample; + auto old_stride = state.sample_stride; + auto stride = exponential_biased_generator.GetStride(mean_interval); + state = {stride, stride}; + bool should_sample = initialized || cordz_should_profile() > 0; + return should_sample ? old_stride : 0; } - --cordz_next_sample; - return false; + --state.next_sample; + return 0; } void cordz_set_next_sample_for_testing(int64_t next_sample) { - cordz_next_sample = next_sample; + cordz_next_sample = {next_sample, next_sample}; } #endif // ABSL_INTERNAL_CORDZ_ENABLED diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h index ed108bf1b4..84c185e407 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h @@ -41,23 +41,33 @@ void set_cordz_mean_interval(int32_t mean_interval); #ifdef ABSL_INTERNAL_CORDZ_ENABLED +struct SamplingState { + int64_t next_sample; + int64_t sample_stride; +}; + // cordz_next_sample is the number of events until the next sample event. If // the value is 1 or less, the code will check on the next event if cordz is // enabled, and if so, will sample the Cord. cordz is only enabled when we can // use thread locals. -ABSL_CONST_INIT extern thread_local int64_t cordz_next_sample; +ABSL_CONST_INIT extern thread_local SamplingState cordz_next_sample; -// Determines if the next sample should be profiled. If it is, the value pointed -// at by next_sample will be set with the interval until the next sample. -bool cordz_should_profile_slow(); +// Determines if the next sample should be profiled. +// Returns: +// 0: Do not sample +// >0: Sample with the stride of the last sampling period +int64_t cordz_should_profile_slow(SamplingState& state); -// Returns true if the next cord should be sampled. -inline bool cordz_should_profile() { - if (ABSL_PREDICT_TRUE(cordz_next_sample > 1)) { - cordz_next_sample--; - return false; +// Determines if the next sample should be profiled. +// Returns: +// 0: Do not sample +// >0: Sample with the stride of the last sampling period +inline int64_t cordz_should_profile() { + if (ABSL_PREDICT_TRUE(cordz_next_sample.next_sample > 1)) { + cordz_next_sample.next_sample--; + return 0; } - return cordz_should_profile_slow(); + return cordz_should_profile_slow(cordz_next_sample); } // Sets the interval until the next sample (for testing only) @@ -65,7 +75,7 @@ void cordz_set_next_sample_for_testing(int64_t next_sample); #else // ABSL_INTERNAL_CORDZ_ENABLED -inline bool cordz_should_profile() { return false; } +inline int64_t cordz_should_profile() { return 0; } inline void cordz_set_next_sample_for_testing(int64_t) {} #endif // ABSL_INTERNAL_CORDZ_ENABLED diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc index b70a685e37..8fb93d53dd 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc @@ -47,9 +47,9 @@ TEST(CordzFunctionsTest, ShouldProfileDisable) { set_cordz_mean_interval(0); cordz_set_next_sample_for_testing(0); - EXPECT_FALSE(cordz_should_profile()); + EXPECT_EQ(cordz_should_profile(), 0); // 1 << 16 is from kIntervalIfDisabled in cordz_functions.cc. - EXPECT_THAT(cordz_next_sample, Eq(1 << 16)); + EXPECT_THAT(cordz_next_sample.next_sample, Eq(1 << 16)); set_cordz_mean_interval(orig_sample_rate); } @@ -59,8 +59,8 @@ TEST(CordzFunctionsTest, ShouldProfileAlways) { set_cordz_mean_interval(1); cordz_set_next_sample_for_testing(1); - EXPECT_TRUE(cordz_should_profile()); - EXPECT_THAT(cordz_next_sample, Le(1)); + EXPECT_GT(cordz_should_profile(), 0); + EXPECT_THAT(cordz_next_sample.next_sample, Le(1)); set_cordz_mean_interval(orig_sample_rate); } @@ -74,9 +74,7 @@ TEST(CordzFunctionsTest, DoesNotAlwaysSampleFirstCord) { do { ++tries; ASSERT_THAT(tries, Le(1000)); - std::thread thread([&sampled] { - sampled = cordz_should_profile(); - }); + std::thread thread([&sampled] { sampled = cordz_should_profile() > 0; }); thread.join(); } while (sampled); } @@ -94,7 +92,7 @@ TEST(CordzFunctionsTest, ShouldProfileRate) { // new value for next_sample each iteration. cordz_set_next_sample_for_testing(0); cordz_should_profile(); - sum_of_intervals += cordz_next_sample; + sum_of_intervals += cordz_next_sample.next_sample; } // The sum of independent exponential variables is an Erlang distribution, diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc index b24c3da712..b7c7fed9fa 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc @@ -14,6 +14,8 @@ #include "absl/strings/internal/cordz_info.h" +#include + #include "absl/base/config.h" #include "absl/base/internal/spinlock.h" #include "absl/container/inlined_vector.h" @@ -247,10 +249,12 @@ CordzInfo* CordzInfo::Next(const CordzSnapshot& snapshot) const { return next; } -void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method) { +void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method, + int64_t sampling_stride) { assert(cord.is_tree()); assert(!cord.is_profiled()); - CordzInfo* cordz_info = new CordzInfo(cord.as_tree(), nullptr, method); + CordzInfo* cordz_info = + new CordzInfo(cord.as_tree(), nullptr, method, sampling_stride); cord.set_cordz_info(cordz_info); cordz_info->Track(); } @@ -266,7 +270,8 @@ void CordzInfo::TrackCord(InlineData& cord, const InlineData& src, if (cordz_info != nullptr) cordz_info->Untrack(); // Start new cord sample - cordz_info = new CordzInfo(cord.as_tree(), src.cordz_info(), method); + cordz_info = new CordzInfo(cord.as_tree(), src.cordz_info(), method, + src.cordz_info()->sampling_stride()); cord.set_cordz_info(cordz_info); cordz_info->Track(); } @@ -298,9 +303,8 @@ size_t CordzInfo::FillParentStack(const CordzInfo* src, void** stack) { return src->stack_depth_; } -CordzInfo::CordzInfo(CordRep* rep, - const CordzInfo* src, - MethodIdentifier method) +CordzInfo::CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method, int64_t sampling_stride) : rep_(rep), stack_depth_( static_cast(absl::GetStackTrace(stack_, @@ -309,7 +313,8 @@ CordzInfo::CordzInfo(CordRep* rep, parent_stack_depth_(FillParentStack(src, parent_stack_)), method_(method), parent_method_(GetParentMethod(src)), - create_time_(absl::Now()) { + create_time_(absl::Now()), + sampling_stride_(sampling_stride) { update_tracker_.LossyAdd(method); if (src) { // Copy parent counters. diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info.h b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info.h index 17eaa91c77..2dc9d16def 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info.h +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info.h @@ -60,7 +60,8 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { // and/or deleted. `method` identifies the Cord public API method initiating // the cord to be sampled. // Requires `cord` to hold a tree, and `cord.cordz_info()` to be null. - static void TrackCord(InlineData& cord, MethodIdentifier method); + static void TrackCord(InlineData& cord, MethodIdentifier method, + int64_t sampling_stride); // Identical to TrackCord(), except that this function fills the // `parent_stack` and `parent_method` properties of the returned CordzInfo @@ -181,6 +182,8 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { // or RemovePrefix. CordzStatistics GetCordzStatistics() const; + int64_t sampling_stride() const { return sampling_stride_; } + private: using SpinLock = absl::base_internal::SpinLock; using SpinLockHolder = ::absl::base_internal::SpinLockHolder; @@ -199,7 +202,7 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { static constexpr size_t kMaxStackDepth = 64; explicit CordzInfo(CordRep* rep, const CordzInfo* src, - MethodIdentifier method); + MethodIdentifier method, int64_t weight); ~CordzInfo() override; // Sets `rep_` without holding a lock. @@ -250,12 +253,14 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { const MethodIdentifier parent_method_; CordzUpdateTracker update_tracker_; const absl::Time create_time_; + const int64_t sampling_stride_; }; inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord( InlineData& cord, MethodIdentifier method) { - if (ABSL_PREDICT_FALSE(cordz_should_profile())) { - TrackCord(cord, method); + auto stride = cordz_should_profile(); + if (ABSL_PREDICT_FALSE(stride > 0)) { + TrackCord(cord, method, stride); } } diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc index d55773f2d4..3e6a8a09d7 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc @@ -152,7 +152,7 @@ size_t FairShare(CordRep* rep, size_t ref = 1) { // Samples the cord and returns CordzInfo::GetStatistics() CordzStatistics SampleCord(CordRep* rep) { InlineData cord(rep); - CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown, 1); CordzStatistics stats = cord.cordz_info()->GetCordzStatistics(); cord.cordz_info()->Untrack(); return stats; @@ -480,7 +480,7 @@ TEST(CordzInfoStatisticsTest, ThreadSafety) { // 50/50 sample if (coin_toss(gen) != 0) { - CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown, 1); } } } diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc index cd226c3ed5..81ecce2cd1 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc @@ -65,7 +65,7 @@ std::string FormatStack(absl::Span raw_stack) { TEST(CordzInfoTest, TrackCord) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); ASSERT_THAT(info, Ne(nullptr)); EXPECT_FALSE(info->is_snapshot()); @@ -91,7 +91,7 @@ TEST(CordzInfoTest, MaybeTrackChildCordWithSampling) { TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) { CordzSamplingIntervalHelper sample_none(99999); TestCordData parent, child; - CordzInfo::TrackCord(parent.data, kTrackCordMethod); + CordzInfo::TrackCord(parent.data, kTrackCordMethod, 1); CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); CordzInfo* parent_info = parent.data.cordz_info(); CordzInfo* child_info = child.data.cordz_info(); @@ -105,7 +105,7 @@ TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) { TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) { CordzSamplingIntervalHelper sample_none(99999); TestCordData parent, child; - CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::TrackCord(child.data, kTrackCordMethod, 1); CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); } @@ -113,14 +113,14 @@ TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) { TEST(CordzInfoTest, MaybeTrackChildCordWithSamplingChildSampled) { CordzSamplingIntervalHelper sample_all(1); TestCordData parent, child; - CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::TrackCord(child.data, kTrackCordMethod, 1); CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); } TEST(CordzInfoTest, UntrackCord) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); info->Untrack(); @@ -129,7 +129,7 @@ TEST(CordzInfoTest, UntrackCord) { TEST(CordzInfoTest, UntrackCordWithSnapshot) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); CordzSnapshot snapshot; @@ -141,7 +141,7 @@ TEST(CordzInfoTest, UntrackCordWithSnapshot) { TEST(CordzInfoTest, SetCordRep) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); TestCordRep rep; @@ -155,7 +155,7 @@ TEST(CordzInfoTest, SetCordRep) { TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); info->Lock(CordzUpdateTracker::kAppendString); @@ -169,7 +169,7 @@ TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) { TEST(CordzInfoTest, RefCordRep) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); size_t refcount = data.rep.rep->refcount.Get(); @@ -183,7 +183,7 @@ TEST(CordzInfoTest, RefCordRep) { TEST(CordzInfoTest, SetCordRepRequiresMutex) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); TestCordRep rep; EXPECT_DEBUG_DEATH(info->SetCordRep(rep.rep), ".*"); @@ -197,13 +197,13 @@ TEST(CordzInfoTest, TrackUntrackHeadFirstV2) { EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info1 = data.data.cordz_info(); ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); TestCordData data2; - CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo::TrackCord(data2.data, kTrackCordMethod, 1); CordzInfo* info2 = data2.data.cordz_info(); ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); EXPECT_THAT(info2->Next(snapshot), Eq(info1)); @@ -222,13 +222,13 @@ TEST(CordzInfoTest, TrackUntrackTailFirstV2) { EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info1 = data.data.cordz_info(); ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); TestCordData data2; - CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo::TrackCord(data2.data, kTrackCordMethod, 1); CordzInfo* info2 = data2.data.cordz_info(); ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); EXPECT_THAT(info2->Next(snapshot), Eq(info1)); @@ -254,7 +254,7 @@ TEST(CordzInfoTest, StackV2) { // makes small modifications to its testing stack. 50 is sufficient to prove // that we got a decent stack. static constexpr int kMaxStackDepth = 50; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); std::vector local_stack; local_stack.resize(kMaxStackDepth); @@ -284,7 +284,7 @@ CordzInfo* TrackChildCord(InlineData& data, const InlineData& parent) { return data.cordz_info(); } CordzInfo* TrackParentCord(InlineData& data) { - CordzInfo::TrackCord(data, kTrackCordMethod); + CordzInfo::TrackCord(data, kTrackCordMethod, 1); return data.cordz_info(); } diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc index 6be1770d59..7152603d6a 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc @@ -81,11 +81,11 @@ TEST(CordzSampleTokenTest, IteratorEmpty) { TEST(CordzSampleTokenTest, Iterator) { TestCordData cord1, cord2, cord3; - CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo::TrackCord(cord1.data, kTrackCordMethod, 1); CordzInfo* info1 = cord1.data.cordz_info(); - CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo::TrackCord(cord2.data, kTrackCordMethod, 1); CordzInfo* info2 = cord2.data.cordz_info(); - CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo::TrackCord(cord3.data, kTrackCordMethod, 1); CordzInfo* info3 = cord3.data.cordz_info(); CordzSampleToken token; @@ -105,21 +105,21 @@ TEST(CordzSampleTokenTest, IteratorEquality) { TestCordData cord1; TestCordData cord2; TestCordData cord3; - CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo::TrackCord(cord1.data, kTrackCordMethod, 1); CordzInfo* info1 = cord1.data.cordz_info(); CordzSampleToken token1; // lhs starts with the CordzInfo corresponding to cord1 at the head. CordzSampleToken::Iterator lhs = token1.begin(); - CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo::TrackCord(cord2.data, kTrackCordMethod, 1); CordzInfo* info2 = cord2.data.cordz_info(); CordzSampleToken token2; // rhs starts with the CordzInfo corresponding to cord2 at the head. CordzSampleToken::Iterator rhs = token2.begin(); - CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo::TrackCord(cord3.data, kTrackCordMethod, 1); CordzInfo* info3 = cord3.data.cordz_info(); // lhs is on cord1 while rhs is on cord2. @@ -170,7 +170,7 @@ TEST(CordzSampleTokenTest, MultiThreaded) { cord.data.clear_cordz_info(); } else { // 2) Track - CordzInfo::TrackCord(cord.data, kTrackCordMethod); + CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1); } } else { std::unique_ptr& token = tokens[index]; diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc index 3d08c622d0..1b4701f13a 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc @@ -37,7 +37,7 @@ TEST(CordzUpdateScopeTest, ScopeNullptr) { TEST(CordzUpdateScopeTest, ScopeSampledCord) { TestCordData cord; - CordzInfo::TrackCord(cord.data, kTrackCordMethod); + CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1); CordzUpdateScope scope(cord.data.cordz_info(), kTrackCordMethod); cord.data.cordz_info()->SetCordRep(nullptr); } diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/escaping.cc b/yass/third_party/abseil-cpp/absl/strings/internal/escaping.cc index 56a4cbed03..d2abe66984 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/escaping.cc +++ b/yass/third_party/abseil-cpp/absl/strings/internal/escaping.cc @@ -14,6 +14,8 @@ #include "absl/strings/internal/escaping.h" +#include + #include "absl/base/internal/endian.h" #include "absl/base/internal/raw_logging.h" @@ -31,12 +33,14 @@ ABSL_CONST_INIT const char kBase64Chars[] = ABSL_CONST_INIT const char kWebSafeBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { // Base64 encodes three bytes of input at a time. If the input is not // divisible by three, we pad as appropriate. // // Base64 encodes each three bytes of input into four bytes of output. + constexpr size_t kMaxSize = (std::numeric_limits::max() - 1) / 4 * 3; + ABSL_INTERNAL_CHECK(input_len <= kMaxSize, + "CalculateBase64EscapedLenInternal() overflow"); size_t len = (input_len / 3) * 4; // Since all base 64 input is an integral number of octets, only the following @@ -66,7 +70,6 @@ size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { } } - assert(len >= input_len); // make sure we didn't overflow return len; } diff --git a/yass/third_party/abseil-cpp/absl/strings/internal/str_join_internal.h b/yass/third_party/abseil-cpp/absl/strings/internal/str_join_internal.h index d97d5033d8..440d47ff2f 100644 --- a/yass/third_party/abseil-cpp/absl/strings/internal/str_join_internal.h +++ b/yass/third_party/abseil-cpp/absl/strings/internal/str_join_internal.h @@ -31,16 +31,22 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ #define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ +#include #include #include +#include #include #include +#include #include #include +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" #include "absl/strings/internal/ostringstream.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -230,14 +236,19 @@ std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, if (start != end) { // Sums size auto&& start_value = *start; - size_t result_size = start_value.size(); + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + uint64_t result_size = start_value.size(); for (Iterator it = start; ++it != end;) { result_size += s.size(); result_size += (*it).size(); } if (result_size > 0) { - STLStringResizeUninitialized(&result, result_size); + constexpr uint64_t kMaxSize = + uint64_t{(std::numeric_limits::max)()}; + ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow"); + STLStringResizeUninitialized(&result, static_cast(result_size)); // Joins strings char* result_buf = &*result.begin(); diff --git a/yass/third_party/abseil-cpp/absl/strings/numbers.cc b/yass/third_party/abseil-cpp/absl/strings/numbers.cc index 882c3a8b85..b57d9e82e5 100644 --- a/yass/third_party/abseil-cpp/absl/strings/numbers.cc +++ b/yass/third_party/abseil-cpp/absl/strings/numbers.cc @@ -20,9 +20,7 @@ #include #include #include // for DBL_DIG and FLT_DIG -#include #include // for HUGE_VAL -#include #include #include #include @@ -30,7 +28,6 @@ #include #include #include // NOLINT(build/c++11) -#include #include #include "absl/base/attributes.h" @@ -159,71 +156,28 @@ constexpr uint32_t kTwoZeroBytes = 0x0101 * '0'; constexpr uint64_t kFourZeroBytes = 0x01010101 * '0'; constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0'; -template -constexpr T Pow(T base, uint32_t n) { - // Exponentiation by squaring - return static_cast((n > 1 ? Pow(base * base, n >> 1) : static_cast(1)) * - ((n & 1) ? base : static_cast(1))); -} - -// Given n, calculates C where the following holds for all 0 <= x < Pow(100, n): -// x / Pow(10, n) == x * C / Pow(2, n * 10) -// In other words, it allows us to divide by a power of 10 via a single -// multiplication and bit shifts, assuming the input will be smaller than the -// square of that power of 10. -template -constexpr T ComputePowerOf100DivisionCoefficient(uint32_t n) { - if (n > 4) { - // This doesn't work for large powers of 100, due to overflow - abort(); - } - T denom = 16 - 1; - T num = (denom + 1) - 10; - T gcd = 3; // Greatest common divisor of numerator and denominator - denom = Pow(denom / gcd, n); - num = Pow(num / gcd, 9 * n); - T quotient = num / denom; - if (num % denom >= denom / 2) { - // Round up, since the remainder is more than half the denominator - ++quotient; - } - return quotient; -} - -// * kDivisionBy10Mul / kDivisionBy10Div is a division by 10 for values from 0 -// to 99. It's also a division of a structure [k takes 2 bytes][m takes 2 -// bytes], then * kDivisionBy10Mul / kDivisionBy10Div will be [k / 10][m / 10]. -// It allows parallel division. -constexpr uint64_t kDivisionBy10Mul = - ComputePowerOf100DivisionCoefficient(1); -static_assert(kDivisionBy10Mul == 103, - "division coefficient for 10 is incorrect"); +// * 103 / 1024 is a division by 10 for values from 0 to 99. It's also a +// division of a structure [k takes 2 bytes][m takes 2 bytes], then * 103 / 1024 +// will be [k / 10][m / 10]. It allows parallel division. +constexpr uint64_t kDivisionBy10Mul = 103u; constexpr uint64_t kDivisionBy10Div = 1 << 10; -// * kDivisionBy100Mul / kDivisionBy100Div is a division by 100 for values from -// 0 to 9999. -constexpr uint64_t kDivisionBy100Mul = - ComputePowerOf100DivisionCoefficient(2); -static_assert(kDivisionBy100Mul == 10486, - "division coefficient for 100 is incorrect"); +// * 10486 / 1048576 is a division by 100 for values from 0 to 9999. +constexpr uint64_t kDivisionBy100Mul = 10486u; constexpr uint64_t kDivisionBy100Div = 1 << 20; -static_assert(ComputePowerOf100DivisionCoefficient(3) == 1073742, - "division coefficient for 1000 is incorrect"); - -// Same as `PrepareEightDigits`, but produces 2 digits for integers < 100. -inline uint32_t PrepareTwoDigitsImpl(uint32_t i, bool reversed) { - assert(i < 100); - uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div; - uint32_t mod10 = i - 10u * div10; - return (div10 << (reversed ? 8 : 0)) + (mod10 << (reversed ? 0 : 8)); -} -inline uint32_t PrepareTwoDigits(uint32_t i) { - return PrepareTwoDigitsImpl(i, false); +// Encode functions write the ASCII output of input `n` to `out_str`. +inline char* EncodeHundred(uint32_t n, absl::Nonnull out_str) { + int num_digits = static_cast(n - 10) >> 8; + uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div; + uint32_t mod10 = n - 10u * div10; + uint32_t base = kTwoZeroBytes + div10 + (mod10 << 8); + base >>= num_digits & 8; + little_endian::Store16(out_str, static_cast(base)); + return out_str + 2 + num_digits; } -// Same as `PrepareEightDigits`, but produces 4 digits for integers < 10000. -inline uint32_t PrepareFourDigitsImpl(uint32_t n, bool reversed) { +inline char* EncodeTenThousand(uint32_t n, absl::Nonnull out_str) { // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive // blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in @@ -231,19 +185,22 @@ inline uint32_t PrepareFourDigitsImpl(uint32_t n, bool reversed) { // strip trailing zeros, add ASCII '0000' and return. uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div; uint32_t mod100 = n - 100ull * div100; - uint32_t hundreds = - (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0)); + uint32_t hundreds = (mod100 << 16) + div100; uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div; tens &= (0xFull << 16) | 0xFull; - tens = (tens << (reversed ? 8 : 0)) + - static_cast((hundreds - 10ull * tens) << (reversed ? 0 : 8)); - return tens; -} -inline uint32_t PrepareFourDigits(uint32_t n) { - return PrepareFourDigitsImpl(n, false); -} -inline uint32_t PrepareFourDigitsReversed(uint32_t n) { - return PrepareFourDigitsImpl(n, true); + tens += (hundreds - 10ull * tens) << 8; + ABSL_ASSUME(tens != 0); + // The result can contain trailing zero bits, we need to strip them to a first + // significant byte in a final representation. For example, for n = 123, we + // have tens to have representation \0\1\2\3. We do `& -8` to round + // to a multiple to 8 to strip zero bytes, not all zero bits. + // countr_zero to help. + // 0 minus 8 to make MSVC happy. + uint32_t zeroes = static_cast(absl::countr_zero(tens)) & (0 - 8u); + tens += kFourZeroBytes; + tens >>= zeroes; + little_endian::Store32(out_str, tens); + return out_str + sizeof(tens) - zeroes / 8; } // Helper function to produce an ASCII representation of `i`. @@ -259,309 +216,126 @@ inline uint32_t PrepareFourDigitsReversed(uint32_t n) { // // Note two leading zeros: // EXPECT_EQ(absl::string_view(ascii, 8), "00102030"); // -// If `Reversed` is set to true, the result becomes reversed to "03020100". -// // Pre-condition: `i` must be less than 100000000. -inline uint64_t PrepareEightDigitsImpl(uint32_t i, bool reversed) { +inline uint64_t PrepareEightDigits(uint32_t i) { ABSL_ASSUME(i < 10000'0000); // Prepare 2 blocks of 4 digits "in parallel". uint32_t hi = i / 10000; uint32_t lo = i % 10000; - uint64_t merged = (uint64_t{hi} << (reversed ? 32 : 0)) | - (uint64_t{lo} << (reversed ? 0 : 32)); + uint64_t merged = hi | (uint64_t{lo} << 32); uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) & ((0x7Full << 32) | 0x7Full); uint64_t mod100 = merged - 100ull * div100; - uint64_t hundreds = - (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0)); + uint64_t hundreds = (mod100 << 16) + div100; uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div; tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull; - tens = (tens << (reversed ? 8 : 0)) + - ((hundreds - 10ull * tens) << (reversed ? 0 : 8)); + tens += (hundreds - 10ull * tens) << 8; return tens; } -inline uint64_t PrepareEightDigits(uint32_t i) { - return PrepareEightDigitsImpl(i, false); -} -inline uint64_t PrepareEightDigitsReversed(uint32_t i) { - return PrepareEightDigitsImpl(i, true); + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE absl::Nonnull EncodeFullU32( + uint32_t n, absl::Nonnull out_str) { + if (n < 10) { + *out_str = static_cast('0' + n); + return out_str + 1; + } + if (n < 100'000'000) { + uint64_t bottom = PrepareEightDigits(n); + ABSL_ASSUME(bottom != 0); + // 0 minus 8 to make MSVC happy. + uint32_t zeroes = + static_cast(absl::countr_zero(bottom)) & (0 - 8u); + little_endian::Store64(out_str, (bottom + kEightZeroBytes) >> zeroes); + return out_str + sizeof(bottom) - zeroes / 8; + } + uint32_t div08 = n / 100'000'000; + uint32_t mod08 = n % 100'000'000; + uint64_t bottom = PrepareEightDigits(mod08) + kEightZeroBytes; + out_str = EncodeHundred(div08, out_str); + little_endian::Store64(out_str, bottom); + return out_str + sizeof(bottom); } -template -class FastUIntToStringConverter { - static_assert( - std::is_same())>::value, - "to avoid code bloat, only instantiate this for int and larger types"); - static_assert(std::is_unsigned::value, - "this class is only for unsigned types"); - - public: - // Outputs the given number backward (like with std::copy_backward), - // starting from the end of the string. - // The number of digits in the number must have been already measured and - // passed *exactly*, otherwise the behavior is undefined. - // (This is an optimization, as calculating the number of digits again would - // slow down the hot path.) - // Returns an iterator to the start of the suffix that was appended. - static BackwardIt FastIntToBufferBackward(T v, BackwardIt end) { - // THIS IS A HOT FUNCTION with a very deliberate structure to exploit branch - // prediction and shorten the critical path for smaller numbers. - // Do not move around the if/else blocks or attempt to simplify it - // without benchmarking any changes. - - if (v < 10) { - goto AT_LEAST_1 /* NOTE: mandatory for the 0 case */; - } - if (v < 1000) { - goto AT_LEAST_10; - } - if (v < 10000000) { - goto AT_LEAST_1000; - } - - if (v >= 100000000 / 10) { - if (v >= 10000000000000000 / 10) { - DoFastIntToBufferBackward<8>(v, end); - } - DoFastIntToBufferBackward<8>(v, end); - } - - if (v >= 10000 / 10) { - AT_LEAST_1000: - DoFastIntToBufferBackward<4>(v, end); - } - - if (v >= 100 / 10) { - AT_LEAST_10: - DoFastIntToBufferBackward<2>(v, end); - } - - if (v >= 10 / 10) { - AT_LEAST_1: - end = DoFastIntToBufferBackward(v, end, std::integral_constant()); - } - return end; +inline ABSL_ATTRIBUTE_ALWAYS_INLINE char* EncodeFullU64(uint64_t i, + char* buffer) { + if (i <= std::numeric_limits::max()) { + return EncodeFullU32(static_cast(i), buffer); } - - private: - // Only assume pointers are contiguous for now. String and vector iterators - // could be special-cased as well, but there's no need for them here. - // With C++20 we can probably switch to std::contiguous_iterator_tag. - static constexpr bool kIsContiguousIterator = - std::is_pointer::value; - - template - static void DoFastIntToBufferBackward(T& v, BackwardIt& end) { - constexpr T kModulus = Pow(10, Exponent); - T remainder = static_cast(v % kModulus); - v = static_cast(v / kModulus); - end = DoFastIntToBufferBackward(remainder, end, - std::integral_constant()); + uint32_t mod08; + if (i < 1'0000'0000'0000'0000ull) { + uint32_t div08 = static_cast(i / 100'000'000ull); + mod08 = static_cast(i % 100'000'000ull); + buffer = EncodeFullU32(div08, buffer); + } else { + uint64_t div08 = i / 100'000'000ull; + mod08 = static_cast(i % 100'000'000ull); + uint32_t div016 = static_cast(div08 / 100'000'000ull); + uint32_t div08mod08 = static_cast(div08 % 100'000'000ull); + uint64_t mid_result = PrepareEightDigits(div08mod08) + kEightZeroBytes; + buffer = EncodeTenThousand(div016, buffer); + little_endian::Store64(buffer, mid_result); + buffer += sizeof(mid_result); } - - static BackwardIt DoFastIntToBufferBackward(const T&, BackwardIt end, - std::integral_constant) { - return end; - } - - static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end, - std::integral_constant) { - *--end = static_cast('0' + v); - return DoFastIntToBufferBackward(v, end, std::integral_constant()); - } - - static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end, - std::integral_constant) { - if (kIsContiguousIterator) { - const uint32_t digits = - PrepareFourDigits(static_cast(v)) + kFourZeroBytes; - end -= sizeof(digits); - little_endian::Store32(&*end, digits); - } else { - uint32_t digits = - PrepareFourDigitsReversed(static_cast(v)) + kFourZeroBytes; - for (size_t i = 0; i < sizeof(digits); ++i) { - *--end = static_cast(digits); - digits >>= CHAR_BIT; - } - } - return end; - } - - static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end, - std::integral_constant) { - if (kIsContiguousIterator) { - const uint64_t digits = - PrepareEightDigits(static_cast(v)) + kEightZeroBytes; - end -= sizeof(digits); - little_endian::Store64(&*end, digits); - } else { - uint64_t digits = PrepareEightDigitsReversed(static_cast(v)) + - kEightZeroBytes; - for (size_t i = 0; i < sizeof(digits); ++i) { - *--end = static_cast(digits); - digits >>= CHAR_BIT; - } - } - return end; - } - - template - static BackwardIt DoFastIntToBufferBackward( - T v, BackwardIt end, std::integral_constant) { - constexpr int kLogModulus = Digits - Digits / 2; - constexpr T kModulus = Pow(static_cast(10), kLogModulus); - bool is_safe_to_use_division_trick = Digits <= 8; - T quotient, remainder; - if (is_safe_to_use_division_trick) { - constexpr uint64_t kCoefficient = - ComputePowerOf100DivisionCoefficient(kLogModulus); - quotient = (v * kCoefficient) >> (10 * kLogModulus); - remainder = v - quotient * kModulus; - } else { - quotient = v / kModulus; - remainder = v % kModulus; - } - end = DoFastIntToBufferBackward(remainder, end, - std::integral_constant()); - return DoFastIntToBufferBackward( - quotient, end, std::integral_constant()); - } -}; - -// Returns an iterator to the start of the suffix that was appended -template -std::enable_if_t::value, BackwardIt> -DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) { - using PromotedT = std::decay_t; - using Converter = FastUIntToStringConverter; - (void)digits; - return Converter().FastIntToBufferBackward(v, end); -} - -template -std::enable_if_t::value, BackwardIt> -DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) { - if (absl::numbers_internal::IsNegative(v)) { - // Store the minus sign *before* we produce the number itself, not after. - // This gets us a tail call. - end[-static_cast(digits) - 1] = '-'; - } - return DoFastIntToBufferBackward( - absl::numbers_internal::UnsignedAbsoluteValue(v), end, digits); -} - -template -std::enable_if_t::value, int> -GetNumDigitsOrNegativeIfNegativeImpl(T v) { - const auto /* either bool or std::false_type */ is_negative = - absl::numbers_internal::IsNegative(v); - const int digits = static_cast(absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(v))); - return is_negative ? ~digits : digits; + uint64_t mod_result = PrepareEightDigits(mod08) + kEightZeroBytes; + little_endian::Store64(buffer, mod_result); + return buffer + sizeof(mod_result); } } // namespace void numbers_internal::PutTwoDigits(uint32_t i, absl::Nonnull buf) { - little_endian::Store16( - buf, static_cast(PrepareTwoDigits(i) + kTwoZeroBytes)); + assert(i < 100); + uint32_t base = kTwoZeroBytes; + uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div; + uint32_t mod10 = i - 10u * div10; + base += div10 + (mod10 << 8); + little_endian::Store16(buf, static_cast(base)); } absl::Nonnull numbers_internal::FastIntToBuffer( - uint32_t i, absl::Nonnull buffer) { - const uint32_t digits = absl::numbers_internal::Base10Digits(i); - buffer += digits; - *buffer = '\0'; // We're going backward, so store this first - FastIntToBufferBackward(i, buffer, digits); - return buffer; + uint32_t n, absl::Nonnull out_str) { + out_str = EncodeFullU32(n, out_str); + *out_str = '\0'; + return out_str; } absl::Nonnull numbers_internal::FastIntToBuffer( int32_t i, absl::Nonnull buffer) { - buffer += static_cast(i < 0); - uint32_t digits = absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(i)); - buffer += digits; - *buffer = '\0'; // We're going backward, so store this first - FastIntToBufferBackward(i, buffer, digits); + uint32_t u = static_cast(i); + if (i < 0) { + *buffer++ = '-'; + // We need to do the negation in modular (i.e., "unsigned") + // arithmetic; MSVC++ apparently warns for plain "-u", so + // we write the equivalent expression "0 - u" instead. + u = 0 - u; + } + buffer = EncodeFullU32(u, buffer); + *buffer = '\0'; return buffer; } absl::Nonnull numbers_internal::FastIntToBuffer( uint64_t i, absl::Nonnull buffer) { - uint32_t digits = absl::numbers_internal::Base10Digits(i); - buffer += digits; - *buffer = '\0'; // We're going backward, so store this first - FastIntToBufferBackward(i, buffer, digits); + buffer = EncodeFullU64(i, buffer); + *buffer = '\0'; return buffer; } absl::Nonnull numbers_internal::FastIntToBuffer( int64_t i, absl::Nonnull buffer) { - buffer += static_cast(i < 0); - uint32_t digits = absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(i)); - buffer += digits; - *buffer = '\0'; // We're going backward, so store this first - FastIntToBufferBackward(i, buffer, digits); + uint64_t u = static_cast(i); + if (i < 0) { + *buffer++ = '-'; + // We need to do the negation in modular (i.e., "unsigned") + // arithmetic; MSVC++ apparently warns for plain "-u", so + // we write the equivalent expression "0 - u" instead. + u = 0 - u; + } + buffer = EncodeFullU64(u, buffer); + *buffer = '\0'; return buffer; } -absl::Nonnull numbers_internal::FastIntToBufferBackward( - uint32_t i, absl::Nonnull buffer_end, uint32_t exact_digit_count) { - return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count); -} - -absl::Nonnull numbers_internal::FastIntToBufferBackward( - int32_t i, absl::Nonnull buffer_end, uint32_t exact_digit_count) { - return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count); -} - -absl::Nonnull numbers_internal::FastIntToBufferBackward( - uint64_t i, absl::Nonnull buffer_end, uint32_t exact_digit_count) { - return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count); -} - -absl::Nonnull numbers_internal::FastIntToBufferBackward( - int64_t i, absl::Nonnull buffer_end, uint32_t exact_digit_count) { - return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count); -} - -int numbers_internal::GetNumDigitsOrNegativeIfNegative(signed char v) { - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned char v) { - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(short v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative( - unsigned short v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(int v) { - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned int v) { - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(long v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative( - unsigned long v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(long long v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative( - unsigned long long v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} - // Given a 128-bit number expressed as a pair of uint64_t, high half first, // return that number multiplied by the given 32-bit value. If the result is // too large to fit in a 128-bit number, divide it by 2 until it fits. diff --git a/yass/third_party/abseil-cpp/absl/strings/numbers.h b/yass/third_party/abseil-cpp/absl/strings/numbers.h index ad4e66b634..739dbb28f9 100644 --- a/yass/third_party/abseil-cpp/absl/strings/numbers.h +++ b/yass/third_party/abseil-cpp/absl/strings/numbers.h @@ -32,7 +32,6 @@ #endif #include -#include #include #include #include @@ -40,12 +39,10 @@ #include #include -#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/macros.h" #include "absl/base/nullability.h" -#include "absl/base/optimization.h" #include "absl/base/port.h" #include "absl/numeric/bits.h" #include "absl/numeric/int128.h" @@ -161,96 +158,6 @@ bool safe_strtou128_base(absl::string_view text, static const int kFastToBufferSize = 32; static const int kSixDigitsToBufferSize = 16; -template -std::enable_if_t::value, bool> IsNegative(const T& v) { - return v < T(); -} - -template -std::enable_if_t::value, std::false_type> IsNegative( - const T&) { - // The integer is unsigned, so return a compile-time constant. - // This can help the optimizer avoid having to prove bool to be false later. - return std::false_type(); -} - -template -std::enable_if_t>::value, T&&> -UnsignedAbsoluteValue(T&& v ABSL_ATTRIBUTE_LIFETIME_BOUND) { - // The value is unsigned; just return the original. - return std::forward(v); -} - -template -ABSL_ATTRIBUTE_CONST_FUNCTION - std::enable_if_t::value, std::make_unsigned_t> - UnsignedAbsoluteValue(T v) { - using U = std::make_unsigned_t; - return IsNegative(v) ? U() - static_cast(v) : static_cast(v); -} - -// Returns the number of base-10 digits in the given number. -// Note that this strictly counts digits. It does not count the sign. -// The `initial_digits` parameter is the starting point, which is normally equal -// to 1 because the number of digits in 0 is 1 (a special case). -// However, callers may e.g. wish to change it to 2 to account for the sign. -template -std::enable_if_t::value, uint32_t> Base10Digits( - T v, const uint32_t initial_digits = 1) { - uint32_t r = initial_digits; - // If code size becomes an issue, the 'if' stage can be removed for a minor - // performance loss. - for (;;) { - if (ABSL_PREDICT_TRUE(v < 10 * 10)) { - r += (v >= 10); - break; - } - if (ABSL_PREDICT_TRUE(v < 1000 * 10)) { - r += (v >= 1000) + 2; - break; - } - if (ABSL_PREDICT_TRUE(v < 100000 * 10)) { - r += (v >= 100000) + 4; - break; - } - r += 6; - v = static_cast(v / 1000000); - } - return r; -} - -template -std::enable_if_t::value, uint32_t> Base10Digits( - T v, uint32_t r = 1) { - // Branchlessly add 1 to account for a minus sign. - r += static_cast(IsNegative(v)); - return Base10Digits(UnsignedAbsoluteValue(v), r); -} - -// These functions return the number of base-10 digits, but multiplied by -1 if -// the input itself is negative. This is handy and efficient for later usage, -// since the bitwise complement of the result becomes equal to the number of -// characters required. -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - signed char v); -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned char v); -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - short v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned short v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(int v); -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned int v); -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - long v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned long v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - long long v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned long long v); // NOLINT - // Helper function for fast formatting of floating-point values. // The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six // significant digits are returned, trailing zeros are removed, and numbers @@ -259,18 +166,24 @@ ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( // Required buffer size is `kSixDigitsToBufferSize`. size_t SixDigitsToBuffer(double d, absl::Nonnull buffer); -// All of these functions take an output buffer +// WARNING: These functions may write more characters than necessary, because +// they are intended for speed. All functions take an output buffer // as an argument and return a pointer to the last byte they wrote, which is the // terminating '\0'. At most `kFastToBufferSize` bytes are written. -absl::Nonnull FastIntToBuffer(int32_t i, absl::Nonnull buffer); -absl::Nonnull FastIntToBuffer(uint32_t i, absl::Nonnull buffer); -absl::Nonnull FastIntToBuffer(int64_t i, absl::Nonnull buffer); -absl::Nonnull FastIntToBuffer(uint64_t i, absl::Nonnull buffer); +absl::Nonnull FastIntToBuffer(int32_t i, absl::Nonnull buffer) + ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); +absl::Nonnull FastIntToBuffer(uint32_t n, absl::Nonnull out_str) + ABSL_INTERNAL_NEED_MIN_SIZE(out_str, kFastToBufferSize); +absl::Nonnull FastIntToBuffer(int64_t i, absl::Nonnull buffer) + ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); +absl::Nonnull FastIntToBuffer(uint64_t i, absl::Nonnull buffer) + ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); // For enums and integer types that are not an exact match for the types above, // use templates to call the appropriate one of the four overloads above. template -absl::Nonnull FastIntToBuffer(int_type i, absl::Nonnull buffer) { +absl::Nonnull FastIntToBuffer(int_type i, absl::Nonnull buffer) + ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize) { static_assert(sizeof(i) <= 64 / 8, "FastIntToBuffer works only with 64-bit-or-less integers."); // TODO(jorg): This signed-ness check is used because it works correctly @@ -294,58 +207,6 @@ absl::Nonnull FastIntToBuffer(int_type i, absl::Nonnull buffer) { } } -// These functions do NOT add any null-terminator. -// They return a pointer to the beginning of the written string. -// The digit counts provided must *exactly* match the number of base-10 digits -// in the number, or the behavior is undefined. -// (i.e. do NOT count the minus sign, or over- or under-count the digits.) -absl::Nonnull FastIntToBufferBackward(int32_t i, - absl::Nonnull buffer_end, - uint32_t exact_digit_count); -absl::Nonnull FastIntToBufferBackward(uint32_t i, - absl::Nonnull buffer_end, - uint32_t exact_digit_count); -absl::Nonnull FastIntToBufferBackward(int64_t i, - absl::Nonnull buffer_end, - uint32_t exact_digit_count); -absl::Nonnull FastIntToBufferBackward(uint64_t i, - absl::Nonnull buffer_end, - uint32_t exact_digit_count); - -// For enums and integer types that are not an exact match for the types above, -// use templates to call the appropriate one of the four overloads above. -template -absl::Nonnull FastIntToBufferBackward(int_type i, - absl::Nonnull buffer_end, - uint32_t exact_digit_count) { - static_assert( - sizeof(i) <= 64 / 8, - "FastIntToBufferBackward works only with 64-bit-or-less integers."); - // This signed-ness check is used because it works correctly - // with enums, and it also serves to check that int_type is not a pointer. - // If one day something like std::is_signed works, switch to it. - // These conditions are constexpr bools to suppress MSVC warning C4127. - constexpr bool kIsSigned = static_cast(1) - 2 < 0; - constexpr bool kUse64Bit = sizeof(i) > 32 / 8; - if (kIsSigned) { - if (kUse64Bit) { - return FastIntToBufferBackward(static_cast(i), buffer_end, - exact_digit_count); - } else { - return FastIntToBufferBackward(static_cast(i), buffer_end, - exact_digit_count); - } - } else { - if (kUse64Bit) { - return FastIntToBufferBackward(static_cast(i), buffer_end, - exact_digit_count); - } else { - return FastIntToBufferBackward(static_cast(i), buffer_end, - exact_digit_count); - } - } -} - // Implementation of SimpleAtoi, generalized to support arbitrary base (used // with base different from 10 elsewhere in Abseil implementation). template diff --git a/yass/third_party/abseil-cpp/absl/strings/numbers_test.cc b/yass/third_party/abseil-cpp/absl/strings/numbers_test.cc index 1ceff70ff2..75c2dcf2fd 100644 --- a/yass/third_party/abseil-cpp/absl/strings/numbers_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/numbers_test.cc @@ -231,15 +231,10 @@ TEST(Numbers, TestFastPrints) { CheckInt32(INT_MIN); CheckInt32(INT_MAX); CheckInt64(LONG_MIN); - CheckInt64(uint64_t{10000000}); - CheckInt64(uint64_t{100000000}); CheckInt64(uint64_t{1000000000}); CheckInt64(uint64_t{9999999999}); CheckInt64(uint64_t{100000000000000}); CheckInt64(uint64_t{999999999999999}); - CheckInt64(uint64_t{1000000000000000}); - CheckInt64(uint64_t{10000000000000000}); - CheckInt64(uint64_t{100000000000000000}); CheckInt64(uint64_t{1000000000000000000}); CheckInt64(uint64_t{1199999999999999999}); CheckInt64(int64_t{-700000000000000000}); @@ -251,8 +246,6 @@ TEST(Numbers, TestFastPrints) { CheckUInt64(uint64_t{999999999999999}); CheckUInt64(uint64_t{1000000000000000000}); CheckUInt64(uint64_t{1199999999999999999}); - CheckUInt64(uint64_t{10000000000000000000u}); - CheckUInt64(uint64_t{10200300040000500006u}); CheckUInt64(std::numeric_limits::max()); for (int i = 0; i < 10000; i++) { diff --git a/yass/third_party/abseil-cpp/absl/strings/str_cat.cc b/yass/third_party/abseil-cpp/absl/strings/str_cat.cc index 098ab18388..c51c13732d 100644 --- a/yass/third_party/abseil-cpp/absl/strings/str_cat.cc +++ b/yass/third_party/abseil-cpp/absl/strings/str_cat.cc @@ -20,19 +20,18 @@ #include #include #include +#include #include -#include #include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" #include "absl/base/nullability.h" #include "absl/strings/internal/resize_uninitialized.h" -#include "absl/strings/numbers.h" #include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN - // ---------------------------------------------------------------------- // StrCat() // This merges the given strings or integers, with no delimiter. This @@ -43,7 +42,8 @@ ABSL_NAMESPACE_BEGIN namespace { // Append is merely a version of memcpy that returns the address of the byte // after the area just overwritten. -absl::Nonnull Append(absl::Nonnull out, const AlphaNum& x) { +inline absl::Nonnull Append(absl::Nonnull out, + const AlphaNum& x) { // memcpy is allowed to overwrite arbitrary memory, so doing this after the // call would force an extra fetch of x.size(). char* after = out + x.size(); @@ -53,12 +53,23 @@ absl::Nonnull Append(absl::Nonnull out, const AlphaNum& x) { return after; } +inline void STLStringAppendUninitializedAmortized(std::string* dest, + size_t to_append) { + strings_internal::AppendUninitializedTraits::Append(dest, + to_append); +} } // namespace std::string StrCat(const AlphaNum& a, const AlphaNum& b) { std::string result; - absl::strings_internal::STLStringResizeUninitialized(&result, - a.size() + b.size()); + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + constexpr uint64_t kMaxSize = uint64_t{std::numeric_limits::max()}; + const uint64_t result_size = + static_cast(a.size()) + static_cast(b.size()); + ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow"); + absl::strings_internal::STLStringResizeUninitialized( + &result, static_cast(result_size)); char* const begin = &result[0]; char* out = begin; out = Append(out, a); @@ -69,8 +80,15 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b) { std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { std::string result; + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + constexpr uint64_t kMaxSize = uint64_t{std::numeric_limits::max()}; + const uint64_t result_size = static_cast(a.size()) + + static_cast(b.size()) + + static_cast(c.size()); + ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow"); strings_internal::STLStringResizeUninitialized( - &result, a.size() + b.size() + c.size()); + &result, static_cast(result_size)); char* const begin = &result[0]; char* out = begin; out = Append(out, a); @@ -83,8 +101,16 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d) { std::string result; + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + constexpr uint64_t kMaxSize = uint64_t{std::numeric_limits::max()}; + const uint64_t result_size = static_cast(a.size()) + + static_cast(b.size()) + + static_cast(c.size()) + + static_cast(d.size()); + ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow"); strings_internal::STLStringResizeUninitialized( - &result, a.size() + b.size() + c.size() + d.size()); + &result, static_cast(result_size)); char* const begin = &result[0]; char* out = begin; out = Append(out, a); @@ -98,135 +124,18 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, namespace strings_internal { // Do not call directly - these are not part of the public API. -void STLStringAppendUninitializedAmortized(std::string* dest, - size_t to_append) { - strings_internal::AppendUninitializedTraits::Append(dest, - to_append); -} - -template -std::enable_if_t::value, std::string> IntegerToString( - Integer i) { - std::string str; - const auto /* either bool or std::false_type */ is_negative = - absl::numbers_internal::IsNegative(i); - const uint32_t digits = absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(i)); - absl::strings_internal::STLStringResizeUninitialized( - &str, digits + static_cast(is_negative)); - absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits); - return str; -} - -template <> -std::string IntegerToString(long i) { // NOLINT - if (sizeof(i) <= sizeof(int)) { - return IntegerToString(static_cast(i)); - } else { - return IntegerToString(static_cast(i)); // NOLINT - } -} - -template <> -std::string IntegerToString(unsigned long i) { // NOLINT - if (sizeof(i) <= sizeof(unsigned int)) { - return IntegerToString(static_cast(i)); - } else { - return IntegerToString(static_cast(i)); // NOLINT - } -} - -template -std::enable_if_t::value, std::string> -FloatToString(Float f) { - std::string result; - strings_internal::STLStringResizeUninitialized( - &result, numbers_internal::kSixDigitsToBufferSize); - char* start = &result[0]; - result.erase(numbers_internal::SixDigitsToBuffer(f, start)); - return result; -} - -std::string SingleArgStrCat(int x) { return IntegerToString(x); } -std::string SingleArgStrCat(unsigned int x) { return IntegerToString(x); } -// NOLINTNEXTLINE -std::string SingleArgStrCat(long x) { return IntegerToString(x); } -// NOLINTNEXTLINE -std::string SingleArgStrCat(unsigned long x) { return IntegerToString(x); } -// NOLINTNEXTLINE -std::string SingleArgStrCat(long long x) { return IntegerToString(x); } -// NOLINTNEXTLINE -std::string SingleArgStrCat(unsigned long long x) { return IntegerToString(x); } -std::string SingleArgStrCat(float x) { return FloatToString(x); } -std::string SingleArgStrCat(double x) { return FloatToString(x); } - -template -std::enable_if_t::value, void> AppendIntegerToString( - std::string& str, Integer i) { - const auto /* either bool or std::false_type */ is_negative = - absl::numbers_internal::IsNegative(i); - const uint32_t digits = absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(i)); - absl::strings_internal::STLStringAppendUninitializedAmortized( - &str, digits + static_cast(is_negative)); - absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits); -} - -template <> -void AppendIntegerToString(std::string& str, long i) { // NOLINT - if (sizeof(i) <= sizeof(int)) { - return AppendIntegerToString(str, static_cast(i)); - } else { - return AppendIntegerToString(str, static_cast(i)); // NOLINT - } -} - -template <> -void AppendIntegerToString(std::string& str, - unsigned long i) { // NOLINT - if (sizeof(i) <= sizeof(unsigned int)) { - return AppendIntegerToString(str, static_cast(i)); - } else { - return AppendIntegerToString(str, - static_cast(i)); // NOLINT - } -} - -// `SingleArgStrAppend` overloads are defined here for the same reasons as with -// `SingleArgStrCat` above. -void SingleArgStrAppend(std::string& str, int x) { - return AppendIntegerToString(str, x); -} - -void SingleArgStrAppend(std::string& str, unsigned int x) { - return AppendIntegerToString(str, x); -} - -// NOLINTNEXTLINE -void SingleArgStrAppend(std::string& str, long x) { - return AppendIntegerToString(str, x); -} - -// NOLINTNEXTLINE -void SingleArgStrAppend(std::string& str, unsigned long x) { - return AppendIntegerToString(str, x); -} - -// NOLINTNEXTLINE -void SingleArgStrAppend(std::string& str, long long x) { - return AppendIntegerToString(str, x); -} - -// NOLINTNEXTLINE -void SingleArgStrAppend(std::string& str, unsigned long long x) { - return AppendIntegerToString(str, x); -} - std::string CatPieces(std::initializer_list pieces) { std::string result; - size_t total_size = 0; - for (absl::string_view piece : pieces) total_size += piece.size(); - strings_internal::STLStringResizeUninitialized(&result, total_size); + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + constexpr uint64_t kMaxSize = uint64_t{std::numeric_limits::max()}; + uint64_t total_size = 0; + for (absl::string_view piece : pieces) { + total_size += piece.size(); + } + ABSL_INTERNAL_CHECK(total_size <= kMaxSize, "size_t overflow"); + strings_internal::STLStringResizeUninitialized( + &result, static_cast(total_size)); char* const begin = &result[0]; char* out = begin; @@ -258,7 +167,7 @@ void AppendPieces(absl::Nonnull dest, ASSERT_NO_OVERLAP(*dest, piece); to_append += piece.size(); } - strings_internal::STLStringAppendUninitializedAmortized(dest, to_append); + STLStringAppendUninitializedAmortized(dest, to_append); char* const begin = &(*dest)[0]; char* out = begin + old_size; @@ -277,7 +186,7 @@ void AppendPieces(absl::Nonnull dest, void StrAppend(absl::Nonnull dest, const AlphaNum& a) { ASSERT_NO_OVERLAP(*dest, a); std::string::size_type old_size = dest->size(); - strings_internal::STLStringAppendUninitializedAmortized(dest, a.size()); + STLStringAppendUninitializedAmortized(dest, a.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); @@ -289,8 +198,7 @@ void StrAppend(absl::Nonnull dest, const AlphaNum& a, ASSERT_NO_OVERLAP(*dest, a); ASSERT_NO_OVERLAP(*dest, b); std::string::size_type old_size = dest->size(); - strings_internal::STLStringAppendUninitializedAmortized(dest, - a.size() + b.size()); + STLStringAppendUninitializedAmortized(dest, a.size() + b.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); @@ -304,8 +212,7 @@ void StrAppend(absl::Nonnull dest, const AlphaNum& a, ASSERT_NO_OVERLAP(*dest, b); ASSERT_NO_OVERLAP(*dest, c); std::string::size_type old_size = dest->size(); - strings_internal::STLStringAppendUninitializedAmortized( - dest, a.size() + b.size() + c.size()); + STLStringAppendUninitializedAmortized(dest, a.size() + b.size() + c.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); @@ -321,7 +228,7 @@ void StrAppend(absl::Nonnull dest, const AlphaNum& a, ASSERT_NO_OVERLAP(*dest, c); ASSERT_NO_OVERLAP(*dest, d); std::string::size_type old_size = dest->size(); - strings_internal::STLStringAppendUninitializedAmortized( + STLStringAppendUninitializedAmortized( dest, a.size() + b.size() + c.size() + d.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; diff --git a/yass/third_party/abseil-cpp/absl/strings/str_cat.h b/yass/third_party/abseil-cpp/absl/strings/str_cat.h index 33355bfdfd..b98adc0248 100644 --- a/yass/third_party/abseil-cpp/absl/strings/str_cat.h +++ b/yass/third_party/abseil-cpp/absl/strings/str_cat.h @@ -94,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -453,36 +454,77 @@ std::string CatPieces(std::initializer_list pieces); void AppendPieces(absl::Nonnull dest, std::initializer_list pieces); -void STLStringAppendUninitializedAmortized(std::string* dest, size_t to_append); +template +std::string IntegerToString(Integer i) { + // Any integer (signed/unsigned) up to 64 bits can be formatted into a buffer + // with 22 bytes (including NULL at the end). + constexpr size_t kMaxDigits10 = 22; + std::string result; + strings_internal::STLStringResizeUninitialized(&result, kMaxDigits10); + char* start = &result[0]; + // note: this can be optimized to not write last zero. + char* end = numbers_internal::FastIntToBuffer(i, start); + auto size = static_cast(end - start); + assert((size < result.size()) && + "StrCat(Integer) does not fit into kMaxDigits10"); + result.erase(size); + return result; +} +template +std::string FloatToString(Float f) { + std::string result; + strings_internal::STLStringResizeUninitialized( + &result, numbers_internal::kSixDigitsToBufferSize); + char* start = &result[0]; + result.erase(numbers_internal::SixDigitsToBuffer(f, start)); + return result; +} // `SingleArgStrCat` overloads take built-in `int`, `long` and `long long` types // (signed / unsigned) to avoid ambiguity on the call side. If we used int32_t // and int64_t, then at least one of the three (`int` / `long` / `long long`) // would have been ambiguous when passed to `SingleArgStrCat`. -std::string SingleArgStrCat(int x); -std::string SingleArgStrCat(unsigned int x); -std::string SingleArgStrCat(long x); // NOLINT -std::string SingleArgStrCat(unsigned long x); // NOLINT -std::string SingleArgStrCat(long long x); // NOLINT -std::string SingleArgStrCat(unsigned long long x); // NOLINT -std::string SingleArgStrCat(float x); -std::string SingleArgStrCat(double x); +inline std::string SingleArgStrCat(int x) { return IntegerToString(x); } +inline std::string SingleArgStrCat(unsigned int x) { + return IntegerToString(x); +} +// NOLINTNEXTLINE +inline std::string SingleArgStrCat(long x) { return IntegerToString(x); } +// NOLINTNEXTLINE +inline std::string SingleArgStrCat(unsigned long x) { + return IntegerToString(x); +} +// NOLINTNEXTLINE +inline std::string SingleArgStrCat(long long x) { return IntegerToString(x); } +// NOLINTNEXTLINE +inline std::string SingleArgStrCat(unsigned long long x) { + return IntegerToString(x); +} +inline std::string SingleArgStrCat(float x) { return FloatToString(x); } +inline std::string SingleArgStrCat(double x) { return FloatToString(x); } -// `SingleArgStrAppend` overloads are defined here for the same reasons as with -// `SingleArgStrCat` above. -void SingleArgStrAppend(std::string& str, int x); -void SingleArgStrAppend(std::string& str, unsigned int x); -void SingleArgStrAppend(std::string& str, long x); // NOLINT -void SingleArgStrAppend(std::string& str, unsigned long x); // NOLINT -void SingleArgStrAppend(std::string& str, long long x); // NOLINT -void SingleArgStrAppend(std::string& str, unsigned long long x); // NOLINT +// As of September 2023, the SingleArgStrCat() optimization is only enabled for +// libc++. The reasons for this are: +// 1) The SSO size for libc++ is 23, while libstdc++ and MSSTL have an SSO size +// of 15. Since IntegerToString unconditionally resizes the string to 22 bytes, +// this causes both libstdc++ and MSSTL to allocate. +// 2) strings_internal::STLStringResizeUninitialized() only has an +// implementation that avoids initialization when using libc++. This isn't as +// relevant as (1), and the cost should be benchmarked if (1) ever changes on +// libstc++ or MSSTL. +#ifdef _LIBCPP_VERSION +#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE true +#else +#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE false +#endif -template ::value && - !std::is_same::value && - !std::is_same::value>> +template {} && !std::is_same{}>> using EnableIfFastCase = T; +#undef ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE + } // namespace strings_internal ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); } @@ -558,67 +600,6 @@ inline void StrAppend(absl::Nonnull dest, const AlphaNum& a, static_cast(args).Piece()...}); } -template -std::enable_if_t< - std::is_integral>::value, void> -StrAppend(absl::Nonnull result, T i) { - return absl::strings_internal::SingleArgStrAppend(*result, i); -} - -// This overload is only selected if all the parameters are numbers that can be -// handled quickly. -// Later we can look into how we can extend this to more general argument -// mixtures without bloating codegen too much, or copying unnecessarily. -template -std::enable_if_t< - (sizeof...(T) > 1), - std::common_type_t>...>> -StrAppend(absl::Nonnull str, T... args) { - // Do not add unnecessary variables, logic, or even "free" lambdas here. - // They can add overhead for the compiler and/or at run time. - // Furthermore, assume this function will be inlined. - // This function is carefully tailored to be able to be largely optimized away - // so that it becomes near-equivalent to the caller handling each argument - // individually while minimizing register pressure, so that the compiler - // can inline it with minimal overhead. - - // First, calculate the total length, so we can perform just a single resize. - // Save all the lengths for later. - size_t total_length = 0; - const ptrdiff_t lengths[] = { - absl::numbers_internal::GetNumDigitsOrNegativeIfNegative(args)...}; - for (const ptrdiff_t possibly_negative_length : lengths) { - // Lengths are negative for negative numbers. Keep them for later use, but - // take their absolute values for calculating total lengths; - total_length += possibly_negative_length < 0 - ? static_cast(-possibly_negative_length) - : static_cast(possibly_negative_length); - } - - // Now reserve space for all the arguments. - const size_t old_size = str->size(); - absl::strings_internal::STLStringAppendUninitializedAmortized(str, - total_length); - - // Finally, output each argument one-by-one, from left to right. - size_t i = 0; // The current argument we're processing - ptrdiff_t n; // The length of the current argument - typename String::pointer pos = &(*str)[old_size]; - using SomeTrivialEmptyType = std::false_type; - const SomeTrivialEmptyType dummy; - // Ugly code due to the lack of C++17 fold expressions - const SomeTrivialEmptyType dummies[] = { - (/* Comma expressions are poor man's C++17 fold expression for C++14 */ - (void)(n = lengths[i]), - (void)(n < 0 ? (void)(*pos++ = '-'), (n = ~n) : 0), - (void)absl::numbers_internal::FastIntToBufferBackward( - absl::numbers_internal::UnsignedAbsoluteValue(std::move(args)), - pos += n, static_cast(n)), - (void)++i, dummy)...}; - (void)dummies; // Remove & migrate to fold expressions in C++17 -} - // Helper function for the future StrCat default floating-point format, %.6g // This is fast. inline strings_internal::AlphaNumBuffer< diff --git a/yass/third_party/abseil-cpp/absl/strings/str_cat_test.cc b/yass/third_party/abseil-cpp/absl/strings/str_cat_test.cc index b30a86fe12..66eddf0df0 100644 --- a/yass/third_party/abseil-cpp/absl/strings/str_cat_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/str_cat_test.cc @@ -39,24 +39,6 @@ namespace { -template -void VerifyInteger(Integer value) { - const std::string expected = std::to_string(value); - - EXPECT_EQ(absl::StrCat(value), expected); - - const char* short_prefix = "x"; - const char* long_prefix = "2;k.msabxiuow2[09i;o3k21-93-9=29]"; - - std::string short_str = short_prefix; - absl::StrAppend(&short_str, value); - EXPECT_EQ(short_str, short_prefix + expected); - - std::string long_str = long_prefix; - absl::StrAppend(&long_str, value); - EXPECT_EQ(long_str, long_prefix + expected); -} - // Test absl::StrCat of ints and longs of various sizes and signdedness. TEST(StrCat, Ints) { const short s = -1; // NOLINT(runtime/int) @@ -86,34 +68,6 @@ TEST(StrCat, Ints) { EXPECT_EQ(answer, "-9-12"); answer = absl::StrCat(uintptr, 0); EXPECT_EQ(answer, "130"); - - for (const uint32_t base : {2u, 10u}) { - for (const int extra_shift : {0, 12}) { - for (uint64_t i = 0; i < (1 << 8); ++i) { - uint64_t j = i; - while (true) { - uint64_t v = j ^ (extra_shift != 0 ? (j << extra_shift) * base : 0); - VerifyInteger(static_cast(v)); - VerifyInteger(static_cast(v)); - VerifyInteger(static_cast(v)); - VerifyInteger(static_cast(v)); - VerifyInteger(static_cast(v)); // NOLINT - VerifyInteger(static_cast(v)); // NOLINT - VerifyInteger(static_cast(v)); // NOLINT - VerifyInteger(static_cast(v)); // NOLINT - VerifyInteger(static_cast(v)); // NOLINT - VerifyInteger(static_cast(v)); // NOLINT - VerifyInteger(static_cast(v)); // NOLINT - VerifyInteger(static_cast(v)); // NOLINT - const uint64_t next = j == 0 ? 1 : j * base; - if (next <= j) { - break; - } - j = next; - } - } - } - } } TEST(StrCat, Enums) { diff --git a/yass/third_party/abseil-cpp/absl/strings/str_split.h b/yass/third_party/abseil-cpp/absl/strings/str_split.h index 49bff7b215..ba176fca3c 100644 --- a/yass/third_party/abseil-cpp/absl/strings/str_split.h +++ b/yass/third_party/abseil-cpp/absl/strings/str_split.h @@ -544,7 +544,7 @@ StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, typename strings_internal::SelectDelimiter::type; return strings_internal::Splitter( - text.value(), DelimiterType(d), std::move(p)); + text.value(), DelimiterType(std::move(d)), std::move(p)); } template (-1); diff --git a/yass/third_party/abseil-cpp/absl/strings/string_view_test.cc b/yass/third_party/abseil-cpp/absl/strings/string_view_test.cc index 251f184285..e978fc3f68 100644 --- a/yass/third_party/abseil-cpp/absl/strings/string_view_test.cc +++ b/yass/third_party/abseil-cpp/absl/strings/string_view_test.cc @@ -32,6 +32,7 @@ #include "gtest/gtest.h" #include "absl/base/config.h" +#include "absl/meta/type_traits.h" #if defined(ABSL_HAVE_STD_STRING_VIEW) || defined(__ANDROID__) // We don't control the death messaging when using std::string_view. @@ -46,6 +47,14 @@ namespace { +static_assert(!absl::type_traits_internal::IsOwner::value && + absl::type_traits_internal::IsView::value, + "string_view is a view, not an owner"); + +static_assert(absl::type_traits_internal::IsLifetimeBoundAssignment< + absl::string_view, std::string>::value, + "lifetimebound assignment not detected"); + // A minimal allocator that uses malloc(). template struct Mallocator { diff --git a/yass/third_party/abseil-cpp/absl/strings/substitute.cc b/yass/third_party/abseil-cpp/absl/strings/substitute.cc index dd32c75f69..a71f565a12 100644 --- a/yass/third_party/abseil-cpp/absl/strings/substitute.cc +++ b/yass/third_party/abseil-cpp/absl/strings/substitute.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "absl/base/config.h" @@ -84,6 +85,9 @@ void SubstituteAndAppendArray( // Build the string. size_t original_size = output->size(); + ABSL_INTERNAL_CHECK( + size <= std::numeric_limits::max() - original_size, + "size_t overflow"); strings_internal::STLStringResizeUninitializedAmortized(output, original_size + size); char* target = &(*output)[original_size]; diff --git a/yass/third_party/abseil-cpp/absl/synchronization/internal/per_thread_sem_test.cc b/yass/third_party/abseil-cpp/absl/synchronization/internal/per_thread_sem_test.cc index c66bca8469..2eb806799b 100644 --- a/yass/third_party/abseil-cpp/absl/synchronization/internal/per_thread_sem_test.cc +++ b/yass/third_party/abseil-cpp/absl/synchronization/internal/per_thread_sem_test.cc @@ -162,7 +162,7 @@ TEST_F(PerThreadSemTest, Timeouts) { absl::Duration slop = absl::Milliseconds(1); #ifdef _MSC_VER // Use higher slop on MSVC due to flaky test failures. - slop = absl::Milliseconds(4); + slop = absl::Milliseconds(8); #endif EXPECT_LE(delay - slop, elapsed) << "Wait returned " << delay - elapsed diff --git a/yass/third_party/abseil-cpp/absl/synchronization/mutex.h b/yass/third_party/abseil-cpp/absl/synchronization/mutex.h index d53a22bbd9..9505225259 100644 --- a/yass/third_party/abseil-cpp/absl/synchronization/mutex.h +++ b/yass/third_party/abseil-cpp/absl/synchronization/mutex.h @@ -148,7 +148,7 @@ struct SynchWaitParams; // // See also `MutexLock`, below, for scoped `Mutex` acquisition. -class ABSL_LOCKABLE Mutex { +class ABSL_LOCKABLE ABSL_ATTRIBUTE_WARN_UNUSED Mutex { public: // Creates a `Mutex` that is not held by anyone. This constructor is // typically used for Mutexes allocated on the heap or the stack. diff --git a/yass/third_party/abseil-cpp/absl/time/civil_time.h b/yass/third_party/abseil-cpp/absl/time/civil_time.h index 3e904a113d..d198ebaec0 100644 --- a/yass/third_party/abseil-cpp/absl/time/civil_time.h +++ b/yass/third_party/abseil-cpp/absl/time/civil_time.h @@ -55,7 +55,7 @@ // Example: // // // Construct a civil-time object for a specific day -// const absl::CivilDay cd(1969, 07, 20); +// const absl::CivilDay cd(1969, 7, 20); // // // Construct a civil-time object for a specific second // const absl::CivilSecond cd(2018, 8, 1, 12, 0, 1); @@ -65,7 +65,7 @@ // Example: // // // Valid in C++14 -// constexpr absl::CivilDay cd(1969, 07, 20); +// constexpr absl::CivilDay cd(1969, 7, 20); #ifndef ABSL_TIME_CIVIL_TIME_H_ #define ABSL_TIME_CIVIL_TIME_H_ diff --git a/yass/third_party/abseil-cpp/absl/types/BUILD.bazel b/yass/third_party/abseil-cpp/absl/types/BUILD.bazel index ce8f605b35..a86e2c13b5 100644 --- a/yass/third_party/abseil-cpp/absl/types/BUILD.bazel +++ b/yass/third_party/abseil-cpp/absl/types/BUILD.bazel @@ -138,6 +138,7 @@ cc_test( "//absl/container:fixed_array", "//absl/container:inlined_vector", "//absl/hash:hash_testing", + "//absl/meta:type_traits", "//absl/strings", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", diff --git a/yass/third_party/abseil-cpp/absl/types/CMakeLists.txt b/yass/third_party/abseil-cpp/absl/types/CMakeLists.txt index 92b4ae493c..fed532fc6b 100644 --- a/yass/third_party/abseil-cpp/absl/types/CMakeLists.txt +++ b/yass/third_party/abseil-cpp/absl/types/CMakeLists.txt @@ -138,6 +138,7 @@ absl_cc_test( absl::inlined_vector absl::hash_testing absl::strings + absl::type_traits GTest::gmock_main ) diff --git a/yass/third_party/abseil-cpp/absl/types/span.h b/yass/third_party/abseil-cpp/absl/types/span.h index c183aa88dd..560e569bc6 100644 --- a/yass/third_party/abseil-cpp/absl/types/span.h +++ b/yass/third_party/abseil-cpp/absl/types/span.h @@ -185,6 +185,7 @@ class Span { using const_reverse_iterator = std::reverse_iterator; using size_type = size_t; using difference_type = ptrdiff_t; + using absl_internal_is_view = std::true_type; static const size_type npos = ~(size_type(0)); diff --git a/yass/third_party/abseil-cpp/absl/types/span_test.cc b/yass/third_party/abseil-cpp/absl/types/span_test.cc index 29e8681f50..e24144dda1 100644 --- a/yass/third_party/abseil-cpp/absl/types/span_test.cc +++ b/yass/third_party/abseil-cpp/absl/types/span_test.cc @@ -31,10 +31,15 @@ #include "absl/container/fixed_array.h" #include "absl/container/inlined_vector.h" #include "absl/hash/hash_testing.h" +#include "absl/meta/type_traits.h" #include "absl/strings/str_cat.h" namespace { +static_assert(!absl::type_traits_internal::IsOwner>::value && + absl::type_traits_internal::IsView>::value, + "Span is a view, not an owner"); + MATCHER_P(DataIs, data, absl::StrCat("data() ", negation ? "isn't " : "is ", testing::PrintToString(data))) { diff --git a/yass/third_party/abseil-cpp/absl/utility/utility.h b/yass/third_party/abseil-cpp/absl/utility/utility.h index fc0d1f6551..ebbb49b715 100644 --- a/yass/third_party/abseil-cpp/absl/utility/utility.h +++ b/yass/third_party/abseil-cpp/absl/utility/utility.h @@ -51,11 +51,14 @@ ABSL_NAMESPACE_BEGIN // abstractions for platforms that had not yet provided them. Those // platforms are no longer supported. New code should simply use the // the ones from std directly. +using std::exchange; +using std::forward; using std::index_sequence; using std::index_sequence_for; using std::integer_sequence; using std::make_index_sequence; using std::make_integer_sequence; +using std::move; namespace utility_internal { @@ -129,27 +132,6 @@ template void in_place_index(utility_internal::InPlaceIndexTag) {} #endif // ABSL_USES_STD_VARIANT -// Constexpr move and forward - -// move() -// -// A constexpr version of `std::move()`, designed to be a drop-in replacement -// for C++14's `std::move()`. -template -constexpr absl::remove_reference_t&& move(T&& t) noexcept { - return static_cast&&>(t); -} - -// forward() -// -// A constexpr version of `std::forward()`, designed to be a drop-in replacement -// for C++14's `std::forward()`. -template -constexpr T&& forward( - absl::remove_reference_t& t) noexcept { // NOLINT(runtime/references) - return static_cast(t); -} - namespace utility_internal { // Helper method for expanding tuple into a called method. template @@ -215,26 +197,6 @@ auto apply(Functor&& functor, Tuple&& t) typename std::remove_reference::type>::value>{}); } -// exchange -// -// Replaces the value of `obj` with `new_value` and returns the old value of -// `obj`. `absl::exchange` is designed to be a drop-in replacement for C++14's -// `std::exchange`. -// -// Example: -// -// Foo& operator=(Foo&& other) { -// ptr1_ = absl::exchange(other.ptr1_, nullptr); -// int1_ = absl::exchange(other.int1_, -1); -// return *this; -// } -template -T exchange(T& obj, U&& new_value) { - T old_value = absl::move(obj); - obj = absl::forward(new_value); - return old_value; -} - namespace utility_internal { template T make_from_tuple_impl(Tuple&& tup, absl::index_sequence) { diff --git a/yass/third_party/abseil-cpp/absl/utility/utility_test.cc b/yass/third_party/abseil-cpp/absl/utility/utility_test.cc index 1af681362e..c540b22f40 100644 --- a/yass/third_party/abseil-cpp/absl/utility/utility_test.cc +++ b/yass/third_party/abseil-cpp/absl/utility/utility_test.cc @@ -205,14 +205,6 @@ TEST(ApplyTest, FlipFlop) { EXPECT_EQ(42, absl::apply(&FlipFlop::member, std::make_tuple(obj))); } -TEST(ExchangeTest, MoveOnly) { - auto a = Factory(1); - EXPECT_EQ(1, *a); - auto b = absl::exchange(a, Factory(2)); - EXPECT_EQ(2, *a); - EXPECT_EQ(1, *b); -} - TEST(MakeFromTupleTest, String) { EXPECT_EQ( absl::make_from_tuple(std::make_tuple("hello world", 5)), diff --git a/yass/third_party/boringssl/src/.gitignore b/yass/third_party/boringssl/src/.gitignore index 3e22141d95..439b6aa6b8 100644 --- a/yass/third_party/boringssl/src/.gitignore +++ b/yass/third_party/boringssl/src/.gitignore @@ -1,30 +1,33 @@ -build/ -build32/ -build64/ -ssl/test/runner/runner +/build +/build32 +/build64 +/ssl/test/runner/runner *.pyc *.swp *.swo -doc/*.html -doc/doc.css -rust/Cargo.lock -rust/bssl-crypto/Cargo.lock -rust/target +/doc/*.html +/doc/doc.css +/rust/Cargo.lock +/rust/bssl-crypto/Cargo.lock +/rust/target -util/bot/android_ndk -util/bot/android_sdk/public -util/bot/cmake -util/bot/golang -util/bot/libFuzzer -util/bot/libcxx -util/bot/libcxxabi -util/bot/llvm-build -util/bot/nasm-win32.exe -util/bot/ninja -util/bot/perl-win32 -util/bot/sde-linux64 -util/bot/sde-linux64.tar.xz -util/bot/sde-win32 -util/bot/sde-win32.tar.xz -util/bot/win_toolchain.json +/util/bot/android_ndk +/util/bot/android_sdk/public +/util/bot/cmake +/util/bot/golang +/util/bot/libFuzzer +/util/bot/libcxx +/util/bot/libcxxabi +/util/bot/llvm-build +/util/bot/nasm-win32.exe +/util/bot/ninja +/util/bot/perl-win32 +/util/bot/sde-linux64 +/util/bot/sde-linux64.tar.xz +/util/bot/sde-win32 +/util/bot/sde-win32.tar.xz +/util/bot/win_toolchain +/util/bot/win_toolchain.json + +# Ignore target under any directory. target/ diff --git a/yass/third_party/boringssl/src/BUILDING.md b/yass/third_party/boringssl/src/BUILDING.md index fb28e899c0..d78d28391c 100644 --- a/yass/third_party/boringssl/src/BUILDING.md +++ b/yass/third_party/boringssl/src/BUILDING.md @@ -16,15 +16,6 @@ most recent stable version of each tool. * [CMake](https://cmake.org/download/) 3.12 or later is required. - * A recent version of Perl is required. On Windows, - [Active State Perl](http://www.activestate.com/activeperl/) has been - reported to work, as has MSYS Perl. - [Strawberry Perl](http://strawberryperl.com/) also works but it adds GCC - to `PATH`, which can confuse some build tools when identifying the compiler - (removing `C:\Strawberry\c\bin` from `PATH` should resolve any problems). - If Perl is not found by CMake, it may be configured explicitly by setting - `PERL_EXECUTABLE`. - * Building with [Ninja](https://ninja-build.org/) instead of Make is recommended, because it makes builds faster. On Windows, CMake's Visual Studio generator may also work, but it not tested regularly and requires @@ -40,14 +31,6 @@ most recent stable version of each tool. GCC (6.1+) and Clang should work on non-Windows platforms, and maybe on Windows too. - * The most recent stable version of [Go](https://golang.org/dl/) is required. - Note Go is exempt from the five year support window. If not found by CMake, - the go executable may be configured explicitly by setting `GO_EXECUTABLE`. - - * On x86_64 Linux, the tests have an optional - [libunwind](https://www.nongnu.org/libunwind/) dependency to test the - assembly more thoroughly. - ## Building Using Ninja (note the 'N' is capitalized in the cmake invocation): @@ -136,7 +119,8 @@ supported. BoringSSL's build system has experimental support for adding a custom prefix to all symbols. This can be useful when linking multiple versions of BoringSSL in -the same project to avoid symbol conflicts. +the same project to avoid symbol conflicts. Symbol prefixing requires the most +recent stable version of [Go](https://go.dev/). In order to build with prefixed symbols, the `BORINGSSL_PREFIX` CMake variable should specify the prefix to add to all symbols, and the @@ -204,6 +188,16 @@ and performance. For instance, BoringSSL's fastest P-256 implementation uses a # Running Tests +There are two additional dependencies for running tests: + + * The most recent stable version of [Go](https://go.dev/) is required. + Note Go is exempt from the five year support window. If not found by CMake, + the go executable may be configured explicitly by setting `GO_EXECUTABLE`. + + * On x86_64 Linux, the tests have an optional + [libunwind](https://www.nongnu.org/libunwind/) dependency to test the + assembly more thoroughly. + There are two sets of tests: the C/C++ tests and the blackbox tests. For former are built by Ninja and can be run from the top-level directory with `go run util/all_tests.go`. The latter have to be run separately by running `go test` @@ -211,3 +205,17 @@ from within `ssl/test/runner`. Both sets of tests may also be run with `ninja -C build run_tests`, but CMake 3.2 or later is required to avoid Ninja's output buffering. + +# Pre-generated Files + +If modifying perlasm files, or `util/pregenerate/build.json`, you will need to +run `go run ./util/pregenerate` to refresh some pre-generated files. To do this, +you will need a recent version of Perl. + +On Windows, [Active State Perl](http://www.activestate.com/activeperl/) has been +reported to work, as has MSYS Perl. +[Strawberry Perl](http://strawberryperl.com/) also works but it adds GCC +to `PATH`, which can confuse some build tools when identifying the compiler +(removing `C:\Strawberry\c\bin` from `PATH` should resolve any problems). + +See (gen/README.md)[./gen/README.md] for more details. diff --git a/yass/third_party/boringssl/src/CMakeLists.txt b/yass/third_party/boringssl/src/CMakeLists.txt index d55aede9a5..7958c891a5 100644 --- a/yass/third_party/boringssl/src/CMakeLists.txt +++ b/yass/third_party/boringssl/src/CMakeLists.txt @@ -20,10 +20,9 @@ if(WIN32) set(CMAKE_CXX_COMPILER_ARCHITECTURE_ID "${CMAKE_CXX_COMPILER_TARGET}" CACHE STRING "") endif() -include(sources.cmake) include(cmake/go.cmake) include(cmake/paths.cmake) -include(cmake/perlasm.cmake) +include(gen/sources.cmake) # Select the MSVC runtime library for use by compilers targeting the MSVC ABI. # Use MSVC_CRT_LINKAGE variable internally, option: dynamic and static @@ -46,25 +45,7 @@ enable_language(CXX) include(GNUInstallDirs) -# CMake versions before 3.14 do not have default destination values. Executable -# and library targets that use a default destination should include this -# variable. -if(CMAKE_VERSION VERSION_LESS "3.14") - set(INSTALL_DESTINATION_DEFAULT - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) -endif() - -# Wrap the CMake install function so we can disable it. set(INSTALL_ENABLED 0) -function(install_if_enabled) - if(INSTALL_ENABLED) - install(${ARGV}) - endif() -endfunction() - -find_package(Perl REQUIRED) if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT CMAKE_CROSSCOMPILING) find_package(PkgConfig QUIET) @@ -99,6 +80,7 @@ foreach(VAR CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_ASM_FLAGS) endforeach() if(BORINGSSL_PREFIX AND BORINGSSL_PREFIX_SYMBOLS) + require_go() add_definitions(-DBORINGSSL_PREFIX=${BORINGSSL_PREFIX}) # CMake automatically connects include_directories to the NASM command-line, # but not add_definitions. @@ -166,7 +148,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CLANG) endif() if(CLANG) - set(C_CXX_FLAGS "${C_CXX_FLAGS} -Wnewline-eof -fcolor-diagnostics -Wframe-larger-than=25344") + set(C_CXX_FLAGS "${C_CXX_FLAGS} -Wnewline-eof -fcolor-diagnostics") else() # GCC (at least 4.8.4) has a bug where it'll find unreachable free() calls # and declare that the code is trying to free a stack pointer. @@ -183,6 +165,18 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CLANG) set(C_CXX_FLAGS "${C_CXX_FLAGS} -Wstring-concatenation") endif() + # Clang 12's -Wframe-larger-than reportedly does not work in clang-cl. See + # https://crbug.com/boringssl/709. Clang 13 includes the following fix, which + # may be related. Speculatively gate on Clang 13. That corresponds to + # AppleClang 13.1.6. + # https://github.com/llvm/llvm-project/commit/6aaf4fa2885600b0e31042071ad06f78218ab0f2 + if((CMAKE_C_COMPILER_ID STREQUAL "Clang" AND + CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0.0") OR + (CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND + CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "13.1.6")) + set(C_CXX_FLAGS "${C_CXX_FLAGS} -Wframe-larger-than=25344") + endif() + if(CLANG OR CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "7.0.0") set(C_CXX_FLAGS "${C_CXX_FLAGS} -Wimplicit-fallthrough") endif() @@ -345,6 +339,7 @@ if(GCOV) endif() if(FIPS) + require_go() add_definitions(-DBORINGSSL_FIPS) if(FIPS_BREAK_TEST) add_definitions("-DBORINGSSL_FIPS_BREAK_${FIPS_BREAK_TEST}=1") @@ -373,8 +368,6 @@ endif() if(CONSTANT_TIME_VALIDATION) add_definitions(-DBORINGSSL_CONSTANT_TIME_VALIDATION) - # Asserts will often test secret data. - add_definitions(-DNDEBUG) endif() if(MALLOC_FAILURE_TESTING) @@ -567,39 +560,171 @@ target_include_directories( # themselves as dependencies next to the target definition. add_custom_target(all_tests) -# On Windows, CRYPTO_TEST_DATA is too long to fit in command-line limits. -# TODO(davidben): CMake 3.12 has a list(JOIN) command. Use that when we've -# updated the minimum version. -set(EMBED_TEST_DATA_ARGS "") -foreach(arg ${CRYPTO_TEST_DATA}) - set(EMBED_TEST_DATA_ARGS "${EMBED_TEST_DATA_ARGS}${arg}\n") -endforeach() -file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/embed_test_data_args.txt" - "${EMBED_TEST_DATA_ARGS}") - -add_custom_command( - OUTPUT crypto_test_data.cc - COMMAND ${GO_EXECUTABLE} run util/embed_test_data.go -file-list - "${CMAKE_CURRENT_BINARY_DIR}/embed_test_data_args.txt" > - "${CMAKE_CURRENT_BINARY_DIR}/crypto_test_data.cc" - DEPENDS util/embed_test_data.go ${CRYPTO_TEST_DATA} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - -add_library(crypto_test_data OBJECT crypto_test_data.cc) - -endif() - -add_subdirectory(crypto) -add_subdirectory(ssl) add_subdirectory(ssl/test) add_subdirectory(util/fipstools) add_subdirectory(util/fipstools/acvp/modulewrapper) -add_subdirectory(decrepit) + +endif (BORINGSSL_BUILD_TESTS) + +if(OPENSSL_ASM) + set(CRYPTO_SOURCES_ASM_USED ${CRYPTO_SOURCES_ASM}) + set(BCM_SOURCES_ASM_USED ${BCM_SOURCES_ASM}) + set(TEST_SUPPORT_SOURCES_ASM_USED ${TEST_SUPPORT_SOURCES_ASM}) +elseif(OPENSSL_NASM) + set(CRYPTO_SOURCES_ASM_USED ${CRYPTO_SOURCES_NASM}) + set(BCM_SOURCES_ASM_USED ${BCM_SOURCES_NASM}) + set(TEST_SUPPORT_SOURCES_ASM_USED ${TEST_SUPPORT_SOURCES_NASM}) +endif() + +if (MSVC AND NOT OPENSSL_NO_ASM) + # The ASM_MASM compiler id for this compiler is "MSVC", so fill out the runtime library table. + set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreaded "") + set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDLL "") + set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebug "") + set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebugDLL "") +endif() + +if(FIPS_DELOCATE AND FIPS_SHARED) + message(FATAL_ERROR "Can't set both delocate and shared mode for FIPS build") +endif() + +if(FIPS_DELOCATE) + add_library(bcm_c_generated_asm STATIC ${BCM_SOURCES}) + add_dependencies(bcm_c_generated_asm boringssl_prefix_symbols) + target_include_directories(bcm_c_generated_asm PRIVATE ${PROJECT_SOURCE_DIR}/include) + set_target_properties(bcm_c_generated_asm PROPERTIES COMPILE_OPTIONS "-S") + set_target_properties(bcm_c_generated_asm PROPERTIES POSITION_INDEPENDENT_CODE ON) + + set(TARGET_FLAG "") + if(CMAKE_ASM_COMPILER_TARGET) + set(TARGET_FLAG "--target=${CMAKE_ASM_COMPILER_TARGET}") + endif() + + go_executable(delocate boringssl.googlesource.com/boringssl/util/fipstools/delocate) + add_custom_command( + OUTPUT bcm-delocated.S + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/delocate + -a $ + -o ${CMAKE_CURRENT_BINARY_DIR}/bcm-delocated.S + -cc ${CMAKE_ASM_COMPILER} + -cc-flags "${TARGET_FLAG} ${CMAKE_ASM_FLAGS}" + ${BCM_SOURCES_ASM_USED} + ${CRYPTO_HEADERS} + DEPENDS bcm_c_generated_asm + delocate + ${BCM_SOURCES_ASM_USED} + ${CRYPTO_HEADERS} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + + add_library(bcm_hashunset STATIC bcm-delocated.S) + set_target_properties(bcm_hashunset PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties(bcm_hashunset PROPERTIES LINKER_LANGUAGE C) + + go_executable(inject_hash + boringssl.googlesource.com/boringssl/util/fipstools/inject_hash) + add_custom_command( + OUTPUT bcm.o + COMMAND ./inject_hash -o bcm.o -in-archive $ + DEPENDS bcm_hashunset inject_hash + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set(CRYPTO_FIPS_OBJECTS bcm.o) +elseif(FIPS_SHARED) + if(NOT BUILD_SHARED_LIBS) + message(FATAL_ERROR "FIPS_SHARED set but not BUILD_SHARED_LIBS") + endif() + + add_library(bcm_library STATIC ${BCM_SOURCES} ${BCM_SOURCES_ASM_USED}) + add_dependencies(bcm_library boringssl_prefix_symbols) + target_include_directories(bcm_library PRIVATE ${PROJECT_SOURCE_DIR}/include) + + add_custom_command( + OUTPUT bcm.o + COMMAND ${CMAKE_LINKER} -r -T ${CMAKE_CURRENT_SOURCE_DIR}/crypto/fipsmodule/fips_shared.lds -o bcm.o --whole-archive $ + DEPENDS bcm_library crypto/fipsmodule/fips_shared.lds + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set(CRYPTO_FIPS_OBJECTS bcm.o) +else() + add_library(fipsmodule OBJECT ${BCM_SOURCES} ${BCM_SOURCES_ASM_USED}) + add_dependencies(fipsmodule boringssl_prefix_symbols) + target_include_directories(fipsmodule PRIVATE ${PROJECT_SOURCE_DIR}/include) + set(CRYPTO_FIPS_OBJECTS $) +endif() + +add_library(crypto ${CRYPTO_SOURCES} ${CRYPTO_FIPS_OBJECTS} ${CRYPTO_SOURCES_ASM_USED}) +target_include_directories(crypto PUBLIC + $ + $ +) +set_property(TARGET crypto PROPERTY EXPORT_NAME Crypto) + +if (MSVC) + set_target_properties(crypto PROPERTIES + COMPILE_PDB_NAME crypto + COMPILE_PDB_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR} + ) +endif() + +if(FIPS_SHARED) + # Rewrite libcrypto.so to inject the correct module hash value. This assumes + # UNIX-style library naming, but we only support FIPS mode on Linux anyway. + add_custom_command( + TARGET crypto POST_BUILD + COMMAND ${GO_EXECUTABLE} run + ${CMAKE_CURRENT_SOURCE_DIR}/util/fipstools/inject_hash/inject_hash.go + -o libcrypto.so -in-object libcrypto.so + # The DEPENDS argument to a POST_BUILD rule appears to be ignored. Thus + # go_executable isn't used (as it doesn't get built), but we list this + # dependency anyway in case it starts working in some CMake version. + DEPENDS util/fipstools/inject_hash/inject_hash.go + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) +endif() + +add_dependencies(crypto boringssl_prefix_symbols) +if(WIN32) + target_link_libraries(crypto ws2_32) +endif() + +# CMAKE_SYSTEM_NAME is "Generic" for embedded OSes: +# https://cmake.org/cmake/help/book/mastering-cmake/chapter/Cross%20Compiling%20With%20CMake.html#toolchain-files +# +# For now we assume embedded OSes do not have threads. Additionally, the Threads +# package does not work with Android, but Android does not require any extra +# parameters to link pthreads. +if(NOT WIN32 AND NOT CMAKE_SYSTEM_NAME MATCHES "^(Generic|Android)$") + find_package(Threads REQUIRED) + target_link_libraries(crypto Threads::Threads) +endif() + +# Every target depends on crypto, so we add libcxx as a dependency here to +# simplify injecting it everywhere. +if(USE_CUSTOM_LIBCXX) + target_link_libraries(crypto libcxx) +endif() + +add_library(ssl ${SSL_SOURCES}) +# Although libssl also provides headers that require an include directory, the +# flag is already specified by libcrypto, so we omit target_include_directories +# here. +set_property(TARGET ssl PROPERTY EXPORT_NAME SSL) +if (MSVC) + set_target_properties(ssl PROPERTIES + COMPILE_PDB_NAME ssl + COMPILE_PDB_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR} + ) +endif() +target_link_libraries(ssl crypto) + +add_library(decrepit ${DECREPIT_SOURCES}) +target_link_libraries(decrepit crypto ssl) if (BORINGSSL_BUILD_TESTS) - -add_library(test_support_lib STATIC ${TEST_SUPPORT_SOURCES}) -if (LIBUNWIND_FOUND) +add_library(test_support_lib STATIC + ${TEST_SUPPORT_SOURCES} ${TEST_SUPPORT_SOURCES_ASM_USED}) +if(LIBUNWIND_FOUND) target_compile_options(test_support_lib PRIVATE ${LIBUNWIND_CFLAGS_OTHER}) target_include_directories(test_support_lib PRIVATE ${LIBUNWIND_INCLUDE_DIRS}) target_link_libraries(test_support_lib ${LIBUNWIND_LDFLAGS}) @@ -616,14 +741,13 @@ add_executable(urandom_test ${URANDOM_TEST_SOURCES}) target_link_libraries(urandom_test test_support_lib boringssl_gtest crypto) add_dependencies(all_tests urandom_test) -add_executable(crypto_test ${CRYPTO_TEST_SOURCES} $) +add_executable(crypto_test ${CRYPTO_TEST_SOURCES}) target_link_libraries(crypto_test test_support_lib boringssl_gtest crypto) add_dependencies(all_tests crypto_test) add_executable(ssl_test ${SSL_TEST_SOURCES}) target_link_libraries(ssl_test test_support_lib boringssl_gtest ssl crypto) add_dependencies(all_tests ssl_test) - add_executable(decrepit_test ${DECREPIT_TEST_SOURCES}) target_link_libraries(decrepit_test test_support_lib boringssl_gtest decrepit crypto) @@ -663,19 +787,15 @@ set_target_properties( endif (BORINGSSL_BUILD_TESTS) add_executable(bssl ${BSSL_SOURCES}) -if (BORINGSSL_BUILD_TESTS) -install_if_enabled(TARGETS bssl DESTINATION ${INSTALL_DESTINATION_DEFAULT}) -endif (BORINGSSL_BUILD_TESTS) - target_link_libraries(bssl ssl crypto) if (BORINGSSL_BUILD_TESTS) # Historically, targets were built in subdirectories. For compatibility with # existing tools, we, for now, copy the targets into the subdirectories. This # will be removed sometime in 2024. -copy_post_build(crypto crypto_test urandom_test) -copy_post_build(ssl ssl_test) -copy_post_build(decrepit decrepit_test) +copy_post_build(crypto crypto crypto_test urandom_test) +copy_post_build(ssl ssl ssl_test) +copy_post_build(decrepit decrepit decrepit_test) copy_post_build(tool bssl) endif (BORINGSSL_BUILD_TESTS) @@ -735,25 +855,50 @@ file(STRINGS util/go_tests.txt GO_TESTS) set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS util/go_tests.txt) -add_custom_target( - run_tests - COMMAND ${GO_EXECUTABLE} test ${GO_TESTS} - COMMAND ${GO_EXECUTABLE} run util/all_tests.go -build-dir - ${CMAKE_CURRENT_BINARY_DIR} - COMMAND cd ssl/test/runner && - ${GO_EXECUTABLE} test -shim-path $ - ${HANDSHAKER_ARGS} ${RUNNER_ARGS} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS all_tests bssl_shim handshaker fips_specific_tests_if_any - USES_TERMINAL) - -install_if_enabled(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - -install_if_enabled(EXPORT OpenSSLTargets - FILE OpenSSLTargets.cmake - NAMESPACE OpenSSL:: - DESTINATION lib/cmake/OpenSSL -) -install_if_enabled(FILES cmake/OpenSSLConfig.cmake DESTINATION lib/cmake/OpenSSL) +if(GO_EXECUTABLE) + add_custom_target( + run_tests + COMMAND ${CMAKE_COMMAND} -E echo "Running Go tests" + COMMAND ${GO_EXECUTABLE} test ${GO_TESTS} + COMMAND ${CMAKE_COMMAND} -E echo + COMMAND ${CMAKE_COMMAND} -E echo "Running unit tests" + COMMAND ${GO_EXECUTABLE} run util/all_tests.go -build-dir + ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E echo + COMMAND ${CMAKE_COMMAND} -E echo "Running SSL tests" + COMMAND cd ssl/test/runner && + ${GO_EXECUTABLE} test -shim-path $ + ${HANDSHAKER_ARGS} ${RUNNER_ARGS} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS all_tests bssl_shim handshaker fips_specific_tests_if_any + USES_TERMINAL) +else() + add_custom_target( + run_tests + COMMAND ${CMAKE_COMMAND} -E echo "Running tests requires Go" + COMMAND ${CMAKE_COMMAND} -E false) +endif() endif(BORINGSSL_BUILD_TESTS) + +if(INSTALL_ENABLED) + # CMake versions before 3.14 do not have default destination values. Executable + # and library targets that use a default destination should include this + # variable. + if(CMAKE_VERSION VERSION_LESS "3.14") + set(INSTALL_DESTINATION_DEFAULT + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + endif() + + install(TARGETS crypto ssl + EXPORT OpenSSLTargets ${INSTALL_DESTINATION_DEFAULT}) + install(TARGETS bssl DESTINATION ${INSTALL_DESTINATION_DEFAULT}) + install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + install(EXPORT OpenSSLTargets + FILE OpenSSLTargets.cmake + NAMESPACE OpenSSL:: + DESTINATION lib/cmake/OpenSSL) + install(FILES cmake/OpenSSLConfig.cmake DESTINATION lib/cmake/OpenSSL) +endif() diff --git a/yass/third_party/boringssl/src/PrivacyInfo.xcprivacy b/yass/third_party/boringssl/src/PrivacyInfo.xcprivacy new file mode 100644 index 0000000000..dc3a906b2f --- /dev/null +++ b/yass/third_party/boringssl/src/PrivacyInfo.xcprivacy @@ -0,0 +1,21 @@ + + + + + + + + NSPrivacyTracking + + NSPrivacyTrackingDomains + + NSPrivacyCollectedDataTypes + + NSPrivacyAccessedAPITypes + + + diff --git a/yass/third_party/boringssl/src/build.json b/yass/third_party/boringssl/src/build.json new file mode 100644 index 0000000000..a431f6a42e --- /dev/null +++ b/yass/third_party/boringssl/src/build.json @@ -0,0 +1,987 @@ +// This file defines BoringSSL's build, expressed in terms of the input source +// files that BoringSSL developers edit. It is a JSON file with line comments, +// with line comments removed before parsing. It drives ./util/pregenerate which +// converts some of those inputs (e.g. perlasm files) into pre-generated +// outputs. This produces a more simplified build, which is then converted into +// build files of various syntaxes. +// +// When modifying this file, run `go run ./util/pregenerate`. See gen/README.md +// for more details, and util/pregenerate/build.go for the schema. +// +// TODO(crbug.com/boringssl/542): Moving build inputs to this file is still work +// in progress, so this file is currently incomplete. +{ + "bcm": { + "srcs": [ + "crypto/fipsmodule/bcm.c" + ], + "internal_hdrs": [ + "crypto/fipsmodule/aes/aes.c", + "crypto/fipsmodule/aes/aes_nohw.c", + "crypto/fipsmodule/aes/key_wrap.c", + "crypto/fipsmodule/aes/mode_wrappers.c", + "crypto/fipsmodule/bn/add.c", + "crypto/fipsmodule/bn/asm/x86_64-gcc.c", + "crypto/fipsmodule/bn/bn.c", + "crypto/fipsmodule/bn/bytes.c", + "crypto/fipsmodule/bn/cmp.c", + "crypto/fipsmodule/bn/ctx.c", + "crypto/fipsmodule/bn/div.c", + "crypto/fipsmodule/bn/div_extra.c", + "crypto/fipsmodule/bn/exponentiation.c", + "crypto/fipsmodule/bn/gcd.c", + "crypto/fipsmodule/bn/gcd_extra.c", + "crypto/fipsmodule/bn/generic.c", + "crypto/fipsmodule/bn/jacobi.c", + "crypto/fipsmodule/bn/montgomery.c", + "crypto/fipsmodule/bn/montgomery_inv.c", + "crypto/fipsmodule/bn/mul.c", + "crypto/fipsmodule/bn/prime.c", + "crypto/fipsmodule/bn/random.c", + "crypto/fipsmodule/bn/rsaz_exp.c", + "crypto/fipsmodule/bn/shift.c", + "crypto/fipsmodule/bn/sqrt.c", + "crypto/fipsmodule/cipher/aead.c", + "crypto/fipsmodule/cipher/cipher.c", + "crypto/fipsmodule/cipher/e_aes.c", + "crypto/fipsmodule/cipher/e_aesccm.c", + "crypto/fipsmodule/cmac/cmac.c", + "crypto/fipsmodule/dh/check.c", + "crypto/fipsmodule/dh/dh.c", + "crypto/fipsmodule/digest/digest.c", + "crypto/fipsmodule/digest/digests.c", + "crypto/fipsmodule/digestsign/digestsign.c", + "crypto/fipsmodule/ec/ec.c", + "crypto/fipsmodule/ec/ec_key.c", + "crypto/fipsmodule/ec/ec_montgomery.c", + "crypto/fipsmodule/ec/felem.c", + "crypto/fipsmodule/ec/oct.c", + "crypto/fipsmodule/ec/p224-64.c", + "crypto/fipsmodule/ec/p256-nistz.c", + "crypto/fipsmodule/ec/p256.c", + "crypto/fipsmodule/ec/scalar.c", + "crypto/fipsmodule/ec/simple.c", + "crypto/fipsmodule/ec/simple_mul.c", + "crypto/fipsmodule/ec/util.c", + "crypto/fipsmodule/ec/wnaf.c", + "crypto/fipsmodule/ecdh/ecdh.c", + "crypto/fipsmodule/ecdsa/ecdsa.c", + "crypto/fipsmodule/hkdf/hkdf.c", + "crypto/fipsmodule/hmac/hmac.c", + "crypto/fipsmodule/md4/md4.c", + "crypto/fipsmodule/md5/md5.c", + "crypto/fipsmodule/modes/cbc.c", + "crypto/fipsmodule/modes/cfb.c", + "crypto/fipsmodule/modes/ctr.c", + "crypto/fipsmodule/modes/gcm.c", + "crypto/fipsmodule/modes/gcm_nohw.c", + "crypto/fipsmodule/modes/ofb.c", + "crypto/fipsmodule/modes/polyval.c", + "crypto/fipsmodule/rand/ctrdrbg.c", + "crypto/fipsmodule/rand/fork_detect.c", + "crypto/fipsmodule/rand/rand.c", + "crypto/fipsmodule/rand/urandom.c", + "crypto/fipsmodule/rsa/blinding.c", + "crypto/fipsmodule/rsa/padding.c", + "crypto/fipsmodule/rsa/rsa.c", + "crypto/fipsmodule/rsa/rsa_impl.c", + "crypto/fipsmodule/self_check/fips.c", + "crypto/fipsmodule/self_check/self_check.c", + "crypto/fipsmodule/service_indicator/service_indicator.c", + "crypto/fipsmodule/sha/sha1.c", + "crypto/fipsmodule/sha/sha256.c", + "crypto/fipsmodule/sha/sha512.c", + "crypto/fipsmodule/tls/kdf.c" + ], + "perlasm_aarch64": [ + {"src": "crypto/fipsmodule/aes/asm/aesv8-armx.pl", "dst": "aesv8-armv8"}, + {"src": "crypto/fipsmodule/modes/asm/aesv8-gcm-armv8.pl"}, + {"src": "crypto/fipsmodule/bn/asm/armv8-mont.pl"}, + {"src": "crypto/fipsmodule/bn/asm/bn-armv8.pl"}, + {"src": "crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl"}, + {"src": "crypto/fipsmodule/modes/asm/ghashv8-armx.pl", "dst": "ghashv8-armv8"}, + {"src": "crypto/fipsmodule/ec/asm/p256_beeu-armv8-asm.pl"}, + {"src": "crypto/fipsmodule/ec/asm/p256-armv8-asm.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha1-armv8.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha512-armv8.pl", "args": ["sha256"], "dst": "sha256-armv8"}, + {"src": "crypto/fipsmodule/sha/asm/sha512-armv8.pl", "args": ["sha512"]}, + {"src": "crypto/fipsmodule/aes/asm/vpaes-armv8.pl"} + ], + "perlasm_arm": [ + {"src": "crypto/fipsmodule/aes/asm/aesv8-armx.pl", "dst": "aesv8-armv7"}, + {"src": "crypto/fipsmodule/bn/asm/armv4-mont.pl"}, + {"src": "crypto/fipsmodule/aes/asm/bsaes-armv7.pl"}, + {"src": "crypto/fipsmodule/modes/asm/ghash-armv4.pl"}, + {"src": "crypto/fipsmodule/modes/asm/ghashv8-armx.pl", "dst": "ghashv8-armv7"}, + {"src": "crypto/fipsmodule/sha/asm/sha1-armv4-large.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha256-armv4.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha512-armv4.pl"}, + {"src": "crypto/fipsmodule/aes/asm/vpaes-armv7.pl"} + ], + "perlasm_x86": [ + {"src": "crypto/fipsmodule/aes/asm/aesni-x86.pl"}, + {"src": "crypto/fipsmodule/bn/asm/bn-586.pl"}, + {"src": "crypto/fipsmodule/bn/asm/co-586.pl"}, + {"src": "crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl"}, + {"src": "crypto/fipsmodule/modes/asm/ghash-x86.pl"}, + {"src": "crypto/fipsmodule/md5/asm/md5-586.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha1-586.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha256-586.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha512-586.pl"}, + {"src": "crypto/fipsmodule/aes/asm/vpaes-x86.pl"}, + {"src": "crypto/fipsmodule/bn/asm/x86-mont.pl"} + ], + "perlasm_x86_64": [ + {"src": "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl"}, + {"src": "crypto/fipsmodule/aes/asm/aesni-x86_64.pl"}, + {"src": "crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl"}, + {"src": "crypto/fipsmodule/modes/asm/ghash-x86_64.pl"}, + {"src": "crypto/fipsmodule/md5/asm/md5-x86_64.pl"}, + {"src": "crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl"}, + {"src": "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl"}, + {"src": "crypto/fipsmodule/rand/asm/rdrand-x86_64.pl"}, + {"src": "crypto/fipsmodule/bn/asm/rsaz-avx2.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha1-x86_64.pl"}, + {"src": "crypto/fipsmodule/sha/asm/sha512-x86_64.pl", "args": ["sha256"], "dst": "sha256-x86_64"}, + {"src": "crypto/fipsmodule/sha/asm/sha512-x86_64.pl", "args": ["sha512"]}, + {"src": "crypto/fipsmodule/aes/asm/vpaes-x86_64.pl"}, + {"src": "crypto/fipsmodule/bn/asm/x86_64-mont.pl"}, + {"src": "crypto/fipsmodule/bn/asm/x86_64-mont5.pl"} + ] + }, + "crypto": { + "srcs": [ + "crypto/asn1/a_bitstr.c", + "crypto/asn1/a_bool.c", + "crypto/asn1/a_d2i_fp.c", + "crypto/asn1/a_dup.c", + "crypto/asn1/a_gentm.c", + "crypto/asn1/a_i2d_fp.c", + "crypto/asn1/a_int.c", + "crypto/asn1/a_mbstr.c", + "crypto/asn1/a_object.c", + "crypto/asn1/a_octet.c", + "crypto/asn1/a_strex.c", + "crypto/asn1/a_strnid.c", + "crypto/asn1/a_time.c", + "crypto/asn1/a_type.c", + "crypto/asn1/a_utctm.c", + "crypto/asn1/asn1_lib.c", + "crypto/asn1/asn1_par.c", + "crypto/asn1/asn_pack.c", + "crypto/asn1/f_int.c", + "crypto/asn1/f_string.c", + "crypto/asn1/tasn_dec.c", + "crypto/asn1/tasn_enc.c", + "crypto/asn1/tasn_fre.c", + "crypto/asn1/tasn_new.c", + "crypto/asn1/tasn_typ.c", + "crypto/asn1/tasn_utl.c", + "crypto/asn1/posix_time.c", + "crypto/base64/base64.c", + "crypto/bio/bio.c", + "crypto/bio/bio_mem.c", + "crypto/bio/connect.c", + "crypto/bio/errno.c", + "crypto/bio/fd.c", + "crypto/bio/file.c", + "crypto/bio/hexdump.c", + "crypto/bio/pair.c", + "crypto/bio/printf.c", + "crypto/bio/socket.c", + "crypto/bio/socket_helper.c", + "crypto/blake2/blake2.c", + "crypto/bn_extra/bn_asn1.c", + "crypto/bn_extra/convert.c", + "crypto/buf/buf.c", + "crypto/bytestring/asn1_compat.c", + "crypto/bytestring/ber.c", + "crypto/bytestring/cbb.c", + "crypto/bytestring/cbs.c", + "crypto/bytestring/unicode.c", + "crypto/chacha/chacha.c", + "crypto/cipher_extra/cipher_extra.c", + "crypto/cipher_extra/derive_key.c", + "crypto/cipher_extra/e_aesctrhmac.c", + "crypto/cipher_extra/e_aesgcmsiv.c", + "crypto/cipher_extra/e_chacha20poly1305.c", + "crypto/cipher_extra/e_des.c", + "crypto/cipher_extra/e_null.c", + "crypto/cipher_extra/e_rc2.c", + "crypto/cipher_extra/e_rc4.c", + "crypto/cipher_extra/e_tls.c", + "crypto/cipher_extra/tls_cbc.c", + "crypto/conf/conf.c", + "crypto/cpu_aarch64_apple.c", + "crypto/cpu_aarch64_openbsd.c", + "crypto/cpu_aarch64_fuchsia.c", + "crypto/cpu_aarch64_linux.c", + "crypto/cpu_aarch64_sysreg.c", + "crypto/cpu_aarch64_win.c", + "crypto/cpu_arm_freebsd.c", + "crypto/cpu_arm_linux.c", + "crypto/cpu_intel.c", + "crypto/crypto.c", + "crypto/curve25519/curve25519.c", + "crypto/curve25519/curve25519_64_adx.c", + "crypto/curve25519/spake25519.c", + "crypto/des/des.c", + "crypto/dh_extra/params.c", + "crypto/dh_extra/dh_asn1.c", + "crypto/digest_extra/digest_extra.c", + "crypto/dsa/dsa.c", + "crypto/dsa/dsa_asn1.c", + "crypto/ecdh_extra/ecdh_extra.c", + "crypto/ecdsa_extra/ecdsa_asn1.c", + "crypto/ec_extra/ec_asn1.c", + "crypto/ec_extra/ec_derive.c", + "crypto/ec_extra/hash_to_curve.c", + "crypto/err/err.c", + "crypto/engine/engine.c", + "crypto/evp/evp.c", + "crypto/evp/evp_asn1.c", + "crypto/evp/evp_ctx.c", + "crypto/evp/p_dh.c", + "crypto/evp/p_dh_asn1.c", + "crypto/evp/p_dsa_asn1.c", + "crypto/evp/p_ec.c", + "crypto/evp/p_ec_asn1.c", + "crypto/evp/p_ed25519.c", + "crypto/evp/p_ed25519_asn1.c", + "crypto/evp/p_hkdf.c", + "crypto/evp/p_rsa.c", + "crypto/evp/p_rsa_asn1.c", + "crypto/evp/p_x25519.c", + "crypto/evp/p_x25519_asn1.c", + "crypto/evp/pbkdf.c", + "crypto/evp/print.c", + "crypto/evp/scrypt.c", + "crypto/evp/sign.c", + "crypto/ex_data.c", + "crypto/fipsmodule/fips_shared_support.c", + "crypto/hpke/hpke.c", + "crypto/hrss/hrss.c", + "crypto/keccak/keccak.c", + "crypto/kyber/kyber.c", + "crypto/lhash/lhash.c", + "crypto/mem.c", + "crypto/obj/obj.c", + "crypto/obj/obj_xref.c", + "crypto/pem/pem_all.c", + "crypto/pem/pem_info.c", + "crypto/pem/pem_lib.c", + "crypto/pem/pem_oth.c", + "crypto/pem/pem_pk8.c", + "crypto/pem/pem_pkey.c", + "crypto/pem/pem_x509.c", + "crypto/pem/pem_xaux.c", + "crypto/pkcs7/pkcs7.c", + "crypto/pkcs7/pkcs7_x509.c", + "crypto/pkcs8/pkcs8.c", + "crypto/pkcs8/pkcs8_x509.c", + "crypto/pkcs8/p5_pbev2.c", + "crypto/poly1305/poly1305.c", + "crypto/poly1305/poly1305_arm.c", + "crypto/poly1305/poly1305_vec.c", + "crypto/pool/pool.c", + "crypto/rand_extra/deterministic.c", + "crypto/rand_extra/forkunsafe.c", + "crypto/rand_extra/getentropy.c", + "crypto/rand_extra/ios.c", + "crypto/rand_extra/passive.c", + "crypto/rand_extra/rand_extra.c", + "crypto/rand_extra/trusty.c", + "crypto/rand_extra/windows.c", + "crypto/rc4/rc4.c", + "crypto/refcount.c", + "crypto/rsa_extra/rsa_asn1.c", + "crypto/rsa_extra/rsa_crypt.c", + "crypto/rsa_extra/rsa_print.c", + "crypto/spx/address.c", + "crypto/spx/fors.c", + "crypto/spx/merkle.c", + "crypto/spx/spx.c", + "crypto/spx/thash.c", + "crypto/spx/spx_util.c", + "crypto/spx/wots.c", + "crypto/stack/stack.c", + "crypto/siphash/siphash.c", + "crypto/thread.c", + "crypto/thread_none.c", + "crypto/thread_pthread.c", + "crypto/thread_win.c", + "crypto/trust_token/pmbtoken.c", + "crypto/trust_token/trust_token.c", + "crypto/trust_token/voprf.c", + "crypto/x509/a_digest.c", + "crypto/x509/a_sign.c", + "crypto/x509/a_verify.c", + "crypto/x509/algorithm.c", + "crypto/x509/asn1_gen.c", + "crypto/x509/by_dir.c", + "crypto/x509/by_file.c", + "crypto/x509/i2d_pr.c", + "crypto/x509/name_print.c", + "crypto/x509/policy.c", + "crypto/x509/rsa_pss.c", + "crypto/x509/t_crl.c", + "crypto/x509/t_req.c", + "crypto/x509/t_x509.c", + "crypto/x509/t_x509a.c", + "crypto/x509/v3_akey.c", + "crypto/x509/v3_akeya.c", + "crypto/x509/v3_alt.c", + "crypto/x509/v3_bcons.c", + "crypto/x509/v3_bitst.c", + "crypto/x509/v3_conf.c", + "crypto/x509/v3_cpols.c", + "crypto/x509/v3_crld.c", + "crypto/x509/v3_enum.c", + "crypto/x509/v3_extku.c", + "crypto/x509/v3_genn.c", + "crypto/x509/v3_ia5.c", + "crypto/x509/v3_info.c", + "crypto/x509/v3_int.c", + "crypto/x509/v3_lib.c", + "crypto/x509/v3_ncons.c", + "crypto/x509/v3_ocsp.c", + "crypto/x509/v3_pcons.c", + "crypto/x509/v3_pmaps.c", + "crypto/x509/v3_prn.c", + "crypto/x509/v3_purp.c", + "crypto/x509/v3_skey.c", + "crypto/x509/v3_utl.c", + "crypto/x509/x_algor.c", + "crypto/x509/x_all.c", + "crypto/x509/x_attrib.c", + "crypto/x509/x_crl.c", + "crypto/x509/x_exten.c", + "crypto/x509/x_name.c", + "crypto/x509/x_pubkey.c", + "crypto/x509/x_req.c", + "crypto/x509/x_sig.c", + "crypto/x509/x_spki.c", + "crypto/x509/x_val.c", + "crypto/x509/x_x509.c", + "crypto/x509/x_x509a.c", + "crypto/x509/x509_att.c", + "crypto/x509/x509_cmp.c", + "crypto/x509/x509_d2.c", + "crypto/x509/x509_def.c", + "crypto/x509/x509_ext.c", + "crypto/x509/x509_lu.c", + "crypto/x509/x509_obj.c", + "crypto/x509/x509_req.c", + "crypto/x509/x509_set.c", + "crypto/x509/x509_trs.c", + "crypto/x509/x509_txt.c", + "crypto/x509/x509_v3.c", + "crypto/x509/x509_vfy.c", + "crypto/x509/x509_vpm.c", + "crypto/x509/x509.c", + "crypto/x509/x509cset.c", + "crypto/x509/x509name.c", + "crypto/x509/x509rset.c", + "crypto/x509/x509spki.c" + ], + "hdrs": [ + "include/openssl/aead.h", + "include/openssl/aes.h", + "include/openssl/arm_arch.h", + "include/openssl/asm_base.h", + "include/openssl/asn1.h", + "include/openssl/asn1_mac.h", + "include/openssl/asn1t.h", + "include/openssl/base.h", + "include/openssl/base64.h", + "include/openssl/bio.h", + "include/openssl/blake2.h", + "include/openssl/blowfish.h", + "include/openssl/bn.h", + "include/openssl/buf.h", + "include/openssl/buffer.h", + "include/openssl/bytestring.h", + "include/openssl/cast.h", + "include/openssl/chacha.h", + "include/openssl/cipher.h", + "include/openssl/cmac.h", + "include/openssl/conf.h", + "include/openssl/cpu.h", + "include/openssl/crypto.h", + "include/openssl/ctrdrbg.h", + "include/openssl/curve25519.h", + "include/openssl/des.h", + "include/openssl/dh.h", + "include/openssl/digest.h", + "include/openssl/dsa.h", + "include/openssl/e_os2.h", + "include/openssl/ec.h", + "include/openssl/ec_key.h", + "include/openssl/ecdh.h", + "include/openssl/ecdsa.h", + "include/openssl/engine.h", + "include/openssl/err.h", + "include/openssl/evp.h", + "include/openssl/evp_errors.h", + "include/openssl/ex_data.h", + "include/openssl/experimental/kyber.h", + "include/openssl/experimental/spx.h", + "include/openssl/hkdf.h", + "include/openssl/hmac.h", + "include/openssl/hpke.h", + "include/openssl/hrss.h", + "include/openssl/is_boringssl.h", + "include/openssl/kdf.h", + "include/openssl/lhash.h", + "include/openssl/md4.h", + "include/openssl/md5.h", + "include/openssl/mem.h", + "include/openssl/nid.h", + "include/openssl/obj.h", + "include/openssl/obj_mac.h", + "include/openssl/objects.h", + "include/openssl/opensslconf.h", + "include/openssl/opensslv.h", + "include/openssl/ossl_typ.h", + "include/openssl/pem.h", + "include/openssl/pkcs12.h", + "include/openssl/pkcs7.h", + "include/openssl/pkcs8.h", + "include/openssl/poly1305.h", + "include/openssl/pool.h", + "include/openssl/posix_time.h", + "include/openssl/rand.h", + "include/openssl/rc4.h", + "include/openssl/ripemd.h", + "include/openssl/rsa.h", + "include/openssl/safestack.h", + "include/openssl/service_indicator.h", + "include/openssl/sha.h", + "include/openssl/siphash.h", + "include/openssl/span.h", + "include/openssl/stack.h", + "include/openssl/target.h", + "include/openssl/thread.h", + "include/openssl/time.h", + "include/openssl/trust_token.h", + "include/openssl/type_check.h", + "include/openssl/x509.h", + "include/openssl/x509_vfy.h", + "include/openssl/x509v3.h", + "include/openssl/x509v3_errors.h" + ], + "internal_hdrs": [ + "crypto/asn1/internal.h", + "crypto/bio/internal.h", + "crypto/bytestring/internal.h", + "crypto/chacha/internal.h", + "crypto/cipher_extra/internal.h", + "crypto/conf/conf_def.h", + "crypto/conf/internal.h", + "crypto/cpu_arm_linux.h", + "crypto/curve25519/curve25519_tables.h", + "crypto/curve25519/internal.h", + "crypto/des/internal.h", + "crypto/dsa/internal.h", + "crypto/ec_extra/internal.h", + "crypto/err/internal.h", + "crypto/evp/internal.h", + "crypto/fipsmodule/aes/internal.h", + "crypto/fipsmodule/bn/internal.h", + "crypto/fipsmodule/bn/rsaz_exp.h", + "crypto/fipsmodule/cipher/internal.h", + "crypto/fipsmodule/delocate.h", + "crypto/fipsmodule/dh/internal.h", + "crypto/fipsmodule/digest/internal.h", + "crypto/fipsmodule/digest/md32_common.h", + "crypto/fipsmodule/ec/builtin_curves.h", + "crypto/fipsmodule/ec/internal.h", + "crypto/fipsmodule/ec/p256-nistz-table.h", + "crypto/fipsmodule/ec/p256-nistz.h", + "crypto/fipsmodule/ec/p256_table.h", + "crypto/fipsmodule/ecdsa/internal.h", + "crypto/fipsmodule/md5/internal.h", + "crypto/fipsmodule/modes/internal.h", + "crypto/fipsmodule/rand/fork_detect.h", + "crypto/fipsmodule/rand/getrandom_fillin.h", + "crypto/fipsmodule/rand/internal.h", + "crypto/fipsmodule/rsa/internal.h", + "crypto/fipsmodule/service_indicator/internal.h", + "crypto/fipsmodule/sha/internal.h", + "crypto/fipsmodule/tls/internal.h", + "crypto/hrss/internal.h", + "crypto/internal.h", + "crypto/keccak/internal.h", + "crypto/kyber/internal.h", + "crypto/lhash/internal.h", + "crypto/obj/obj_dat.h", + "crypto/pkcs7/internal.h", + "crypto/pkcs8/internal.h", + "crypto/poly1305/internal.h", + "crypto/pool/internal.h", + "crypto/rsa_extra/internal.h", + "crypto/spx/address.h", + "crypto/spx/fors.h", + "crypto/spx/merkle.h", + "crypto/spx/params.h", + "crypto/spx/spx_util.h", + "crypto/spx/thash.h", + "crypto/spx/wots.h", + "crypto/trust_token/internal.h", + "crypto/x509/ext_dat.h", + "crypto/x509/internal.h", + "third_party/fiat/curve25519_32.h", + "third_party/fiat/curve25519_64.h", + "third_party/fiat/curve25519_64_adx.h", + "third_party/fiat/curve25519_64_msvc.h", + "third_party/fiat/p256_32.h", + "third_party/fiat/p256_64.h", + "third_party/fiat/p256_64_msvc.h" + ], + "err_data": [ + "crypto/err/*.errordata" + ], + "asm": [ + "crypto/curve25519/asm/x25519-asm-arm.S", + "crypto/hrss/asm/poly_rq_mul.S", + "crypto/poly1305/poly1305_arm_asm.S", + "third_party/fiat/asm/fiat_curve25519_adx_mul.S", + "third_party/fiat/asm/fiat_curve25519_adx_square.S", + "third_party/fiat/asm/fiat_p256_adx_mul.S", + "third_party/fiat/asm/fiat_p256_adx_sqr.S" + ], + "perlasm_aarch64": [ + {"src": "crypto/chacha/asm/chacha-armv8.pl"}, + {"src": "crypto/cipher_extra/asm/chacha20_poly1305_armv8.pl"} + ], + "perlasm_arm": [ + {"src": "crypto/chacha/asm/chacha-armv4.pl"} + ], + "perlasm_x86": [ + {"src": "crypto/chacha/asm/chacha-x86.pl"} + ], + "perlasm_x86_64": [ + {"src": "crypto/chacha/asm/chacha-x86_64.pl"}, + {"src": "crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl"}, + {"src": "crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl"} + ] + }, + "pki": { + "srcs": [ + "pki/cert_error_id.cc", + "pki/cert_error_params.cc", + "pki/cert_errors.cc", + "pki/cert_issuer_source_static.cc", + "pki/certificate.cc", + "pki/certificate_policies.cc", + "pki/common_cert_errors.cc", + "pki/crl.cc", + "pki/encode_values.cc", + "pki/extended_key_usage.cc", + "pki/general_names.cc", + "pki/input.cc", + "pki/ip_util.cc", + "pki/name_constraints.cc", + "pki/ocsp.cc", + "pki/ocsp_verify_result.cc", + "pki/parse_certificate.cc", + "pki/parse_name.cc", + "pki/parse_values.cc", + "pki/parsed_certificate.cc", + "pki/parser.cc", + "pki/path_builder.cc", + "pki/pem.cc", + "pki/revocation_util.cc", + "pki/signature_algorithm.cc", + "pki/simple_path_builder_delegate.cc", + "pki/string_util.cc", + "pki/trust_store_collection.cc", + "pki/trust_store_in_memory.cc", + "pki/trust_store.cc", + "pki/verify_certificate_chain.cc", + "pki/verify_error.cc", + "pki/verify_name_match.cc", + "pki/verify_signed_data.cc" + ], + "hdrs": [ + "include/openssl/pki/certificate.h", + "include/openssl/pki/signature_verify_cache.h", + "include/openssl/pki/verify_error.h" + ], + "internal_hdrs": [ + "pki/cert_error_id.h", + "pki/cert_error_params.h", + "pki/cert_errors.h", + "pki/cert_issuer_source.h", + "pki/cert_issuer_source_static.h", + // TODO(crbug.com/boringssl/542): This should be in pki_tests. + "pki/cert_issuer_source_sync_unittest.h", + "pki/certificate_policies.h", + "pki/common_cert_errors.h", + "pki/crl.h", + "pki/encode_values.h", + "pki/extended_key_usage.h", + "pki/general_names.h", + "pki/input.h", + "pki/ip_util.h", + "pki/mock_signature_verify_cache.h", + "pki/name_constraints.h", + "pki/nist_pkits_unittest.h", + "pki/ocsp.h", + "pki/ocsp_revocation_status.h", + "pki/ocsp_verify_result.h", + "pki/parse_certificate.h", + "pki/parse_name.h", + "pki/parse_values.h", + "pki/parsed_certificate.h", + "pki/parser.h", + "pki/path_builder.h", + "pki/pem.h", + "pki/revocation_util.h", + "pki/signature_algorithm.h", + "pki/simple_path_builder_delegate.h", + "pki/string_util.h", + // TODO(crbug.com/boringssl/542): This should be in pki_tests. + "pki/test_helpers.h", + // TODO(crbug.com/boringssl/542): This should be in pki_tests. + "pki/testdata/nist-pkits/pkits_testcases-inl.h", + "pki/trust_store.h", + "pki/trust_store_collection.h", + "pki/trust_store_in_memory.h", + "pki/verify_certificate_chain.h", + // TODO(crbug.com/boringssl/542): This should be in pki_tests. + "pki/verify_certificate_chain_typed_unittest.h", + "pki/verify_name_match.h", + "pki/verify_signed_data.h" + ] + }, + "ssl": { + "srcs": [ + "ssl/bio_ssl.cc", + "ssl/d1_both.cc", + "ssl/d1_lib.cc", + "ssl/d1_pkt.cc", + "ssl/d1_srtp.cc", + "ssl/dtls_method.cc", + "ssl/dtls_record.cc", + "ssl/encrypted_client_hello.cc", + "ssl/extensions.cc", + "ssl/handoff.cc", + "ssl/handshake_client.cc", + "ssl/handshake_server.cc", + "ssl/handshake.cc", + "ssl/s3_both.cc", + "ssl/s3_lib.cc", + "ssl/s3_pkt.cc", + "ssl/ssl_aead_ctx.cc", + "ssl/ssl_asn1.cc", + "ssl/ssl_buffer.cc", + "ssl/ssl_cert.cc", + "ssl/ssl_cipher.cc", + "ssl/ssl_credential.cc", + "ssl/ssl_file.cc", + "ssl/ssl_key_share.cc", + "ssl/ssl_lib.cc", + "ssl/ssl_privkey.cc", + "ssl/ssl_session.cc", + "ssl/ssl_stat.cc", + "ssl/ssl_transcript.cc", + "ssl/ssl_versions.cc", + "ssl/ssl_x509.cc", + "ssl/t1_enc.cc", + "ssl/tls_method.cc", + "ssl/tls_record.cc", + "ssl/tls13_both.cc", + "ssl/tls13_client.cc", + "ssl/tls13_enc.cc", + "ssl/tls13_server.cc" + ], + "hdrs": [ + "include/openssl/dtls1.h", + "include/openssl/srtp.h", + "include/openssl/ssl.h", + "include/openssl/ssl3.h", + "include/openssl/tls1.h" + ], + "internal_hdrs": [ + "ssl/internal.h" + ] + }, + "decrepit": { + "srcs": [ + "decrepit/bio/base64_bio.c", + "decrepit/blowfish/blowfish.c", + "decrepit/cast/cast.c", + "decrepit/cast/cast_tables.c", + "decrepit/cfb/cfb.c", + "decrepit/des/cfb64ede.c", + "decrepit/dh/dh_decrepit.c", + "decrepit/dsa/dsa_decrepit.c", + "decrepit/evp/dss1.c", + "decrepit/evp/evp_do_all.c", + "decrepit/obj/obj_decrepit.c", + "decrepit/rc4/rc4_decrepit.c", + "decrepit/ripemd/ripemd.c", + "decrepit/rsa/rsa_decrepit.c", + "decrepit/ssl/ssl_decrepit.c", + "decrepit/x509/x509_decrepit.c", + "decrepit/xts/xts.c" + ] + }, + "test_support": { + "srcs": [ + "crypto/test/abi_test.cc", + "crypto/test/file_test.cc", + "crypto/test/file_test_gtest.cc", + "crypto/test/file_util.cc", + "crypto/test/test_data.cc", + "crypto/test/test_util.cc", + "crypto/test/wycheproof_util.cc" + ], + "internal_hdrs": [ + "crypto/test/abi_test.h", + "crypto/test/file_test.h", + "crypto/test/file_util.h", + "crypto/test/gtest_main.h", + "crypto/test/test_data.h", + "crypto/test/test_util.h", + "crypto/test/wycheproof_util.h", + // TODO(crbug.com/boringssl/542): It's a bit odd that these are in + // test_support. Most of it is part of bssl_shim, except that the + // fuzzers use some of these headers. + "ssl/test/async_bio.h", + "ssl/test/fuzzer.h", + "ssl/test/fuzzer_tags.h", + "ssl/test/handshake_util.h", + "ssl/test/mock_quic_transport.h", + "ssl/test/packeted_bio.h", + "ssl/test/settings_writer.h", + "ssl/test/test_config.h", + "ssl/test/test_state.h" + ], + "perlasm_aarch64": [ + {"src": "crypto/test/asm/trampoline-armv8.pl"} + ], + "perlasm_arm": [ + {"src": "crypto/test/asm/trampoline-armv4.pl"} + ], + "perlasm_x86": [ + {"src": "crypto/test/asm/trampoline-x86.pl"} + ], + "perlasm_x86_64": [ + {"src": "crypto/test/asm/trampoline-x86_64.pl"} + ] + }, + "crypto_test": { + "srcs": [ + "crypto/abi_self_test.cc", + "crypto/asn1/asn1_test.cc", + "crypto/base64/base64_test.cc", + "crypto/bio/bio_test.cc", + "crypto/blake2/blake2_test.cc", + "crypto/buf/buf_test.cc", + "crypto/bytestring/bytestring_test.cc", + "crypto/chacha/chacha_test.cc", + "crypto/cipher_extra/aead_test.cc", + "crypto/cipher_extra/cipher_test.cc", + "crypto/compiler_test.cc", + "crypto/conf/conf_test.cc", + "crypto/constant_time_test.cc", + "crypto/cpu_arm_linux_test.cc", + "crypto/crypto_test.cc", + "crypto/curve25519/ed25519_test.cc", + "crypto/curve25519/spake25519_test.cc", + "crypto/curve25519/x25519_test.cc", + "crypto/ecdh_extra/ecdh_test.cc", + "crypto/dh_extra/dh_test.cc", + "crypto/digest_extra/digest_test.cc", + "crypto/dsa/dsa_test.cc", + "crypto/err/err_test.cc", + "crypto/evp/evp_extra_test.cc", + "crypto/evp/evp_test.cc", + "crypto/evp/pbkdf_test.cc", + "crypto/evp/scrypt_test.cc", + "crypto/fipsmodule/aes/aes_test.cc", + "crypto/fipsmodule/bn/bn_test.cc", + "crypto/fipsmodule/cmac/cmac_test.cc", + "crypto/fipsmodule/ec/ec_test.cc", + "crypto/fipsmodule/ec/p256-nistz_test.cc", + "crypto/fipsmodule/ec/p256_test.cc", + "crypto/fipsmodule/ecdsa/ecdsa_test.cc", + "crypto/fipsmodule/hkdf/hkdf_test.cc", + "crypto/fipsmodule/md5/md5_test.cc", + "crypto/fipsmodule/modes/gcm_test.cc", + "crypto/fipsmodule/rand/ctrdrbg_test.cc", + "crypto/fipsmodule/rand/fork_detect_test.cc", + "crypto/fipsmodule/service_indicator/service_indicator_test.cc", + "crypto/fipsmodule/sha/sha_test.cc", + "crypto/hpke/hpke_test.cc", + "crypto/hmac_extra/hmac_test.cc", + "crypto/hrss/hrss_test.cc", + "crypto/impl_dispatch_test.cc", + "crypto/keccak/keccak_test.cc", + "crypto/kyber/kyber_test.cc", + "crypto/lhash/lhash_test.cc", + "crypto/obj/obj_test.cc", + "crypto/pem/pem_test.cc", + "crypto/pkcs7/pkcs7_test.cc", + "crypto/pkcs8/pkcs8_test.cc", + "crypto/pkcs8/pkcs12_test.cc", + "crypto/poly1305/poly1305_test.cc", + "crypto/pool/pool_test.cc", + "crypto/rand_extra/rand_test.cc", + "crypto/rand_extra/getentropy_test.cc", + "crypto/refcount_test.cc", + "crypto/rsa_extra/rsa_test.cc", + "crypto/self_test.cc", + "crypto/stack/stack_test.cc", + "crypto/siphash/siphash_test.cc", + "crypto/spx/spx_test.cc", + "crypto/thread_test.cc", + "crypto/test/gtest_main.cc", + "crypto/trust_token/trust_token_test.cc", + "crypto/x509/tab_test.cc", + "crypto/x509/x509_test.cc", + "crypto/x509/x509_time_test.cc" + ], + "data": [ + "crypto/blake2/blake2b256_tests.txt", + "crypto/cipher_extra/test/*.txt", + "crypto/cipher_extra/test/nist_cavp/*.txt", + "crypto/curve25519/ed25519_tests.txt", + "crypto/ecdh_extra/ecdh_tests.txt", + "crypto/evp/evp_tests.txt", + "crypto/evp/scrypt_tests.txt", + "crypto/fipsmodule/aes/aes_tests.txt", + "crypto/fipsmodule/bn/test/*.txt", + "crypto/fipsmodule/cmac/cavp_3des_cmac_tests.txt", + "crypto/fipsmodule/cmac/cavp_aes128_cmac_tests.txt", + "crypto/fipsmodule/cmac/cavp_aes192_cmac_tests.txt", + "crypto/fipsmodule/cmac/cavp_aes256_cmac_tests.txt", + "crypto/fipsmodule/ec/ec_scalar_base_mult_tests.txt", + "crypto/fipsmodule/ec/p256-nistz_tests.txt", + "crypto/fipsmodule/ecdsa/ecdsa_sign_tests.txt", + "crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt", + "crypto/fipsmodule/modes/gcm_tests.txt", + "crypto/fipsmodule/rand/ctrdrbg_vectors.txt", + "crypto/hmac_extra/hmac_tests.txt", + "crypto/hpke/hpke_test_vectors.txt", + "crypto/keccak/keccak_tests.txt", + "crypto/kyber/kyber_tests.txt", + "crypto/pkcs8/test/*.p12", + "crypto/poly1305/poly1305_tests.txt", + "crypto/siphash/siphash_tests.txt", + "crypto/spx/spx_tests.txt", + "crypto/spx/spx_tests_deterministic.txt", + "crypto/x509/test/*.pem", + "third_party/wycheproof_testvectors/*.txt" + ] + }, + "urandom_test": { + "srcs": [ + "crypto/fipsmodule/rand/urandom_test.cc" + ] + }, + "pki_test": { + "srcs": [ + "crypto/test/gtest_main.cc", + "pki/cert_issuer_source_static_unittest.cc", + "pki/certificate_unittest.cc", + "pki/certificate_policies_unittest.cc", + "pki/crl_unittest.cc", + "pki/encode_values_unittest.cc", + "pki/extended_key_usage_unittest.cc", + "pki/general_names_unittest.cc", + "pki/input_unittest.cc", + "pki/ip_util_unittest.cc", + "pki/mock_signature_verify_cache.cc", + "pki/name_constraints_unittest.cc", + "pki/nist_pkits_unittest.cc", + "pki/ocsp_unittest.cc", + "pki/parse_certificate_unittest.cc", + "pki/parse_name_unittest.cc", + "pki/parse_values_unittest.cc", + "pki/parsed_certificate_unittest.cc", + "pki/parser_unittest.cc", + "pki/path_builder_pkits_unittest.cc", + "pki/path_builder_unittest.cc", + "pki/path_builder_verify_certificate_chain_unittest.cc", + "pki/pem_unittest.cc", + "pki/signature_algorithm_unittest.cc", + "pki/simple_path_builder_delegate_unittest.cc", + "pki/string_util_unittest.cc", + "pki/test_helpers.cc", + "pki/trust_store_collection_unittest.cc", + "pki/trust_store_in_memory_unittest.cc", + "pki/verify_certificate_chain_pkits_unittest.cc", + "pki/verify_certificate_chain_unittest.cc", + "pki/verify_name_match_unittest.cc", + "pki/verify_signed_data_unittest.cc" + ], + "data": [ + "pki/testdata/cert_issuer_source_static_unittest/*.pem", + "pki/testdata/certificate_policies_unittest/*.pem", + "pki/testdata/crl_unittest/*.pem", + "pki/testdata/name_constraints_unittest/*.pem", + "pki/testdata/nist-pkits/certs/*.crt", + "pki/testdata/nist-pkits/crls/*.crl", + "pki/testdata/ocsp_unittest/*.pem", + "pki/testdata/parse_certificate_unittest/*.pem", + "pki/testdata/parse_certificate_unittest/*/*.pem", + "pki/testdata/path_builder_unittest/*.pem", + "pki/testdata/path_builder_unittest/*/*.pem", + "pki/testdata/verify_certificate_chain_unittest/*/*.pem", + "pki/testdata/verify_certificate_chain_unittest/*/*.test", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/*.txt", + "pki/testdata/verify_name_match_unittest/names/*.pem", + "pki/testdata/verify_signed_data_unittest/*.pem", + "pki/testdata/verify_unittest/google-leaf.der", + "pki/testdata/verify_unittest/self-issued.pem" + ] + }, + "ssl_test": { + "srcs": [ + "crypto/test/gtest_main.cc", + "ssl/span_test.cc", + "ssl/ssl_c_test.c", + "ssl/ssl_test.cc" + ] + }, + "decrepit_test": { + "srcs": [ + "crypto/test/gtest_main.cc", + "decrepit/blowfish/blowfish_test.cc", + "decrepit/cast/cast_test.cc", + "decrepit/cfb/cfb_test.cc", + "decrepit/des/des_test.cc", + "decrepit/evp/evp_test.cc", + "decrepit/ripemd/ripemd_test.cc", + "decrepit/xts/xts_test.cc" + ] + }, + "bssl": { + "srcs": [ + "tool/args.cc", + "tool/ciphers.cc", + "tool/client.cc", + "tool/const.cc", + "tool/digest.cc", + "tool/fd.cc", + "tool/file.cc", + "tool/generate_ech.cc", + "tool/generate_ed25519.cc", + "tool/genrsa.cc", + "tool/pkcs12.cc", + "tool/rand.cc", + "tool/server.cc", + "tool/sign.cc", + "tool/speed.cc", + "tool/tool.cc", + "tool/transport_common.cc" + ], + "internal_hdrs": [ + "tool/internal.h", + "tool/transport_common.h" + ] + } +} diff --git a/yass/third_party/boringssl/src/cmake/go.cmake b/yass/third_party/boringssl/src/cmake/go.cmake index 51ecb45ced..76fcfebc1b 100644 --- a/yass/third_party/boringssl/src/cmake/go.cmake +++ b/yass/third_party/boringssl/src/cmake/go.cmake @@ -1,9 +1,15 @@ +# Go is an optional dependency. It's a necessary dependency if running tests or +# the FIPS build, which will check these. find_program(GO_EXECUTABLE go) -if(NOT GO_EXECUTABLE) - message(FATAL_ERROR "Could not find Go") -endif() + +function(require_go) + if(NOT GO_EXECUTABLE) + message(FATAL_ERROR "Could not find Go") + endif() +endfunction() function(go_executable dest package) + require_go() set(godeps "${PROJECT_SOURCE_DIR}/util/godeps.go") if(NOT CMAKE_GENERATOR STREQUAL "Ninja") # The DEPFILE parameter to add_custom_command only works with Ninja. Query diff --git a/yass/third_party/boringssl/src/cmake/perlasm.cmake b/yass/third_party/boringssl/src/cmake/perlasm.cmake deleted file mode 100644 index 17a47b9e03..0000000000 --- a/yass/third_party/boringssl/src/cmake/perlasm.cmake +++ /dev/null @@ -1,57 +0,0 @@ -macro(append_to_parent_scope var) - list(APPEND ${var} ${ARGN}) - set(${var} "${${var}}" PARENT_SCOPE) -endmacro() - -function(add_perlasm_target dest src) - get_filename_component(dir ${dest} DIRECTORY) - if(dir STREQUAL "") - set(dir ".") - endif() - - add_custom_command( - OUTPUT ${dest} - COMMAND ${CMAKE_COMMAND} -E make_directory ${dir} - COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/${src} ${ARGN} - ${dest} - DEPENDS - ${src} - ${PROJECT_SOURCE_DIR}/crypto/perlasm/arm-xlate.pl - ${PROJECT_SOURCE_DIR}/crypto/perlasm/x86_64-xlate.pl - ${PROJECT_SOURCE_DIR}/crypto/perlasm/x86asm.pl - ${PROJECT_SOURCE_DIR}/crypto/perlasm/x86gas.pl - ${PROJECT_SOURCE_DIR}/crypto/perlasm/x86masm.pl - ${PROJECT_SOURCE_DIR}/crypto/perlasm/x86nasm.pl - WORKING_DIRECTORY . - ) -endfunction() - -# perlasm generates perlasm output from a given file. arch specifies the -# architecture. dest specifies the basename of the output file. The list of -# generated files will be appended to ${var}_ASM and ${var}_NASM depending on -# the assembler used. Extra arguments are passed to the perlasm script. -function(perlasm var arch dest src) - if(arch STREQUAL "aarch64") - add_perlasm_target("${dest}-apple.S" ${src} ios64 ${ARGN}) - add_perlasm_target("${dest}-linux.S" ${src} linux64 ${ARGN}) - add_perlasm_target("${dest}-win.S" ${src} win64 ${ARGN}) - append_to_parent_scope("${var}_ASM" "${dest}-apple.S" "${dest}-linux.S" "${dest}-win.S") - elseif(arch STREQUAL "arm") - add_perlasm_target("${dest}-linux.S" ${src} linux32 ${ARGN}) - append_to_parent_scope("${var}_ASM" "${dest}-linux.S") - elseif(arch STREQUAL "x86") - add_perlasm_target("${dest}-apple.S" ${src} macosx -fPIC ${ARGN}) - add_perlasm_target("${dest}-linux.S" ${src} elf -fPIC ${ARGN}) - add_perlasm_target("${dest}-win.asm" ${src} win32n ${ARGN}) - append_to_parent_scope("${var}_ASM" "${dest}-apple.S" "${dest}-linux.S") - append_to_parent_scope("${var}_NASM" "${dest}-win.asm") - elseif(arch STREQUAL "x86_64") - add_perlasm_target("${dest}-apple.S" ${src} macosx ${ARGN}) - add_perlasm_target("${dest}-linux.S" ${src} elf ${ARGN}) - add_perlasm_target("${dest}-win.asm" ${src} nasm ${ARGN}) - append_to_parent_scope("${var}_ASM" "${dest}-apple.S" "${dest}-linux.S") - append_to_parent_scope("${var}_NASM" "${dest}-win.asm") - else() - message(FATAL_ERROR "Unknown perlasm architecture: $arch") - endif() -endfunction() diff --git a/yass/third_party/boringssl/src/crypto/CMakeLists.txt b/yass/third_party/boringssl/src/crypto/CMakeLists.txt deleted file mode 100644 index 8f7c1187ef..0000000000 --- a/yass/third_party/boringssl/src/crypto/CMakeLists.txt +++ /dev/null @@ -1,379 +0,0 @@ -if (MSVC AND NOT OPENSSL_NO_ASM) - # The ASM_MASM compiler id for this compiler is "MSVC", so fill out the runtime library table. - set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreaded "") - set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDLL "") - set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebug "") - set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebugDLL "") -endif() - -add_subdirectory(fipsmodule) - -if(FIPS_DELOCATE OR FIPS_SHARED) - SET_SOURCE_FILES_PROPERTIES(fipsmodule/bcm.o PROPERTIES EXTERNAL_OBJECT true) - SET_SOURCE_FILES_PROPERTIES(fipsmodule/bcm.o PROPERTIES GENERATED true) - - set( - CRYPTO_FIPS_OBJECTS - - fipsmodule/bcm.o - ) -endif() - -set( - CRYPTO_SOURCES_ASM - curve25519/asm/x25519-asm-arm.S - hrss/asm/poly_rq_mul.S - poly1305/poly1305_arm_asm.S - ../third_party/fiat/asm/fiat_curve25519_adx_mul.S - ../third_party/fiat/asm/fiat_curve25519_adx_square.S - ../third_party/fiat/asm/fiat_p256_adx_mul.S - ../third_party/fiat/asm/fiat_p256_adx_sqr.S -) -perlasm(CRYPTO_SOURCES aarch64 chacha/chacha-armv8 chacha/asm/chacha-armv8.pl) -perlasm(CRYPTO_SOURCES aarch64 cipher_extra/chacha20_poly1305_armv8 cipher_extra/asm/chacha20_poly1305_armv8.pl) -perlasm(CRYPTO_SOURCES aarch64 test/trampoline-armv8 test/asm/trampoline-armv8.pl) -perlasm(CRYPTO_SOURCES arm chacha/chacha-armv4 chacha/asm/chacha-armv4.pl) -perlasm(CRYPTO_SOURCES arm test/trampoline-armv4 test/asm/trampoline-armv4.pl) -perlasm(CRYPTO_SOURCES x86 chacha/chacha-x86 chacha/asm/chacha-x86.pl) -perlasm(CRYPTO_SOURCES x86 test/trampoline-x86 test/asm/trampoline-x86.pl) -perlasm(CRYPTO_SOURCES x86_64 chacha/chacha-x86_64 chacha/asm/chacha-x86_64.pl) -perlasm(CRYPTO_SOURCES x86_64 cipher_extra/aes128gcmsiv-x86_64 cipher_extra/asm/aes128gcmsiv-x86_64.pl) -perlasm(CRYPTO_SOURCES x86_64 cipher_extra/chacha20_poly1305_x86_64 cipher_extra/asm/chacha20_poly1305_x86_64.pl) -perlasm(CRYPTO_SOURCES x86_64 test/trampoline-x86_64 test/asm/trampoline-x86_64.pl) - -add_custom_command( - OUTPUT err_data.c - COMMAND ${GO_EXECUTABLE} run err_data_generate.go > ${CMAKE_CURRENT_BINARY_DIR}/err_data.c - DEPENDS - err/err_data_generate.go - err/asn1.errordata - err/bio.errordata - err/bn.errordata - err/cipher.errordata - err/conf.errordata - err/dh.errordata - err/digest.errordata - err/dsa.errordata - err/ecdh.errordata - err/ecdsa.errordata - err/ec.errordata - err/engine.errordata - err/evp.errordata - err/hkdf.errordata - err/obj.errordata - err/pem.errordata - err/pkcs7.errordata - err/pkcs8.errordata - err/rsa.errordata - err/ssl.errordata - err/trust_token.errordata - err/x509.errordata - err/x509v3.errordata - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/err -) - -add_library( - crypto - - asn1/a_bitstr.c - asn1/a_bool.c - asn1/a_d2i_fp.c - asn1/a_dup.c - asn1/a_gentm.c - asn1/a_i2d_fp.c - asn1/a_int.c - asn1/a_mbstr.c - asn1/a_object.c - asn1/a_octet.c - asn1/a_strex.c - asn1/a_strnid.c - asn1/a_time.c - asn1/a_type.c - asn1/a_utctm.c - asn1/asn1_lib.c - asn1/asn1_par.c - asn1/asn_pack.c - asn1/f_int.c - asn1/f_string.c - asn1/tasn_dec.c - asn1/tasn_enc.c - asn1/tasn_fre.c - asn1/tasn_new.c - asn1/tasn_typ.c - asn1/tasn_utl.c - asn1/posix_time.c - base64/base64.c - bio/bio.c - bio/bio_mem.c - bio/connect.c - bio/errno.c - bio/fd.c - bio/file.c - bio/hexdump.c - bio/pair.c - bio/printf.c - bio/socket.c - bio/socket_helper.c - blake2/blake2.c - bn_extra/bn_asn1.c - bn_extra/convert.c - buf/buf.c - bytestring/asn1_compat.c - bytestring/ber.c - bytestring/cbb.c - bytestring/cbs.c - bytestring/unicode.c - chacha/chacha.c - cipher_extra/cipher_extra.c - cipher_extra/derive_key.c - cipher_extra/e_aesctrhmac.c - cipher_extra/e_aesgcmsiv.c - cipher_extra/e_chacha20poly1305.c - cipher_extra/e_des.c - cipher_extra/e_null.c - cipher_extra/e_rc2.c - cipher_extra/e_rc4.c - cipher_extra/e_tls.c - cipher_extra/tls_cbc.c - conf/conf.c - cpu_aarch64_apple.c - cpu_aarch64_openbsd.c - cpu_aarch64_fuchsia.c - cpu_aarch64_linux.c - cpu_aarch64_sysreg.c - cpu_aarch64_win.c - cpu_arm_freebsd.c - cpu_arm_linux.c - cpu_intel.c - crypto.c - curve25519/curve25519.c - curve25519/curve25519_64_adx.c - curve25519/spake25519.c - des/des.c - dh_extra/params.c - dh_extra/dh_asn1.c - digest_extra/digest_extra.c - dsa/dsa.c - dsa/dsa_asn1.c - ecdh_extra/ecdh_extra.c - ecdsa_extra/ecdsa_asn1.c - ec_extra/ec_asn1.c - ec_extra/ec_derive.c - ec_extra/hash_to_curve.c - err/err.c - err_data.c - engine/engine.c - evp/evp.c - evp/evp_asn1.c - evp/evp_ctx.c - evp/p_dsa_asn1.c - evp/p_ec.c - evp/p_ec_asn1.c - evp/p_ed25519.c - evp/p_ed25519_asn1.c - evp/p_hkdf.c - evp/p_rsa.c - evp/p_rsa_asn1.c - evp/p_x25519.c - evp/p_x25519_asn1.c - evp/pbkdf.c - evp/print.c - evp/scrypt.c - evp/sign.c - ex_data.c - hpke/hpke.c - hrss/hrss.c - keccak/keccak.c - kyber/kyber.c - lhash/lhash.c - mem.c - obj/obj.c - obj/obj_xref.c - pem/pem_all.c - pem/pem_info.c - pem/pem_lib.c - pem/pem_oth.c - pem/pem_pk8.c - pem/pem_pkey.c - pem/pem_x509.c - pem/pem_xaux.c - pkcs7/pkcs7.c - pkcs7/pkcs7_x509.c - pkcs8/pkcs8.c - pkcs8/pkcs8_x509.c - pkcs8/p5_pbev2.c - poly1305/poly1305.c - poly1305/poly1305_arm.c - poly1305/poly1305_vec.c - pool/pool.c - rand_extra/deterministic.c - rand_extra/forkunsafe.c - rand_extra/getentropy.c - rand_extra/ios.c - rand_extra/passive.c - rand_extra/rand_extra.c - rand_extra/trusty.c - rand_extra/windows.c - rc4/rc4.c - refcount.c - rsa_extra/rsa_asn1.c - rsa_extra/rsa_crypt.c - rsa_extra/rsa_print.c - spx/address.c - spx/fors.c - spx/merkle.c - spx/spx.c - spx/thash.c - spx/spx_util.c - spx/wots.c - stack/stack.c - siphash/siphash.c - thread.c - thread_none.c - thread_pthread.c - thread_win.c - thread_win.cc - trust_token/pmbtoken.c - trust_token/trust_token.c - trust_token/voprf.c - x509/a_digest.c - x509/a_sign.c - x509/a_verify.c - x509/algorithm.c - x509/asn1_gen.c - x509/by_dir.c - x509/by_file.c - x509/i2d_pr.c - x509/name_print.c - x509/policy.c - x509/rsa_pss.c - x509/t_crl.c - x509/t_req.c - x509/t_x509.c - x509/t_x509a.c - x509/v3_akey.c - x509/v3_akeya.c - x509/v3_alt.c - x509/v3_bcons.c - x509/v3_bitst.c - x509/v3_conf.c - x509/v3_cpols.c - x509/v3_crld.c - x509/v3_enum.c - x509/v3_extku.c - x509/v3_genn.c - x509/v3_ia5.c - x509/v3_info.c - x509/v3_int.c - x509/v3_lib.c - x509/v3_ncons.c - x509/v3_ocsp.c - x509/v3_pcons.c - x509/v3_pmaps.c - x509/v3_prn.c - x509/v3_purp.c - x509/v3_skey.c - x509/v3_utl.c - x509/x_algor.c - x509/x_all.c - x509/x_attrib.c - x509/x_crl.c - x509/x_exten.c - x509/x_name.c - x509/x_pubkey.c - x509/x_req.c - x509/x_sig.c - x509/x_spki.c - x509/x_val.c - x509/x_x509.c - x509/x_x509a.c - x509/x509_att.c - x509/x509_cmp.c - x509/x509_d2.c - x509/x509_def.c - x509/x509_ext.c - x509/x509_lu.c - x509/x509_obj.c - x509/x509_req.c - x509/x509_set.c - x509/x509_trs.c - x509/x509_txt.c - x509/x509_v3.c - x509/x509_vfy.c - x509/x509_vpm.c - x509/x509.c - x509/x509cset.c - x509/x509name.c - x509/x509rset.c - x509/x509spki.c - - $ - ${CRYPTO_FIPS_OBJECTS} -) -if(OPENSSL_ASM) - target_sources(crypto PRIVATE ${CRYPTO_SOURCES_ASM}) -endif() -if(OPENSSL_NASM) - target_sources(crypto PRIVATE ${CRYPTO_SOURCES_NASM}) -endif() -target_include_directories(crypto PUBLIC - $ - $ -) -if (BORINGSSL_BUILD_TESTS) -install_if_enabled(TARGETS crypto EXPORT OpenSSLTargets ${INSTALL_DESTINATION_DEFAULT}) -endif() - -set_property(TARGET crypto PROPERTY EXPORT_NAME Crypto) - -if (MSVC) - set_target_properties(crypto PROPERTIES - COMPILE_PDB_NAME crypto - COMPILE_PDB_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR} - ) -endif() - -if(FIPS_SHARED) - # Rewrite libcrypto.so to inject the correct module hash value. This assumes - # UNIX-style library naming, but we only support FIPS mode on Linux anyway. - add_custom_command( - TARGET crypto POST_BUILD - COMMAND ${GO_EXECUTABLE} run - ${CMAKE_CURRENT_SOURCE_DIR}/../util/fipstools/inject_hash/inject_hash.go - -o libcrypto.so -in-object libcrypto.so - # The DEPENDS argument to a POST_BUILD rule appears to be ignored. Thus - # go_executable isn't used (as it doesn't get built), but we list this - # dependency anyway in case it starts working in some CMake version. - DEPENDS ../util/fipstools/inject_hash/inject_hash.go - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) -endif() - -add_dependencies(crypto boringssl_prefix_symbols) - -if(FIPS_DELOCATE OR FIPS_SHARED) - add_dependencies(crypto bcm_o_target) -endif() - -set_target_properties(crypto PROPERTIES LINKER_LANGUAGE C) - -if(WIN32) - target_link_libraries(crypto ws2_32) -endif() - -# CMAKE_SYSTEM_NAME is "Generic" for embedded OSes: -# https://cmake.org/cmake/help/book/mastering-cmake/chapter/Cross%20Compiling%20With%20CMake.html#toolchain-files -# -# For now we assume embedded OSes do not have threads. Additionally, the Threads -# package does not work with Android, but Android does not require any extra -# parameters to link pthreads. -if(NOT WIN32 AND NOT CMAKE_SYSTEM_NAME MATCHES "^(Generic|Android)$") - find_package(Threads REQUIRED) - target_link_libraries(crypto Threads::Threads) -endif() - -# Every target depends on crypto, so we add libcxx as a dependency here to -# simplify injecting it everywhere. -if(USE_CUSTOM_LIBCXX) - target_link_libraries(crypto libcxx) -endif() diff --git a/yass/third_party/boringssl/src/crypto/bytestring/unicode.c b/yass/third_party/boringssl/src/crypto/bytestring/unicode.c index 10fba07c97..76f37f9090 100644 --- a/yass/third_party/boringssl/src/crypto/bytestring/unicode.c +++ b/yass/third_party/boringssl/src/crypto/bytestring/unicode.c @@ -18,11 +18,12 @@ static int is_valid_code_point(uint32_t v) { - // References in the following are to Unicode 9.0.0. + // References in the following are to Unicode 15.0.0. if (// The Unicode space runs from zero to 0x10ffff (3.4 D9). v > 0x10ffff || // Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved - // (3.4 D14) + // as noncharacters (3.4 D14). See also 23.7. As our APIs are intended for + // "open interchange", such as ASN.1, we reject them. (v & 0xfffe) == 0xfffe || (v >= 0xfdd0 && v <= 0xfdef) || // Surrogate code points are invalid (3.2 C1). diff --git a/yass/third_party/boringssl/src/crypto/cipher_extra/e_des.c b/yass/third_party/boringssl/src/crypto/cipher_extra/e_des.c index a4a876e028..62ac2a03a6 100644 --- a/yass/third_party/boringssl/src/crypto/cipher_extra/e_des.c +++ b/yass/third_party/boringssl/src/crypto/cipher_extra/e_des.c @@ -85,17 +85,14 @@ static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, } static const EVP_CIPHER evp_des_cbc = { - /* nid = */ NID_des_cbc, - /* block_size = */ 8, - /* key_len = */ 8, - /* iv_len = */ 8, - /* ctx_size = */ sizeof(EVP_DES_KEY), - /* flags = */ EVP_CIPH_CBC_MODE, - /* app_data = */ NULL, - /* init = */ des_init_key, - /* cipher = */ des_cbc_cipher, - /* cleanup = */ NULL, - /* ctrl = */ NULL, + .nid = NID_des_cbc, + .block_size = 8, + .key_len = 8, + .iv_len = 8, + .ctx_size = sizeof(EVP_DES_KEY), + .flags = EVP_CIPH_CBC_MODE, + .init = des_init_key, + .cipher = des_cbc_cipher, }; const EVP_CIPHER *EVP_des_cbc(void) { return &evp_des_cbc; } @@ -115,17 +112,14 @@ static int des_ecb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, } static const EVP_CIPHER evp_des_ecb = { - /* nid = */ NID_des_ecb, - /* block_size = */ 8, - /* key_len = */ 8, - /* iv_len = */ 0, - /* ctx_size = */ sizeof(EVP_DES_KEY), - /* flags = */ EVP_CIPH_ECB_MODE, - /* app_data = */ NULL, - /* init = */ des_init_key, - /* cipher = */ des_ecb_cipher, - /* cleanup = */ NULL, - /* ctrl = */ NULL, + .nid = NID_des_ecb, + .block_size = 8, + .key_len = 8, + .iv_len = 0, + .ctx_size = sizeof(EVP_DES_KEY), + .flags = EVP_CIPH_ECB_MODE, + .init = des_init_key, + .cipher = des_ecb_cipher, }; const EVP_CIPHER *EVP_des_ecb(void) { return &evp_des_ecb; } @@ -155,17 +149,14 @@ static int des_ede3_cbc_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, } static const EVP_CIPHER evp_des_ede3_cbc = { - /* nid = */ NID_des_ede3_cbc, - /* block_size = */ 8, - /* key_len = */ 24, - /* iv_len = */ 8, - /* ctx_size = */ sizeof(DES_EDE_KEY), - /* flags = */ EVP_CIPH_CBC_MODE, - /* app_data = */ NULL, - /* init = */ des_ede3_init_key, - /* cipher = */ des_ede3_cbc_cipher, - /* cleanup = */ NULL, - /* ctrl = */ NULL, + .nid = NID_des_ede3_cbc, + .block_size = 8, + .key_len = 24, + .iv_len = 8, + .ctx_size = sizeof(DES_EDE_KEY), + .flags = EVP_CIPH_CBC_MODE, + .init = des_ede3_init_key, + .cipher = des_ede3_cbc_cipher, }; const EVP_CIPHER *EVP_des_ede3_cbc(void) { return &evp_des_ede3_cbc; } @@ -181,17 +172,14 @@ static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key, } static const EVP_CIPHER evp_des_ede_cbc = { - /* nid = */ NID_des_ede_cbc, - /* block_size = */ 8, - /* key_len = */ 16, - /* iv_len = */ 8, - /* ctx_size = */ sizeof(DES_EDE_KEY), - /* flags = */ EVP_CIPH_CBC_MODE, - /* app_data = */ NULL, - /* init = */ des_ede_init_key, - /* cipher = */ des_ede3_cbc_cipher, - /* cleanup = */ NULL, - /* ctrl = */ NULL, + .nid = NID_des_ede_cbc, + .block_size = 8, + .key_len = 16, + .iv_len = 8, + .ctx_size = sizeof(DES_EDE_KEY), + .flags = EVP_CIPH_CBC_MODE, + .init = des_ede_init_key, + .cipher = des_ede3_cbc_cipher, }; const EVP_CIPHER *EVP_des_ede_cbc(void) { return &evp_des_ede_cbc; } @@ -212,33 +200,27 @@ static int des_ede_ecb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, } static const EVP_CIPHER evp_des_ede = { - /* nid = */ NID_des_ede_ecb, - /* block_size = */ 8, - /* key_len = */ 16, - /* iv_len = */ 0, - /* ctx_size = */ sizeof(DES_EDE_KEY), - /* flags = */ EVP_CIPH_ECB_MODE, - /* app_data = */ NULL, - /* init = */ des_ede_init_key, - /* cipher = */ des_ede_ecb_cipher, - /* cleanup = */ NULL, - /* ctrl = */ NULL, + .nid = NID_des_ede_ecb, + .block_size = 8, + .key_len = 16, + .iv_len = 0, + .ctx_size = sizeof(DES_EDE_KEY), + .flags = EVP_CIPH_ECB_MODE, + .init = des_ede_init_key, + .cipher = des_ede_ecb_cipher, }; const EVP_CIPHER *EVP_des_ede(void) { return &evp_des_ede; } static const EVP_CIPHER evp_des_ede3 = { - /* nid = */ NID_des_ede3_ecb, - /* block_size = */ 8, - /* key_len = */ 24, - /* iv_len = */ 0, - /* ctx_size = */ sizeof(DES_EDE_KEY), - /* flags = */ EVP_CIPH_ECB_MODE, - /* app_data = */ NULL, - /* init = */ des_ede3_init_key, - /* cipher = */ des_ede_ecb_cipher, - /* cleanup = */ NULL, - /* ctrl = */ NULL, + .nid = NID_des_ede3_ecb, + .block_size = 8, + .key_len = 24, + .iv_len = 0, + .ctx_size = sizeof(DES_EDE_KEY), + .flags = EVP_CIPH_ECB_MODE, + .init = des_ede3_init_key, + .cipher = des_ede_ecb_cipher, }; const EVP_CIPHER *EVP_des_ede3(void) { return &evp_des_ede3; } diff --git a/yass/third_party/boringssl/src/crypto/cipher_extra/e_null.c b/yass/third_party/boringssl/src/crypto/cipher_extra/e_null.c index e9ee8adc79..ad99df924c 100644 --- a/yass/third_party/boringssl/src/crypto/cipher_extra/e_null.c +++ b/yass/third_party/boringssl/src/crypto/cipher_extra/e_null.c @@ -78,9 +78,13 @@ static int null_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, } static const EVP_CIPHER n_cipher = { - NID_undef, 1 /* block size */, 0 /* key_len */, 0 /* iv_len */, - 0 /* ctx_size */, 0 /* flags */, NULL /* app_data */, null_init_key, - null_cipher, NULL /* cleanup */, NULL /* ctrl */, + .nid = NID_undef, + .block_size = 1, + .key_len = 0, + .iv_len = 0, + .ctx_size = 0, + .init = null_init_key, + .cipher = null_cipher, }; const EVP_CIPHER *EVP_enc_null(void) { return &n_cipher; } diff --git a/yass/third_party/boringssl/src/crypto/cipher_extra/e_rc2.c b/yass/third_party/boringssl/src/crypto/cipher_extra/e_rc2.c index ffc5e6b1d9..c2a143ebac 100644 --- a/yass/third_party/boringssl/src/crypto/cipher_extra/e_rc2.c +++ b/yass/third_party/boringssl/src/crypto/cipher_extra/e_rc2.c @@ -427,37 +427,29 @@ static int rc2_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr) { } static const EVP_CIPHER rc2_40_cbc = { - NID_rc2_40_cbc, - 8 /* block size */, - 5 /* 40 bit */, - 8 /* iv len */, - sizeof(EVP_RC2_KEY), - EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH | EVP_CIPH_CTRL_INIT, - NULL /* app_data */, - rc2_init_key, - rc2_cbc_cipher, - NULL, - rc2_ctrl, + .nid = NID_rc2_40_cbc, + .block_size = 8, + .key_len = 5 /* 40 bit */, + .iv_len = 8, + .ctx_size = sizeof(EVP_RC2_KEY), + .flags = EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH | EVP_CIPH_CTRL_INIT, + .init = rc2_init_key, + .cipher = rc2_cbc_cipher, + .ctrl = rc2_ctrl, }; -const EVP_CIPHER *EVP_rc2_40_cbc(void) { - return &rc2_40_cbc; -} +const EVP_CIPHER *EVP_rc2_40_cbc(void) { return &rc2_40_cbc; } static const EVP_CIPHER rc2_cbc = { - NID_rc2_cbc, - 8 /* block size */, - 16 /* 128 bit */, - 8 /* iv len */, - sizeof(EVP_RC2_KEY), - EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH | EVP_CIPH_CTRL_INIT, - NULL /* app_data */, - rc2_init_key, - rc2_cbc_cipher, - NULL, - rc2_ctrl, + .nid = NID_rc2_cbc, + .block_size = 8, + .key_len = 16 /* 128 bit */, + .iv_len = 8, + .ctx_size = sizeof(EVP_RC2_KEY), + .flags = EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH | EVP_CIPH_CTRL_INIT, + .init = rc2_init_key, + .cipher = rc2_cbc_cipher, + .ctrl = rc2_ctrl, }; -const EVP_CIPHER *EVP_rc2_cbc(void) { - return &rc2_cbc; -} +const EVP_CIPHER *EVP_rc2_cbc(void) { return &rc2_cbc; } diff --git a/yass/third_party/boringssl/src/crypto/cipher_extra/e_rc4.c b/yass/third_party/boringssl/src/crypto/cipher_extra/e_rc4.c index 2f4f9bbac3..0c3404fd72 100644 --- a/yass/third_party/boringssl/src/crypto/cipher_extra/e_rc4.c +++ b/yass/third_party/boringssl/src/crypto/cipher_extra/e_rc4.c @@ -81,9 +81,14 @@ static int rc4_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, } static const EVP_CIPHER rc4 = { - NID_rc4, 1 /* block_size */, 16 /* key_size */, - 0 /* iv_len */, sizeof(RC4_KEY), EVP_CIPH_VARIABLE_LENGTH, - NULL /* app_data */, rc4_init_key, rc4_cipher, - NULL /* cleanup */, NULL /* ctrl */, }; + .nid = NID_rc4, + .block_size = 1, + .key_len = 16, + .iv_len = 0, + .ctx_size = sizeof(RC4_KEY), + .flags = EVP_CIPH_VARIABLE_LENGTH, + .init = rc4_init_key, + .cipher = rc4_cipher, +}; const EVP_CIPHER *EVP_rc4(void) { return &rc4; } diff --git a/yass/third_party/boringssl/src/crypto/cipher_extra/tls_cbc.c b/yass/third_party/boringssl/src/crypto/cipher_extra/tls_cbc.c index ac6ca43f2f..ddbe4f2070 100644 --- a/yass/third_party/boringssl/src/crypto/cipher_extra/tls_cbc.c +++ b/yass/third_party/boringssl/src/crypto/cipher_extra/tls_cbc.c @@ -121,8 +121,8 @@ void EVP_tls_cbc_copy_mac(uint8_t *out, size_t md_size, const uint8_t *in, size_t mac_end = in_len; size_t mac_start = mac_end - md_size; - assert(orig_len >= in_len); - assert(in_len >= md_size); + declassify_assert(orig_len >= in_len); + declassify_assert(in_len >= md_size); assert(md_size <= EVP_MAX_MD_SIZE); assert(md_size > 0); diff --git a/yass/third_party/boringssl/src/crypto/curve25519/curve25519.c b/yass/third_party/boringssl/src/crypto/curve25519/curve25519.c index 581d74084e..761af4cc81 100644 --- a/yass/third_party/boringssl/src/crypto/curve25519/curve25519.c +++ b/yass/third_party/boringssl/src/crypto/curve25519/curve25519.c @@ -81,7 +81,7 @@ typedef uint64_t fe_limb_t; #define assert_fe(f) \ do { \ for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \ - assert(f[_assert_fe_i] <= UINT64_C(0x8cccccccccccc)); \ + declassify_assert(f[_assert_fe_i] <= UINT64_C(0x8cccccccccccc)); \ } \ } while (0) @@ -98,7 +98,7 @@ typedef uint64_t fe_limb_t; #define assert_fe_loose(f) \ do { \ for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \ - assert(f[_assert_fe_i] <= UINT64_C(0x1a666666666664)); \ + declassify_assert(f[_assert_fe_i] <= UINT64_C(0x1a666666666664)); \ } \ } while (0) @@ -120,8 +120,8 @@ typedef uint32_t fe_limb_t; #define assert_fe(f) \ do { \ for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \ - assert(f[_assert_fe_i] <= \ - ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \ + declassify_assert(f[_assert_fe_i] <= \ + ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \ } \ } while (0) @@ -138,8 +138,8 @@ typedef uint32_t fe_limb_t; #define assert_fe_loose(f) \ do { \ for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \ - assert(f[_assert_fe_i] <= \ - ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \ + declassify_assert(f[_assert_fe_i] <= \ + ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \ } \ } while (0) @@ -150,7 +150,7 @@ static_assert(sizeof(fe) == sizeof(fe_limb_t) * FE_NUM_LIMBS, static void fe_frombytes_strict(fe *h, const uint8_t s[32]) { // |fiat_25519_from_bytes| requires the top-most bit be clear. - assert((s[31] & 0x80) == 0); + declassify_assert((s[31] & 0x80) == 0); fiat_25519_from_bytes(h->v, s); assert_fe(h->v); } diff --git a/yass/third_party/boringssl/src/crypto/digest_extra/digest_extra.c b/yass/third_party/boringssl/src/crypto/digest_extra/digest_extra.c index 08ed671a80..f575035495 100644 --- a/yass/third_party/boringssl/src/crypto/digest_extra/digest_extra.c +++ b/yass/third_party/boringssl/src/crypto/digest_extra/digest_extra.c @@ -220,6 +220,7 @@ int EVP_marshal_digest_algorithm(CBB *cbb, const EVP_MD *md) { return 0; } + // TODO(crbug.com/boringssl/710): Is this correct? See RFC 4055, section 2.1. if (!CBB_add_asn1(&algorithm, &null, CBS_ASN1_NULL) || !CBB_flush(cbb)) { return 0; diff --git a/yass/third_party/boringssl/src/crypto/dsa/dsa.c b/yass/third_party/boringssl/src/crypto/dsa/dsa.c index c42b3335f0..a35b3849e4 100644 --- a/yass/third_party/boringssl/src/crypto/dsa/dsa.c +++ b/yass/third_party/boringssl/src/crypto/dsa/dsa.c @@ -274,6 +274,8 @@ int DSA_generate_parameters_ex(DSA *dsa, unsigned bits, const uint8_t *seed_in, if (!RAND_bytes(seed, qsize)) { goto err; } + // DSA parameters are public. + CONSTTIME_DECLASSIFY(seed, qsize); } else { // If we come back through, use random seed next time. seed_in = NULL; @@ -513,6 +515,9 @@ int DSA_generate_key(DSA *dsa) { goto err; } + // The public key is computed from the private key, but is public. + bn_declassify(pub_key); + dsa->priv_key = priv_key; dsa->pub_key = pub_key; ok = 1; @@ -649,6 +654,10 @@ redo: goto err; } + // The signature is computed from the private key, but is public. + bn_declassify(r); + bn_declassify(s); + // Redo if r or s is zero as required by FIPS 186-3: this is // very unlikely. if (BN_is_zero(r) || BN_is_zero(s)) { @@ -681,7 +690,7 @@ err: return ret; } -int DSA_do_verify(const uint8_t *digest, size_t digest_len, DSA_SIG *sig, +int DSA_do_verify(const uint8_t *digest, size_t digest_len, const DSA_SIG *sig, const DSA *dsa) { int valid; if (!DSA_do_check_signature(&valid, digest, digest_len, sig, dsa)) { @@ -691,7 +700,8 @@ int DSA_do_verify(const uint8_t *digest, size_t digest_len, DSA_SIG *sig, } int DSA_do_check_signature(int *out_valid, const uint8_t *digest, - size_t digest_len, DSA_SIG *sig, const DSA *dsa) { + size_t digest_len, const DSA_SIG *sig, + const DSA *dsa) { *out_valid = 0; if (!dsa_check_key(dsa)) { return 0; @@ -899,15 +909,19 @@ static int dsa_sign_setup(const DSA *dsa, BN_CTX *ctx, BIGNUM **out_kinv, ctx) || // Compute r = (g^k mod p) mod q !BN_mod_exp_mont_consttime(r, dsa->g, &k, dsa->p, ctx, - dsa->method_mont_p) || - // Note |BN_mod| below is not constant-time and may leak information about - // |r|. |dsa->p| may be significantly larger than |dsa->q|, so this is not - // easily performed in constant-time with Montgomery reduction. - // - // However, |r| at this point is g^k (mod p). It is almost the value of - // |r| revealed in the signature anyway (g^k (mod p) (mod q)), going from - // it to |k| would require computing a discrete log. - !BN_mod(r, r, dsa->q, ctx) || + dsa->method_mont_p)) { + OPENSSL_PUT_ERROR(DSA, ERR_R_BN_LIB); + goto err; + } + // Note |BN_mod| below is not constant-time and may leak information about + // |r|. |dsa->p| may be significantly larger than |dsa->q|, so this is not + // easily performed in constant-time with Montgomery reduction. + // + // However, |r| at this point is g^k (mod p). It is almost the value of |r| + // revealed in the signature anyway (g^k (mod p) (mod q)), going from it to + // |k| would require computing a discrete log. + bn_declassify(r); + if (!BN_mod(r, r, dsa->q, ctx) || // Compute part of 's = inv(k) (m + xr) mod q' using Fermat's Little // Theorem. !bn_mod_inverse_prime(kinv, &k, dsa->q, ctx, dsa->method_mont_q)) { diff --git a/yass/third_party/boringssl/src/crypto/dsa/dsa_asn1.c b/yass/third_party/boringssl/src/crypto/dsa/dsa_asn1.c index 1985bb4f6f..1caf4ae6d7 100644 --- a/yass/third_party/boringssl/src/crypto/dsa/dsa_asn1.c +++ b/yass/third_party/boringssl/src/crypto/dsa/dsa_asn1.c @@ -119,8 +119,9 @@ int dsa_check_key(const DSA *dsa) { if (dsa->priv_key != NULL) { // The private key is a non-zero element of the scalar field, determined by // |q|. - if (BN_is_negative(dsa->priv_key) || BN_is_zero(dsa->priv_key) || - BN_cmp(dsa->priv_key, dsa->q) >= 0) { + if (BN_is_negative(dsa->priv_key) || + constant_time_declassify_int(BN_is_zero(dsa->priv_key)) || + constant_time_declassify_int(BN_cmp(dsa->priv_key, dsa->q) >= 0)) { OPENSSL_PUT_ERROR(DSA, DSA_R_INVALID_PARAMETERS); return 0; } diff --git a/yass/third_party/boringssl/src/crypto/err/evp.errordata b/yass/third_party/boringssl/src/crypto/err/evp.errordata index a581b1b30b..f65b7b0a88 100644 --- a/yass/third_party/boringssl/src/crypto/err/evp.errordata +++ b/yass/third_party/boringssl/src/crypto/err/evp.errordata @@ -7,6 +7,7 @@ EVP,136,EMPTY_PSK EVP,105,ENCODE_ERROR EVP,106,EXPECTING_AN_EC_KEY_KEY EVP,107,EXPECTING_AN_RSA_KEY +EVP,138,EXPECTING_A_DH_KEY EVP,108,EXPECTING_A_DSA_KEY EVP,109,ILLEGAL_OR_UNSUPPORTED_PADDING_MODE EVP,137,INVALID_BUFFER_SIZE diff --git a/yass/third_party/boringssl/src/crypto/evp/evp.c b/yass/third_party/boringssl/src/crypto/evp/evp.c index f3f3d7e55c..d81abe9993 100644 --- a/yass/third_party/boringssl/src/crypto/evp/evp.c +++ b/yass/third_party/boringssl/src/crypto/evp/evp.c @@ -59,12 +59,9 @@ #include #include -#include -#include #include #include #include -#include #include #include "internal.h" @@ -149,9 +146,7 @@ int EVP_PKEY_cmp(const EVP_PKEY *a, const EVP_PKEY *b) { int EVP_PKEY_copy_parameters(EVP_PKEY *to, const EVP_PKEY *from) { if (to->type == EVP_PKEY_NONE) { - if (!EVP_PKEY_set_type(to, from->type)) { - return 0; - } + evp_pkey_set_method(to, from->ameth); } else if (to->type != from->type) { OPENSSL_PUT_ERROR(EVP, EVP_R_DIFFERENT_KEY_TYPES); return 0; @@ -225,117 +220,21 @@ static const EVP_PKEY_ASN1_METHOD *evp_pkey_asn1_find(int nid) { } } -static void evp_pkey_set_method(EVP_PKEY *pkey, - const EVP_PKEY_ASN1_METHOD *method) { +void evp_pkey_set_method(EVP_PKEY *pkey, const EVP_PKEY_ASN1_METHOD *method) { free_it(pkey); pkey->ameth = method; pkey->type = pkey->ameth->pkey_id; } int EVP_PKEY_type(int nid) { - const EVP_PKEY_ASN1_METHOD *meth = evp_pkey_asn1_find(nid); - if (meth == NULL) { - return NID_undef; - } - return meth->pkey_id; + // In OpenSSL, this was used to map between type aliases. BoringSSL supports + // no type aliases, so this function is just the identity. + return nid; } -int EVP_PKEY_set1_RSA(EVP_PKEY *pkey, RSA *key) { - if (EVP_PKEY_assign_RSA(pkey, key)) { - RSA_up_ref(key); - return 1; - } - return 0; -} - -int EVP_PKEY_assign_RSA(EVP_PKEY *pkey, RSA *key) { - evp_pkey_set_method(pkey, &rsa_asn1_meth); - pkey->pkey = key; - return key != NULL; -} - -RSA *EVP_PKEY_get0_RSA(const EVP_PKEY *pkey) { - if (pkey->type != EVP_PKEY_RSA) { - OPENSSL_PUT_ERROR(EVP, EVP_R_EXPECTING_AN_RSA_KEY); - return NULL; - } - return pkey->pkey; -} - -RSA *EVP_PKEY_get1_RSA(const EVP_PKEY *pkey) { - RSA *rsa = EVP_PKEY_get0_RSA(pkey); - if (rsa != NULL) { - RSA_up_ref(rsa); - } - return rsa; -} - -int EVP_PKEY_set1_DSA(EVP_PKEY *pkey, DSA *key) { - if (EVP_PKEY_assign_DSA(pkey, key)) { - DSA_up_ref(key); - return 1; - } - return 0; -} - -int EVP_PKEY_assign_DSA(EVP_PKEY *pkey, DSA *key) { - evp_pkey_set_method(pkey, &dsa_asn1_meth); - pkey->pkey = key; - return key != NULL; -} - -DSA *EVP_PKEY_get0_DSA(const EVP_PKEY *pkey) { - if (pkey->type != EVP_PKEY_DSA) { - OPENSSL_PUT_ERROR(EVP, EVP_R_EXPECTING_A_DSA_KEY); - return NULL; - } - return pkey->pkey; -} - -DSA *EVP_PKEY_get1_DSA(const EVP_PKEY *pkey) { - DSA *dsa = EVP_PKEY_get0_DSA(pkey); - if (dsa != NULL) { - DSA_up_ref(dsa); - } - return dsa; -} - -int EVP_PKEY_set1_EC_KEY(EVP_PKEY *pkey, EC_KEY *key) { - if (EVP_PKEY_assign_EC_KEY(pkey, key)) { - EC_KEY_up_ref(key); - return 1; - } - return 0; -} - -int EVP_PKEY_assign_EC_KEY(EVP_PKEY *pkey, EC_KEY *key) { - evp_pkey_set_method(pkey, &ec_asn1_meth); - pkey->pkey = key; - return key != NULL; -} - -EC_KEY *EVP_PKEY_get0_EC_KEY(const EVP_PKEY *pkey) { - if (pkey->type != EVP_PKEY_EC) { - OPENSSL_PUT_ERROR(EVP, EVP_R_EXPECTING_AN_EC_KEY_KEY); - return NULL; - } - return pkey->pkey; -} - -EC_KEY *EVP_PKEY_get1_EC_KEY(const EVP_PKEY *pkey) { - EC_KEY *ec_key = EVP_PKEY_get0_EC_KEY(pkey); - if (ec_key != NULL) { - EC_KEY_up_ref(ec_key); - } - return ec_key; -} - -DH *EVP_PKEY_get0_DH(const EVP_PKEY *pkey) { return NULL; } -DH *EVP_PKEY_get1_DH(const EVP_PKEY *pkey) { return NULL; } - int EVP_PKEY_assign(EVP_PKEY *pkey, int type, void *key) { - // This function can only be used to assign RSA, DSA, and EC keys. Other key - // types have internal representations which are not exposed through the + // This function can only be used to assign RSA, DSA, EC, and DH keys. Other + // key types have internal representations which are not exposed through the // public API. switch (type) { case EVP_PKEY_RSA: @@ -344,6 +243,8 @@ int EVP_PKEY_assign(EVP_PKEY *pkey, int type, void *key) { return EVP_PKEY_assign_DSA(pkey, key); case EVP_PKEY_EC: return EVP_PKEY_assign_EC_KEY(pkey, key); + case EVP_PKEY_DH: + return EVP_PKEY_assign_DH(pkey, key); } OPENSSL_PUT_ERROR(EVP, EVP_R_UNSUPPORTED_ALGORITHM); @@ -375,16 +276,26 @@ int EVP_PKEY_set_type(EVP_PKEY *pkey, int type) { EVP_PKEY *EVP_PKEY_new_raw_private_key(int type, ENGINE *unused, const uint8_t *in, size_t len) { - EVP_PKEY *ret = EVP_PKEY_new(); - if (ret == NULL || - !EVP_PKEY_set_type(ret, type)) { - goto err; + // To avoid pulling in all key types, look for specifically the key types that + // support |set_priv_raw|. + const EVP_PKEY_ASN1_METHOD *method; + switch (type) { + case EVP_PKEY_X25519: + method = &x25519_asn1_meth; + break; + case EVP_PKEY_ED25519: + method = &ed25519_asn1_meth; + break; + default: + OPENSSL_PUT_ERROR(EVP, EVP_R_UNSUPPORTED_ALGORITHM); + return 0; } - if (ret->ameth->set_priv_raw == NULL) { - OPENSSL_PUT_ERROR(EVP, EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE); + EVP_PKEY *ret = EVP_PKEY_new(); + if (ret == NULL) { goto err; } + evp_pkey_set_method(ret, method); if (!ret->ameth->set_priv_raw(ret, in, len)) { goto err; @@ -399,16 +310,26 @@ err: EVP_PKEY *EVP_PKEY_new_raw_public_key(int type, ENGINE *unused, const uint8_t *in, size_t len) { - EVP_PKEY *ret = EVP_PKEY_new(); - if (ret == NULL || - !EVP_PKEY_set_type(ret, type)) { - goto err; + // To avoid pulling in all key types, look for specifically the key types that + // support |set_pub_raw|. + const EVP_PKEY_ASN1_METHOD *method; + switch (type) { + case EVP_PKEY_X25519: + method = &x25519_asn1_meth; + break; + case EVP_PKEY_ED25519: + method = &ed25519_asn1_meth; + break; + default: + OPENSSL_PUT_ERROR(EVP, EVP_R_UNSUPPORTED_ALGORITHM); + return 0; } - if (ret->ameth->set_pub_raw == NULL) { - OPENSSL_PUT_ERROR(EVP, EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE); + EVP_PKEY *ret = EVP_PKEY_new(); + if (ret == NULL) { goto err; } + evp_pkey_set_method(ret, method); if (!ret->ameth->set_pub_raw(ret, in, len)) { goto err; diff --git a/yass/third_party/boringssl/src/crypto/evp/evp_asn1.c b/yass/third_party/boringssl/src/crypto/evp/evp_asn1.c index f270c3e773..11a0b8927f 100644 --- a/yass/third_party/boringssl/src/crypto/evp/evp_asn1.c +++ b/yass/third_party/boringssl/src/crypto/evp/evp_asn1.c @@ -69,6 +69,7 @@ #include "../internal.h" +// We intentionally omit |dh_asn1_meth| from this list. It is not serializable. static const EVP_PKEY_ASN1_METHOD *const kASN1Methods[] = { &rsa_asn1_meth, &ec_asn1_meth, @@ -77,28 +78,26 @@ static const EVP_PKEY_ASN1_METHOD *const kASN1Methods[] = { &x25519_asn1_meth, }; -static int parse_key_type(CBS *cbs, int *out_type) { +static const EVP_PKEY_ASN1_METHOD *parse_key_type(CBS *cbs) { CBS oid; if (!CBS_get_asn1(cbs, &oid, CBS_ASN1_OBJECT)) { - return 0; + return NULL; } for (unsigned i = 0; i < OPENSSL_ARRAY_SIZE(kASN1Methods); i++) { const EVP_PKEY_ASN1_METHOD *method = kASN1Methods[i]; if (CBS_len(&oid) == method->oid_len && OPENSSL_memcmp(CBS_data(&oid), method->oid, method->oid_len) == 0) { - *out_type = method->pkey_id; - return 1; + return method; } } - return 0; + return NULL; } EVP_PKEY *EVP_parse_public_key(CBS *cbs) { // Parse the SubjectPublicKeyInfo. CBS spki, algorithm, key; - int type; uint8_t padding; if (!CBS_get_asn1(cbs, &spki, CBS_ASN1_SEQUENCE) || !CBS_get_asn1(&spki, &algorithm, CBS_ASN1_SEQUENCE) || @@ -107,7 +106,8 @@ EVP_PKEY *EVP_parse_public_key(CBS *cbs) { OPENSSL_PUT_ERROR(EVP, EVP_R_DECODE_ERROR); return NULL; } - if (!parse_key_type(&algorithm, &type)) { + const EVP_PKEY_ASN1_METHOD *method = parse_key_type(&algorithm); + if (method == NULL) { OPENSSL_PUT_ERROR(EVP, EVP_R_UNSUPPORTED_ALGORITHM); return NULL; } @@ -121,10 +121,10 @@ EVP_PKEY *EVP_parse_public_key(CBS *cbs) { // Set up an |EVP_PKEY| of the appropriate type. EVP_PKEY *ret = EVP_PKEY_new(); - if (ret == NULL || - !EVP_PKEY_set_type(ret, type)) { + if (ret == NULL) { goto err; } + evp_pkey_set_method(ret, method); // Call into the type-specific SPKI decoding function. if (ret->ameth->pub_decode == NULL) { @@ -155,7 +155,6 @@ EVP_PKEY *EVP_parse_private_key(CBS *cbs) { // Parse the PrivateKeyInfo. CBS pkcs8, algorithm, key; uint64_t version; - int type; if (!CBS_get_asn1(cbs, &pkcs8, CBS_ASN1_SEQUENCE) || !CBS_get_asn1_uint64(&pkcs8, &version) || version != 0 || @@ -164,7 +163,8 @@ EVP_PKEY *EVP_parse_private_key(CBS *cbs) { OPENSSL_PUT_ERROR(EVP, EVP_R_DECODE_ERROR); return NULL; } - if (!parse_key_type(&algorithm, &type)) { + const EVP_PKEY_ASN1_METHOD *method = parse_key_type(&algorithm); + if (method == NULL) { OPENSSL_PUT_ERROR(EVP, EVP_R_UNSUPPORTED_ALGORITHM); return NULL; } @@ -173,10 +173,10 @@ EVP_PKEY *EVP_parse_private_key(CBS *cbs) { // Set up an |EVP_PKEY| of the appropriate type. EVP_PKEY *ret = EVP_PKEY_new(); - if (ret == NULL || - !EVP_PKEY_set_type(ret, type)) { + if (ret == NULL) { goto err; } + evp_pkey_set_method(ret, method); // Call into the type-specific PrivateKeyInfo decoding function. if (ret->ameth->priv_decode == NULL) { diff --git a/yass/third_party/boringssl/src/crypto/evp/evp_extra_test.cc b/yass/third_party/boringssl/src/crypto/evp/evp_extra_test.cc index 64683861a3..2a77165576 100644 --- a/yass/third_party/boringssl/src/crypto/evp/evp_extra_test.cc +++ b/yass/third_party/boringssl/src/crypto/evp/evp_extra_test.cc @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -1018,40 +1019,122 @@ static void ExpectECGroupAndKey(const EVP_PKEY *pkey, int nid) { } TEST(EVPExtraTest, ECKeygen) { - // |EVP_PKEY_paramgen| may be used as an extremely roundabout way to get an - // |EC_GROUP|. - bssl::UniquePtr ctx(EVP_PKEY_CTX_new_id(EVP_PKEY_EC, nullptr)); - ASSERT_TRUE(ctx); - ASSERT_TRUE(EVP_PKEY_paramgen_init(ctx.get())); - ASSERT_TRUE( - EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx.get(), NID_X9_62_prime256v1)); - EVP_PKEY *raw = nullptr; - ASSERT_TRUE(EVP_PKEY_paramgen(ctx.get(), &raw)); - bssl::UniquePtr pkey(raw); - raw = nullptr; - ExpectECGroupOnly(pkey.get(), NID_X9_62_prime256v1); + for (bool copy : {false, true}) { + SCOPED_TRACE(copy); - // That resulting |EVP_PKEY| may be used as a template for key generation. - ctx.reset(EVP_PKEY_CTX_new(pkey.get(), nullptr)); - ASSERT_TRUE(ctx); - ASSERT_TRUE(EVP_PKEY_keygen_init(ctx.get())); - raw = nullptr; - ASSERT_TRUE(EVP_PKEY_keygen(ctx.get(), &raw)); - pkey.reset(raw); - raw = nullptr; - ExpectECGroupAndKey(pkey.get(), NID_X9_62_prime256v1); + auto maybe_copy = [&](bssl::UniquePtr *ctx) -> bool { + if (copy) { + ctx->reset(EVP_PKEY_CTX_dup(ctx->get())); + } + return *ctx != nullptr; + }; - // |EVP_PKEY_paramgen| may also be skipped. - ctx.reset(EVP_PKEY_CTX_new_id(EVP_PKEY_EC, nullptr)); - ASSERT_TRUE(ctx); - ASSERT_TRUE(EVP_PKEY_keygen_init(ctx.get())); + // |EVP_PKEY_paramgen| may be used as an extremely roundabout way to get an + // |EC_GROUP|. + bssl::UniquePtr ctx( + EVP_PKEY_CTX_new_id(EVP_PKEY_EC, nullptr)); + ASSERT_TRUE(ctx); + ASSERT_TRUE(maybe_copy(&ctx)); + ASSERT_TRUE(EVP_PKEY_paramgen_init(ctx.get())); + ASSERT_TRUE(maybe_copy(&ctx)); + ASSERT_TRUE(EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx.get(), + NID_X9_62_prime256v1)); + ASSERT_TRUE(maybe_copy(&ctx)); + EVP_PKEY *raw = nullptr; + ASSERT_TRUE(EVP_PKEY_paramgen(ctx.get(), &raw)); + bssl::UniquePtr pkey(raw); + raw = nullptr; + ExpectECGroupOnly(pkey.get(), NID_X9_62_prime256v1); + + // That resulting |EVP_PKEY| may be used as a template for key generation. + ctx.reset(EVP_PKEY_CTX_new(pkey.get(), nullptr)); + ASSERT_TRUE(ctx); + ASSERT_TRUE(maybe_copy(&ctx)); + ASSERT_TRUE(EVP_PKEY_keygen_init(ctx.get())); + ASSERT_TRUE(maybe_copy(&ctx)); + raw = nullptr; + ASSERT_TRUE(EVP_PKEY_keygen(ctx.get(), &raw)); + pkey.reset(raw); + raw = nullptr; + ExpectECGroupAndKey(pkey.get(), NID_X9_62_prime256v1); + + // |EVP_PKEY_paramgen| may also be skipped. + ctx.reset(EVP_PKEY_CTX_new_id(EVP_PKEY_EC, nullptr)); + ASSERT_TRUE(ctx); + ASSERT_TRUE(maybe_copy(&ctx)); + ASSERT_TRUE(EVP_PKEY_keygen_init(ctx.get())); + ASSERT_TRUE(maybe_copy(&ctx)); + ASSERT_TRUE(EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx.get(), + NID_X9_62_prime256v1)); + ASSERT_TRUE(maybe_copy(&ctx)); + raw = nullptr; + ASSERT_TRUE(EVP_PKEY_keygen(ctx.get(), &raw)); + pkey.reset(raw); + raw = nullptr; + ExpectECGroupAndKey(pkey.get(), NID_X9_62_prime256v1); + } +} + +TEST(EVPExtraTest, DHKeygen) { + // Set up some DH params in an |EVP_PKEY|. There is currently no API to do + // this from EVP directly. + bssl::UniquePtr p(BN_get_rfc3526_prime_1536(nullptr)); + ASSERT_TRUE(p); + bssl::UniquePtr g(BN_new()); + ASSERT_TRUE(g); + ASSERT_TRUE(BN_set_u64(g.get(), 2)); + bssl::UniquePtr params_dh(DH_new()); + ASSERT_TRUE(params_dh); ASSERT_TRUE( - EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx.get(), NID_X9_62_prime256v1)); - raw = nullptr; - ASSERT_TRUE(EVP_PKEY_keygen(ctx.get(), &raw)); - pkey.reset(raw); - raw = nullptr; - ExpectECGroupAndKey(pkey.get(), NID_X9_62_prime256v1); + DH_set0_pqg(params_dh.get(), p.release(), /*q=*/nullptr, g.release())); + bssl::UniquePtr params(EVP_PKEY_new()); + ASSERT_TRUE(params); + ASSERT_TRUE(EVP_PKEY_set1_DH(params.get(), params_dh.get())); + + for (bool copy : {false, true}) { + SCOPED_TRACE(copy); + + auto maybe_copy = [&](bssl::UniquePtr *ctx) -> bool { + if (copy) { + ctx->reset(EVP_PKEY_CTX_dup(ctx->get())); + } + return *ctx != nullptr; + }; + + // |params| may be used as a template for key generation. + bssl::UniquePtr ctx(EVP_PKEY_CTX_new(params.get(), nullptr)); + ASSERT_TRUE(ctx); + ASSERT_TRUE(maybe_copy(&ctx)); + ASSERT_TRUE(EVP_PKEY_keygen_init(ctx.get())); + ASSERT_TRUE(maybe_copy(&ctx)); + EVP_PKEY *raw = nullptr; + ASSERT_TRUE(EVP_PKEY_keygen(ctx.get(), &raw)); + bssl::UniquePtr pkey(raw); + + EXPECT_EQ(EVP_PKEY_id(pkey.get()), EVP_PKEY_DH); + const DH *dh = EVP_PKEY_get0_DH(pkey.get()); + EXPECT_EQ(0, BN_cmp(DH_get0_p(dh), DH_get0_p(params_dh.get()))); + EXPECT_EQ(0, BN_cmp(DH_get0_g(dh), DH_get0_g(params_dh.get()))); + EXPECT_FALSE(DH_get0_q(dh)); + EXPECT_TRUE(DH_get0_pub_key(dh)); + EXPECT_TRUE(DH_get0_priv_key(dh)); + EXPECT_EQ(1, EVP_PKEY_cmp_parameters(params.get(), pkey.get())); + EXPECT_EQ(0, EVP_PKEY_cmp(params.get(), pkey.get())); + + // Generate a second key. + ctx.reset(EVP_PKEY_CTX_new(params.get(), nullptr)); + ASSERT_TRUE(ctx); + ASSERT_TRUE(maybe_copy(&ctx)); + ASSERT_TRUE(EVP_PKEY_keygen_init(ctx.get())); + ASSERT_TRUE(maybe_copy(&ctx)); + raw = nullptr; + ASSERT_TRUE(EVP_PKEY_keygen(ctx.get(), &raw)); + bssl::UniquePtr pkey2(raw); + + EXPECT_EQ(1, EVP_PKEY_cmp_parameters(params.get(), pkey2.get())); + EXPECT_EQ(1, EVP_PKEY_cmp_parameters(pkey.get(), pkey2.get())); + EXPECT_EQ(0, EVP_PKEY_cmp(pkey.get(), pkey2.get())); + } } // Test that |EVP_PKEY_keygen| works for Ed25519. @@ -1216,3 +1299,11 @@ TEST(EVPExtraTest, Parameters) { EXPECT_FALSE(EVP_PKEY_copy_parameters(rsa.get(), p256.get())); EXPECT_EQ(EVP_PKEY_RSA, EVP_PKEY_id(rsa.get())); } + +TEST(EVPExtraTest, RawKeyUnsupported) { + static const uint8_t kKey[] = {1, 2, 3, 4}; + EXPECT_FALSE( + EVP_PKEY_new_raw_public_key(EVP_PKEY_RSA, nullptr, kKey, sizeof(kKey))); + EXPECT_FALSE( + EVP_PKEY_new_raw_private_key(EVP_PKEY_RSA, nullptr, kKey, sizeof(kKey))); +} diff --git a/yass/third_party/boringssl/src/crypto/evp/evp_test.cc b/yass/third_party/boringssl/src/crypto/evp/evp_test.cc index fafd50bb53..9189d25973 100644 --- a/yass/third_party/boringssl/src/crypto/evp/evp_test.cc +++ b/yass/third_party/boringssl/src/crypto/evp/evp_test.cc @@ -70,9 +70,11 @@ OPENSSL_MSVC_PRAGMA(warning(pop)) #include +#include #include #include #include +#include #include #include #include @@ -249,6 +251,60 @@ static bool ImportKey(FileTest *t, KeyMap *key_map, return true; } +static bool GetOptionalBignum(FileTest *t, bssl::UniquePtr *out, + const std::string &key) { + if (!t->HasAttribute(key)) { + *out = nullptr; + return true; + } + + std::vector bytes; + if (!t->GetBytes(&bytes, key)) { + return false; + } + + out->reset(BN_bin2bn(bytes.data(), bytes.size(), nullptr)); + return *out != nullptr; +} + +static bool ImportDHKey(FileTest *t, KeyMap *key_map) { + bssl::UniquePtr p, q, g, pub_key, priv_key; + if (!GetOptionalBignum(t, &p, "P") || // + !GetOptionalBignum(t, &q, "Q") || // + !GetOptionalBignum(t, &g, "G") || + !GetOptionalBignum(t, &pub_key, "Public") || + !GetOptionalBignum(t, &priv_key, "Private")) { + return false; + } + + bssl::UniquePtr dh(DH_new()); + if (dh == nullptr || !DH_set0_pqg(dh.get(), p.get(), q.get(), g.get())) { + return false; + } + // |DH_set0_pqg| takes ownership on success. + p.release(); + q.release(); + g.release(); + + if (!DH_set0_key(dh.get(), pub_key.get(), priv_key.get())) { + return false; + } + // |DH_set0_key| takes ownership on success. + pub_key.release(); + priv_key.release(); + + bssl::UniquePtr pkey(EVP_PKEY_new()); + if (pkey == nullptr || !EVP_PKEY_set1_DH(pkey.get(), dh.get())) { + return false; + } + + // Save the key for future tests. + const std::string &key_name = t->GetParameter(); + EXPECT_EQ(0u, key_map->count(key_name)) << "Duplicate key: " << key_name; + (*key_map)[key_name] = std::move(pkey); + return true; +} + // SetupContext configures |ctx| based on attributes in |t|, with the exception // of the signing digest which must be configured externally. static bool SetupContext(FileTest *t, KeyMap *key_map, EVP_PKEY_CTX *ctx) { @@ -302,6 +358,9 @@ static bool SetupContext(FileTest *t, KeyMap *key_map, EVP_PKEY_CTX *ctx) { return false; } } + if (t->HasAttribute("DiffieHellmanPad") && !EVP_PKEY_CTX_set_dh_pad(ctx, 1)) { + return false; + } return true; } @@ -372,6 +431,10 @@ static bool TestEVP(FileTest *t, KeyMap *key_map) { return ImportKey(t, key_map, EVP_parse_public_key, EVP_marshal_public_key); } + if (t->GetType() == "DHKey") { + return ImportDHKey(t, key_map); + } + // Load the key. const std::string &key_name = t->GetParameter(); if (key_map->count(key_name) == 0) { diff --git a/yass/third_party/boringssl/src/crypto/evp/evp_tests.txt b/yass/third_party/boringssl/src/crypto/evp/evp_tests.txt index b9c8f9e0b8..e8e9f27a89 100644 --- a/yass/third_party/boringssl/src/crypto/evp/evp_tests.txt +++ b/yass/third_party/boringssl/src/crypto/evp/evp_tests.txt @@ -1717,3 +1717,83 @@ Output = c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552 Derive = X25519-Private DerivePeer = X25519-SmallOrderPeer Error = INVALID_PEER_KEY + +DHKey = DH-Public1 +P = ffffffffffffffffc90fdaa22168c234c4c6628b80dc1cd129024e088a67cc74020bbea63b139b22514a08798e3404ddef9519b3cd3a431b302b0a6df25f14374fe1356d6d51c245e485b576625e7ec6f44c42e9a637ed6b0bff5cb6f406b7edee386bfb5a899fa5ae9f24117c4b1fe649286651ece45b3dc2007cb8a163bf0598da48361c55d39a69163fa8fd24cf5f83655d23dca3ad961c62f356208552bb9ed529077096966d670c354e4abc9804f1746c08ca237327ffffffffffffffff +G = 02 +Public = c680b01bb49e303893a5c339c9448c63dc3d0ffdcb8024a784292ebccfe95bdfb97a456f51cb1decf904704d0dbab69689bd87cde04e4fcb793f66024a43dacd0830ac155f8149aeb42f3d1ec03b05c70a1349492d24e20b58b0283155816465c0efa3b01e78fe935f1633745826a0e5c8d87ca60418d816721503ccaf7540e7e3c13093a8fb72c34c452ab35cb07ec867f58f2c1d0ad6629b6359b2b990a50d29aedaca2efbf0b1b904005859f348797b9be660f438cc763dd2180ec7dd81c9 + +DHKey = DH-Private1 +P = ffffffffffffffffc90fdaa22168c234c4c6628b80dc1cd129024e088a67cc74020bbea63b139b22514a08798e3404ddef9519b3cd3a431b302b0a6df25f14374fe1356d6d51c245e485b576625e7ec6f44c42e9a637ed6b0bff5cb6f406b7edee386bfb5a899fa5ae9f24117c4b1fe649286651ece45b3dc2007cb8a163bf0598da48361c55d39a69163fa8fd24cf5f83655d23dca3ad961c62f356208552bb9ed529077096966d670c354e4abc9804f1746c08ca237327ffffffffffffffff +G = 02 +Public = c680b01bb49e303893a5c339c9448c63dc3d0ffdcb8024a784292ebccfe95bdfb97a456f51cb1decf904704d0dbab69689bd87cde04e4fcb793f66024a43dacd0830ac155f8149aeb42f3d1ec03b05c70a1349492d24e20b58b0283155816465c0efa3b01e78fe935f1633745826a0e5c8d87ca60418d816721503ccaf7540e7e3c13093a8fb72c34c452ab35cb07ec867f58f2c1d0ad6629b6359b2b990a50d29aedaca2efbf0b1b904005859f348797b9be660f438cc763dd2180ec7dd81c9 +Private = 05953ba55a5ff41a700744e06cebcd30f6fd76a6b1f7efb6bdc05028e7db2e50ef56385c65bad4a1cfff232c5d83179559e59a8901a88119ababdcc0c4e4fd75cdf6161fb07a72fb3d4c6c0fb140a2eb3e93627d4f2e93e086ba672149a4fb25594b2c6cb74a97a8e68d45097cc937cf30dd9141dbd3abdd4fb9fec45a240d528efa4a5b5690f40250a96ff54b0b90a3a0540e5cc54754579d4e65db233edcc9e55c26dd2a6f7fd8ee440b3f5bce547e0bb9197894f1728c2060b0597cbee547 + +DHKey = DH-Public2 +P = ffffffffffffffffc90fdaa22168c234c4c6628b80dc1cd129024e088a67cc74020bbea63b139b22514a08798e3404ddef9519b3cd3a431b302b0a6df25f14374fe1356d6d51c245e485b576625e7ec6f44c42e9a637ed6b0bff5cb6f406b7edee386bfb5a899fa5ae9f24117c4b1fe649286651ece45b3dc2007cb8a163bf0598da48361c55d39a69163fa8fd24cf5f83655d23dca3ad961c62f356208552bb9ed529077096966d670c354e4abc9804f1746c08ca237327ffffffffffffffff +G = 02 +Public = 98f7472e4950b3bfcb3f37bf02b77323f7919e434e11c6e4b76b9a55132e50c3dca3c7ac55b69126cd06d7c337be1552b81ac011d6f5e56a688b89e4fa10d31a5305b78b6591354b5a22678675bdd248b82a4b267eac643cfbec1cebce93fa8aa59558e5121e9f76fb119cf90e587661aba85b2a617304c9492e5565f21af693caceea9c7fb0f68909f3279ccfe347d7132e9e76a058f99b805f9b2275a082353f5be00258670d640cc9c7926984ebba4cffb3902961a6951373ac4915a70aeb + +DHKey = DH-Private2 +P = ffffffffffffffffc90fdaa22168c234c4c6628b80dc1cd129024e088a67cc74020bbea63b139b22514a08798e3404ddef9519b3cd3a431b302b0a6df25f14374fe1356d6d51c245e485b576625e7ec6f44c42e9a637ed6b0bff5cb6f406b7edee386bfb5a899fa5ae9f24117c4b1fe649286651ece45b3dc2007cb8a163bf0598da48361c55d39a69163fa8fd24cf5f83655d23dca3ad961c62f356208552bb9ed529077096966d670c354e4abc9804f1746c08ca237327ffffffffffffffff +G = 02 +Public = 98f7472e4950b3bfcb3f37bf02b77323f7919e434e11c6e4b76b9a55132e50c3dca3c7ac55b69126cd06d7c337be1552b81ac011d6f5e56a688b89e4fa10d31a5305b78b6591354b5a22678675bdd248b82a4b267eac643cfbec1cebce93fa8aa59558e5121e9f76fb119cf90e587661aba85b2a617304c9492e5565f21af693caceea9c7fb0f68909f3279ccfe347d7132e9e76a058f99b805f9b2275a082353f5be00258670d640cc9c7926984ebba4cffb3902961a6951373ac4915a70aeb +Private = 984de7473d1186e97b3dc4797f14ec8ab97df321192bf40e8fb575a2ab93210f6c32cc4d915cff27d2d4f9bbc661bc809243d116db8b844377993ae8399b4fa089c9404c7515003c71a2bfdd0361cc192dcf2e56a555105e2ef25b0c7545a6a30ba62607b0563ad46714ac8b6720446ad0e33af2c183cdf045b01ff0415fbdd8e2bd506729a84731fb68dd54a4caecfe028a09d157f94f48e90c3d5cb63f0db39e05d556a4dc85594c9c7f2f07c6dd27878512748fc8eba2652f2bd7a6395586 + +# By default, the leading zero is removed for OpenSSL compatibility (insecure). +Derive = DH-Private1 +DerivePeer = DH-Public2 +Output = 5d21ea6f2a141f62e77f3943a2fac88dae9bc6baf3030f467c6dd34582432c80ae0a16655e75f35dea69943503ab8a25b7bbc9cca8e82a85e14c52293635792fbc27d5089c60e528f519c054f4d89b9ef673a4167e8734e226c5bc1b88016ed8534e65e19574da4ccc5197f8cd681ea86794a294385cc7bac913f30bca359c142a7989663793fc173aa029cdd269dd29649e225bd5d7863bc084555e53ca3485fd813b6cf8f36b06b22fb42d57e19c5e00d01a8bbe7dcc6eea965178851495 + +Derive = DH-Private2 +DerivePeer = DH-Public1 +Output = 5d21ea6f2a141f62e77f3943a2fac88dae9bc6baf3030f467c6dd34582432c80ae0a16655e75f35dea69943503ab8a25b7bbc9cca8e82a85e14c52293635792fbc27d5089c60e528f519c054f4d89b9ef673a4167e8734e226c5bc1b88016ed8534e65e19574da4ccc5197f8cd681ea86794a294385cc7bac913f30bca359c142a7989663793fc173aa029cdd269dd29649e225bd5d7863bc084555e53ca3485fd813b6cf8f36b06b22fb42d57e19c5e00d01a8bbe7dcc6eea965178851495 + +# Setting EVP_PKEY_CTX_set_dh_pad fixes this. +Derive = DH-Private1 +DerivePeer = DH-Public2 +DiffieHellmanPad +Output = 005d21ea6f2a141f62e77f3943a2fac88dae9bc6baf3030f467c6dd34582432c80ae0a16655e75f35dea69943503ab8a25b7bbc9cca8e82a85e14c52293635792fbc27d5089c60e528f519c054f4d89b9ef673a4167e8734e226c5bc1b88016ed8534e65e19574da4ccc5197f8cd681ea86794a294385cc7bac913f30bca359c142a7989663793fc173aa029cdd269dd29649e225bd5d7863bc084555e53ca3485fd813b6cf8f36b06b22fb42d57e19c5e00d01a8bbe7dcc6eea965178851495 + +Derive = DH-Private2 +DerivePeer = DH-Public1 +DiffieHellmanPad +Output = 005d21ea6f2a141f62e77f3943a2fac88dae9bc6baf3030f467c6dd34582432c80ae0a16655e75f35dea69943503ab8a25b7bbc9cca8e82a85e14c52293635792fbc27d5089c60e528f519c054f4d89b9ef673a4167e8734e226c5bc1b88016ed8534e65e19574da4ccc5197f8cd681ea86794a294385cc7bac913f30bca359c142a7989663793fc173aa029cdd269dd29649e225bd5d7863bc084555e53ca3485fd813b6cf8f36b06b22fb42d57e19c5e00d01a8bbe7dcc6eea965178851495 + +Derive = DH-Public1 +DerivePeer = DH-Public2 +Error = NO_PRIVATE_VALUE + +DHKey = DH-WrongGroup +P = ffffffffffffffffc90fdaa22168c234c4c6628b80dc1cd129024e088a67cc74020bbea63b139b22514a08798e3404ddef9519b3cd3a431b302b0a6df25f14374fe1356d6d51c245e485b576625e7ec6f44c42e9a637ed6b0bff5cb6f406b7edee386bfb5a899fa5ae9f24117c4b1fe649286651ece45b3dc2007cb8a163bf0598da48361c55d39a69163fa8fd24cf5f83655d23dca3ad961c62f356208552bb9ed529077096966d670c354e4abc9804f1746c08ca237327fffffffffffffffe +G = 02 +Public = 98f7472e4950b3bfcb3f37bf02b77323f7919e434e11c6e4b76b9a55132e50c3dca3c7ac55b69126cd06d7c337be1552b81ac011d6f5e56a688b89e4fa10d31a5305b78b6591354b5a22678675bdd248b82a4b267eac643cfbec1cebce93fa8aa59558e5121e9f76fb119cf90e587661aba85b2a617304c9492e5565f21af693caceea9c7fb0f68909f3279ccfe347d7132e9e76a058f99b805f9b2275a082353f5be00258670d640cc9c7926984ebba4cffb3902961a6951373ac4915a70aeb +Private = 984de7473d1186e97b3dc4797f14ec8ab97df321192bf40e8fb575a2ab93210f6c32cc4d915cff27d2d4f9bbc661bc809243d116db8b844377993ae8399b4fa089c9404c7515003c71a2bfdd0361cc192dcf2e56a555105e2ef25b0c7545a6a30ba62607b0563ad46714ac8b6720446ad0e33af2c183cdf045b01ff0415fbdd8e2bd506729a84731fb68dd54a4caecfe028a09d157f94f48e90c3d5cb63f0db39e05d556a4dc85594c9c7f2f07c6dd27878512748fc8eba2652f2bd7a6395586 + +Derive = DH-WrongGroup +DerivePeer = DH-Public2 +Error = DIFFERENT_PARAMETERS + +Derive = DH-Private1 +DerivePeer = DH-WrongGroup +Error = DIFFERENT_PARAMETERS + +DHKey = DH-Params +P = ffffffffffffffffc90fdaa22168c234c4c6628b80dc1cd129024e088a67cc74020bbea63b139b22514a08798e3404ddef9519b3cd3a431b302b0a6df25f14374fe1356d6d51c245e485b576625e7ec6f44c42e9a637ed6b0bff5cb6f406b7edee386bfb5a899fa5ae9f24117c4b1fe649286651ece45b3dc2007cb8a163bf0598da48361c55d39a69163fa8fd24cf5f83655d23dca3ad961c62f356208552bb9ed529077096966d670c354e4abc9804f1746c08ca237327ffffffffffffffff +G = 02 + +Derive = DH-Private1 +DerivePeer = DH-Params +Error = KEYS_NOT_SET + +DHKey = DH-Private1-With-Q +P = ffffffffffffffffc90fdaa22168c234c4c6628b80dc1cd129024e088a67cc74020bbea63b139b22514a08798e3404ddef9519b3cd3a431b302b0a6df25f14374fe1356d6d51c245e485b576625e7ec6f44c42e9a637ed6b0bff5cb6f406b7edee386bfb5a899fa5ae9f24117c4b1fe649286651ece45b3dc2007cb8a163bf0598da48361c55d39a69163fa8fd24cf5f83655d23dca3ad961c62f356208552bb9ed529077096966d670c354e4abc9804f1746c08ca237327ffffffffffffffff +Q = 7fffffffffffffffe487ed5110b4611a62633145c06e0e68948127044533e63a0105df531d89cd9128a5043cc71a026ef7ca8cd9e69d218d98158536f92f8a1ba7f09ab6b6a8e122f242dabb312f3f637a262174d31bf6b585ffae5b7a035bf6f71c35fdad44cfd2d74f9208be258ff324943328f6722d9ee1003e5c50b1df82cc6d241b0e2ae9cd348b1fd47e9267afc1b2ae91ee51d6cb0e3179ab1042a95dcf6a9483b84b4b36b3861aa7255e4c0278ba36046511b993ffffffffffffffff +G = 02 +Public = c680b01bb49e303893a5c339c9448c63dc3d0ffdcb8024a784292ebccfe95bdfb97a456f51cb1decf904704d0dbab69689bd87cde04e4fcb793f66024a43dacd0830ac155f8149aeb42f3d1ec03b05c70a1349492d24e20b58b0283155816465c0efa3b01e78fe935f1633745826a0e5c8d87ca60418d816721503ccaf7540e7e3c13093a8fb72c34c452ab35cb07ec867f58f2c1d0ad6629b6359b2b990a50d29aedaca2efbf0b1b904005859f348797b9be660f438cc763dd2180ec7dd81c9 +Private = 05953ba55a5ff41a700744e06cebcd30f6fd76a6b1f7efb6bdc05028e7db2e50ef56385c65bad4a1cfff232c5d83179559e59a8901a88119ababdcc0c4e4fd75cdf6161fb07a72fb3d4c6c0fb140a2eb3e93627d4f2e93e086ba672149a4fb25594b2c6cb74a97a8e68d45097cc937cf30dd9141dbd3abdd4fb9fec45a240d528efa4a5b5690f40250a96ff54b0b90a3a0540e5cc54754579d4e65db233edcc9e55c26dd2a6f7fd8ee440b3f5bce547e0bb9197894f1728c2060b0597cbee547 + +Derive = DH-Private1-With-Q +DerivePeer = DH-Public2 +DiffieHellmanPad +Output = 005d21ea6f2a141f62e77f3943a2fac88dae9bc6baf3030f467c6dd34582432c80ae0a16655e75f35dea69943503ab8a25b7bbc9cca8e82a85e14c52293635792fbc27d5089c60e528f519c054f4d89b9ef673a4167e8734e226c5bc1b88016ed8534e65e19574da4ccc5197f8cd681ea86794a294385cc7bac913f30bca359c142a7989663793fc173aa029cdd269dd29649e225bd5d7863bc084555e53ca3485fd813b6cf8f36b06b22fb42d57e19c5e00d01a8bbe7dcc6eea965178851495 diff --git a/yass/third_party/boringssl/src/crypto/evp/internal.h b/yass/third_party/boringssl/src/crypto/evp/internal.h index 6678c41de7..1d47427376 100644 --- a/yass/third_party/boringssl/src/crypto/evp/internal.h +++ b/yass/third_party/boringssl/src/crypto/evp/internal.h @@ -213,6 +213,7 @@ OPENSSL_EXPORT int EVP_PKEY_CTX_ctrl(EVP_PKEY_CTX *ctx, int keytype, int optype, #define EVP_PKEY_CTRL_HKDF_KEY (EVP_PKEY_ALG_CTRL + 16) #define EVP_PKEY_CTRL_HKDF_SALT (EVP_PKEY_ALG_CTRL + 17) #define EVP_PKEY_CTRL_HKDF_INFO (EVP_PKEY_ALG_CTRL + 18) +#define EVP_PKEY_CTRL_DH_PAD (EVP_PKEY_ALG_CTRL + 19) struct evp_pkey_ctx_st { // Method associated with this operation @@ -288,12 +289,18 @@ extern const EVP_PKEY_ASN1_METHOD ec_asn1_meth; extern const EVP_PKEY_ASN1_METHOD rsa_asn1_meth; extern const EVP_PKEY_ASN1_METHOD ed25519_asn1_meth; extern const EVP_PKEY_ASN1_METHOD x25519_asn1_meth; +extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth; extern const EVP_PKEY_METHOD rsa_pkey_meth; extern const EVP_PKEY_METHOD ec_pkey_meth; extern const EVP_PKEY_METHOD ed25519_pkey_meth; extern const EVP_PKEY_METHOD x25519_pkey_meth; extern const EVP_PKEY_METHOD hkdf_pkey_meth; +extern const EVP_PKEY_METHOD dh_pkey_meth; + +// evp_pkey_set_method behaves like |EVP_PKEY_set_type|, but takes a pointer to +// a method table. This avoids depending on every |EVP_PKEY_ASN1_METHOD|. +void evp_pkey_set_method(EVP_PKEY *pkey, const EVP_PKEY_ASN1_METHOD *method); #if defined(__cplusplus) diff --git a/yass/third_party/boringssl/src/crypto/evp/p_dh.c b/yass/third_party/boringssl/src/crypto/evp/p_dh.c new file mode 100644 index 0000000000..9953f82a04 --- /dev/null +++ b/yass/third_party/boringssl/src/crypto/evp/p_dh.c @@ -0,0 +1,137 @@ +/* + * Copyright 2006-2019 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include + +#include + +#include +#include +#include + +#include "internal.h" + + +typedef struct dh_pkey_ctx_st { + int pad; +} DH_PKEY_CTX; + +static int pkey_dh_init(EVP_PKEY_CTX *ctx) { + DH_PKEY_CTX *dctx = OPENSSL_zalloc(sizeof(DH_PKEY_CTX)); + if (dctx == NULL) { + return 0; + } + + ctx->data = dctx; + return 1; +} + +static int pkey_dh_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) { + if (!pkey_dh_init(dst)) { + return 0; + } + + const DH_PKEY_CTX *sctx = src->data; + DH_PKEY_CTX *dctx = dst->data; + dctx->pad = sctx->pad; + return 1; +} + +static void pkey_dh_cleanup(EVP_PKEY_CTX *ctx) { + OPENSSL_free(ctx->data); + ctx->data = NULL; +} + +static int pkey_dh_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) { + DH *dh = DH_new(); + if (dh == NULL || !EVP_PKEY_assign_DH(pkey, dh)) { + DH_free(dh); + return 0; + } + + if (ctx->pkey != NULL && !EVP_PKEY_copy_parameters(pkey, ctx->pkey)) { + return 0; + } + + return DH_generate_key(dh); +} + +static int pkey_dh_derive(EVP_PKEY_CTX *ctx, uint8_t *out, size_t *out_len) { + DH_PKEY_CTX *dctx = ctx->data; + if (ctx->pkey == NULL || ctx->peerkey == NULL) { + OPENSSL_PUT_ERROR(EVP, EVP_R_KEYS_NOT_SET); + return 0; + } + + DH *our_key = ctx->pkey->pkey; + DH *peer_key = ctx->peerkey->pkey; + if (our_key == NULL || peer_key == NULL) { + OPENSSL_PUT_ERROR(EVP, EVP_R_KEYS_NOT_SET); + return 0; + } + + const BIGNUM *pub_key = DH_get0_pub_key(peer_key); + if (pub_key == NULL) { + OPENSSL_PUT_ERROR(EVP, EVP_R_KEYS_NOT_SET); + return 0; + } + + if (out == NULL) { + *out_len = DH_size(our_key); + return 1; + } + + if (*out_len < (size_t)DH_size(our_key)) { + OPENSSL_PUT_ERROR(EVP, EVP_R_BUFFER_TOO_SMALL); + return 0; + } + + int ret = dctx->pad ? DH_compute_key_padded(out, pub_key, our_key) + : DH_compute_key(out, pub_key, our_key); + if (ret < 0) { + return 0; + } + + assert(ret <= DH_size(our_key)); + *out_len = (size_t)ret; + return 1; +} + +static int pkey_dh_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) { + DH_PKEY_CTX *dctx = ctx->data; + switch (type) { + case EVP_PKEY_CTRL_PEER_KEY: + // |EVP_PKEY_derive_set_peer| requires the key implement this command, + // even if it is a no-op. + return 1; + + case EVP_PKEY_CTRL_DH_PAD: + dctx->pad = p1; + return 1; + + default: + OPENSSL_PUT_ERROR(EVP, EVP_R_COMMAND_NOT_SUPPORTED); + return 0; + } +} + +const EVP_PKEY_METHOD dh_pkey_meth = { + .pkey_id = EVP_PKEY_DH, + .init = pkey_dh_init, + .copy = pkey_dh_copy, + .cleanup = pkey_dh_cleanup, + .keygen = pkey_dh_keygen, + .derive = pkey_dh_derive, + .ctrl = pkey_dh_ctrl, +}; + +int EVP_PKEY_CTX_set_dh_pad(EVP_PKEY_CTX *ctx, int pad) { + return EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_DERIVE, + EVP_PKEY_CTRL_DH_PAD, pad, NULL); +} diff --git a/yass/third_party/boringssl/src/crypto/evp/p_dh_asn1.c b/yass/third_party/boringssl/src/crypto/evp/p_dh_asn1.c new file mode 100644 index 0000000000..b1038d1e8a --- /dev/null +++ b/yass/third_party/boringssl/src/crypto/evp/p_dh_asn1.c @@ -0,0 +1,120 @@ +/* + * Copyright 2006-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include + +#include +#include +#include + +#include "internal.h" +#include "../internal.h" + + +static void dh_free(EVP_PKEY *pkey) { + DH_free(pkey->pkey); + pkey->pkey = NULL; +} + +static int dh_size(const EVP_PKEY *pkey) { return DH_size(pkey->pkey); } + +static int dh_bits(const EVP_PKEY *pkey) { return DH_bits(pkey->pkey); } + +static int dh_param_missing(const EVP_PKEY *pkey) { + const DH *dh = pkey->pkey; + return dh == NULL || DH_get0_p(dh) == NULL || DH_get0_g(dh) == NULL; +} + +static int dh_param_copy(EVP_PKEY *to, const EVP_PKEY *from) { + if (dh_param_missing(from)) { + OPENSSL_PUT_ERROR(EVP, EVP_R_MISSING_PARAMETERS); + return 0; + } + + const DH *dh = from->pkey; + const BIGNUM *q_old = DH_get0_q(dh); + BIGNUM *p = BN_dup(DH_get0_p(dh)); + BIGNUM *q = q_old == NULL ? NULL : BN_dup(q_old); + BIGNUM *g = BN_dup(DH_get0_g(dh)); + if (p == NULL || (q_old != NULL && q == NULL) || g == NULL || + !DH_set0_pqg(to->pkey, p, q, g)) { + BN_free(p); + BN_free(q); + BN_free(g); + return 0; + } + + // |DH_set0_pqg| took ownership of |p|, |q|, and |g|. + return 1; +} + +static int dh_param_cmp(const EVP_PKEY *a, const EVP_PKEY *b) { + if (dh_param_missing(a) || dh_param_missing(b)) { + return -2; + } + + // Matching OpenSSL, only compare p and g for PKCS#3-style Diffie-Hellman. + // OpenSSL only checks q in X9.42-style Diffie-Hellman ("DHX"). + const DH *a_dh = a->pkey; + const DH *b_dh = b->pkey; + return BN_cmp(DH_get0_p(a_dh), DH_get0_p(b_dh)) == 0 && + BN_cmp(DH_get0_g(a_dh), DH_get0_g(b_dh)) == 0; +} + +static int dh_pub_cmp(const EVP_PKEY *a, const EVP_PKEY *b) { + if (dh_param_cmp(a, b) <= 0) { + return 0; + } + + const DH *a_dh = a->pkey; + const DH *b_dh = b->pkey; + return BN_cmp(DH_get0_pub_key(a_dh), DH_get0_pub_key(b_dh)) == 0; +} + +const EVP_PKEY_ASN1_METHOD dh_asn1_meth = { + .pkey_id = EVP_PKEY_DH, + .pkey_method = &dh_pkey_meth, + .pub_cmp = dh_pub_cmp, + .pkey_size = dh_size, + .pkey_bits = dh_bits, + .param_missing = dh_param_missing, + .param_copy = dh_param_copy, + .param_cmp = dh_param_cmp, + .pkey_free = dh_free, +}; + +int EVP_PKEY_set1_DH(EVP_PKEY *pkey, DH *key) { + if (EVP_PKEY_assign_DH(pkey, key)) { + DH_up_ref(key); + return 1; + } + return 0; +} + +int EVP_PKEY_assign_DH(EVP_PKEY *pkey, DH *key) { + evp_pkey_set_method(pkey, &dh_asn1_meth); + pkey->pkey = key; + return key != NULL; +} + +DH *EVP_PKEY_get0_DH(const EVP_PKEY *pkey) { + if (pkey->type != EVP_PKEY_DH) { + OPENSSL_PUT_ERROR(EVP, EVP_R_EXPECTING_A_DH_KEY); + return NULL; + } + return pkey->pkey; +} + +DH *EVP_PKEY_get1_DH(const EVP_PKEY *pkey) { + DH *dh = EVP_PKEY_get0_DH(pkey); + if (dh != NULL) { + DH_up_ref(dh); + } + return dh; +} diff --git a/yass/third_party/boringssl/src/crypto/evp/p_dsa_asn1.c b/yass/third_party/boringssl/src/crypto/evp/p_dsa_asn1.c index fe0421081d..98747470c3 100644 --- a/yass/third_party/boringssl/src/crypto/evp/p_dsa_asn1.c +++ b/yass/third_party/boringssl/src/crypto/evp/p_dsa_asn1.c @@ -306,3 +306,33 @@ int EVP_PKEY_CTX_set_dsa_paramgen_q_bits(EVP_PKEY_CTX *ctx, int qbits) { OPENSSL_PUT_ERROR(EVP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); return 0; } + +int EVP_PKEY_set1_DSA(EVP_PKEY *pkey, DSA *key) { + if (EVP_PKEY_assign_DSA(pkey, key)) { + DSA_up_ref(key); + return 1; + } + return 0; +} + +int EVP_PKEY_assign_DSA(EVP_PKEY *pkey, DSA *key) { + evp_pkey_set_method(pkey, &dsa_asn1_meth); + pkey->pkey = key; + return key != NULL; +} + +DSA *EVP_PKEY_get0_DSA(const EVP_PKEY *pkey) { + if (pkey->type != EVP_PKEY_DSA) { + OPENSSL_PUT_ERROR(EVP, EVP_R_EXPECTING_A_DSA_KEY); + return NULL; + } + return pkey->pkey; +} + +DSA *EVP_PKEY_get1_DSA(const EVP_PKEY *pkey) { + DSA *dsa = EVP_PKEY_get0_DSA(pkey); + if (dsa != NULL) { + DSA_up_ref(dsa); + } + return dsa; +} diff --git a/yass/third_party/boringssl/src/crypto/evp/p_ec.c b/yass/third_party/boringssl/src/crypto/evp/p_ec.c index 0e4349f05c..7e1594e698 100644 --- a/yass/third_party/boringssl/src/crypto/evp/p_ec.c +++ b/yass/third_party/boringssl/src/crypto/evp/p_ec.c @@ -90,15 +90,14 @@ static int pkey_ec_init(EVP_PKEY_CTX *ctx) { } static int pkey_ec_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) { - EC_PKEY_CTX *dctx, *sctx; if (!pkey_ec_init(dst)) { return 0; } - sctx = src->data; - dctx = dst->data; + const EC_PKEY_CTX *sctx = src->data; + EC_PKEY_CTX *dctx = dst->data; dctx->md = sctx->md; - + dctx->gen_group = sctx->gen_group; return 1; } diff --git a/yass/third_party/boringssl/src/crypto/evp/p_ec_asn1.c b/yass/third_party/boringssl/src/crypto/evp/p_ec_asn1.c index 9a9d463db9..54d02c3c4a 100644 --- a/yass/third_party/boringssl/src/crypto/evp/p_ec_asn1.c +++ b/yass/third_party/boringssl/src/crypto/evp/p_ec_asn1.c @@ -300,3 +300,33 @@ const EVP_PKEY_ASN1_METHOD ec_asn1_meth = { int_ec_free, }; + +int EVP_PKEY_set1_EC_KEY(EVP_PKEY *pkey, EC_KEY *key) { + if (EVP_PKEY_assign_EC_KEY(pkey, key)) { + EC_KEY_up_ref(key); + return 1; + } + return 0; +} + +int EVP_PKEY_assign_EC_KEY(EVP_PKEY *pkey, EC_KEY *key) { + evp_pkey_set_method(pkey, &ec_asn1_meth); + pkey->pkey = key; + return key != NULL; +} + +EC_KEY *EVP_PKEY_get0_EC_KEY(const EVP_PKEY *pkey) { + if (pkey->type != EVP_PKEY_EC) { + OPENSSL_PUT_ERROR(EVP, EVP_R_EXPECTING_AN_EC_KEY_KEY); + return NULL; + } + return pkey->pkey; +} + +EC_KEY *EVP_PKEY_get1_EC_KEY(const EVP_PKEY *pkey) { + EC_KEY *ec_key = EVP_PKEY_get0_EC_KEY(pkey); + if (ec_key != NULL) { + EC_KEY_up_ref(ec_key); + } + return ec_key; +} diff --git a/yass/third_party/boringssl/src/crypto/evp/p_ed25519.c b/yass/third_party/boringssl/src/crypto/evp/p_ed25519.c index 647ea05e60..66e7197f32 100644 --- a/yass/third_party/boringssl/src/crypto/evp/p_ed25519.c +++ b/yass/third_party/boringssl/src/crypto/evp/p_ed25519.c @@ -30,10 +30,7 @@ static int pkey_ed25519_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) { return 0; } - if (!EVP_PKEY_set_type(pkey, EVP_PKEY_ED25519)) { - OPENSSL_free(key); - return 0; - } + evp_pkey_set_method(pkey, &ed25519_asn1_meth); uint8_t pubkey_unused[32]; ED25519_keypair(pubkey_unused, key->key); diff --git a/yass/third_party/boringssl/src/crypto/evp/p_rsa_asn1.c b/yass/third_party/boringssl/src/crypto/evp/p_rsa_asn1.c index dd64731be4..24b8e17045 100644 --- a/yass/third_party/boringssl/src/crypto/evp/p_rsa_asn1.c +++ b/yass/third_party/boringssl/src/crypto/evp/p_rsa_asn1.c @@ -209,3 +209,33 @@ const EVP_PKEY_ASN1_METHOD rsa_asn1_meth = { int_rsa_free, }; + +int EVP_PKEY_set1_RSA(EVP_PKEY *pkey, RSA *key) { + if (EVP_PKEY_assign_RSA(pkey, key)) { + RSA_up_ref(key); + return 1; + } + return 0; +} + +int EVP_PKEY_assign_RSA(EVP_PKEY *pkey, RSA *key) { + evp_pkey_set_method(pkey, &rsa_asn1_meth); + pkey->pkey = key; + return key != NULL; +} + +RSA *EVP_PKEY_get0_RSA(const EVP_PKEY *pkey) { + if (pkey->type != EVP_PKEY_RSA) { + OPENSSL_PUT_ERROR(EVP, EVP_R_EXPECTING_AN_RSA_KEY); + return NULL; + } + return pkey->pkey; +} + +RSA *EVP_PKEY_get1_RSA(const EVP_PKEY *pkey) { + RSA *rsa = EVP_PKEY_get0_RSA(pkey); + if (rsa != NULL) { + RSA_up_ref(rsa); + } + return rsa; +} diff --git a/yass/third_party/boringssl/src/crypto/evp/p_x25519.c b/yass/third_party/boringssl/src/crypto/evp/p_x25519.c index 6218943e0f..a4c9136fd0 100644 --- a/yass/third_party/boringssl/src/crypto/evp/p_x25519.c +++ b/yass/third_party/boringssl/src/crypto/evp/p_x25519.c @@ -30,10 +30,7 @@ static int pkey_x25519_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) { return 0; } - if (!EVP_PKEY_set_type(pkey, EVP_PKEY_X25519)) { - OPENSSL_free(key); - return 0; - } + evp_pkey_set_method(pkey, &x25519_asn1_meth); X25519_keypair(key->pub, key->priv); key->has_private = 1; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/CMakeLists.txt b/yass/third_party/boringssl/src/crypto/fipsmodule/CMakeLists.txt deleted file mode 100644 index 9ab3da38a3..0000000000 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/CMakeLists.txt +++ /dev/null @@ -1,181 +0,0 @@ -perlasm(BCM_SOURCES aarch64 aesv8-armv8 aes/asm/aesv8-armx.pl) -perlasm(BCM_SOURCES aarch64 aesv8-gcm-armv8 modes/asm/aesv8-gcm-armv8.pl) -perlasm(BCM_SOURCES aarch64 armv8-mont bn/asm/armv8-mont.pl) -perlasm(BCM_SOURCES aarch64 bn-armv8 bn/asm/bn-armv8.pl) -perlasm(BCM_SOURCES aarch64 ghash-neon-armv8 modes/asm/ghash-neon-armv8.pl) -perlasm(BCM_SOURCES aarch64 ghashv8-armv8 modes/asm/ghashv8-armx.pl) -perlasm(BCM_SOURCES aarch64 p256_beeu-armv8-asm ec/asm/p256_beeu-armv8-asm.pl) -perlasm(BCM_SOURCES aarch64 p256-armv8-asm ec/asm/p256-armv8-asm.pl) -perlasm(BCM_SOURCES aarch64 sha1-armv8 sha/asm/sha1-armv8.pl) -perlasm(BCM_SOURCES aarch64 sha256-armv8 sha/asm/sha512-armv8.pl sha256) -perlasm(BCM_SOURCES aarch64 sha512-armv8 sha/asm/sha512-armv8.pl sha512) -perlasm(BCM_SOURCES aarch64 vpaes-armv8 aes/asm/vpaes-armv8.pl) -perlasm(BCM_SOURCES arm aesv8-armv7 aes/asm/aesv8-armx.pl) -perlasm(BCM_SOURCES arm armv4-mont bn/asm/armv4-mont.pl) -perlasm(BCM_SOURCES arm bsaes-armv7 aes/asm/bsaes-armv7.pl) -perlasm(BCM_SOURCES arm ghash-armv4 modes/asm/ghash-armv4.pl) -perlasm(BCM_SOURCES arm ghashv8-armv7 modes/asm/ghashv8-armx.pl) -perlasm(BCM_SOURCES arm sha1-armv4-large sha/asm/sha1-armv4-large.pl) -perlasm(BCM_SOURCES arm sha256-armv4 sha/asm/sha256-armv4.pl) -perlasm(BCM_SOURCES arm sha512-armv4 sha/asm/sha512-armv4.pl) -perlasm(BCM_SOURCES arm vpaes-armv7 aes/asm/vpaes-armv7.pl) -perlasm(BCM_SOURCES x86 aesni-x86 aes/asm/aesni-x86.pl) -perlasm(BCM_SOURCES x86 bn-586 bn/asm/bn-586.pl) -perlasm(BCM_SOURCES x86 co-586 bn/asm/co-586.pl) -perlasm(BCM_SOURCES x86 ghash-ssse3-x86 modes/asm/ghash-ssse3-x86.pl) -perlasm(BCM_SOURCES x86 ghash-x86 modes/asm/ghash-x86.pl) -perlasm(BCM_SOURCES x86 md5-586 md5/asm/md5-586.pl) -perlasm(BCM_SOURCES x86 sha1-586 sha/asm/sha1-586.pl) -perlasm(BCM_SOURCES x86 sha256-586 sha/asm/sha256-586.pl) -perlasm(BCM_SOURCES x86 sha512-586 sha/asm/sha512-586.pl) -perlasm(BCM_SOURCES x86 vpaes-x86 aes/asm/vpaes-x86.pl) -perlasm(BCM_SOURCES x86 x86-mont bn/asm/x86-mont.pl) -perlasm(BCM_SOURCES x86_64 aesni-gcm-x86_64 modes/asm/aesni-gcm-x86_64.pl) -perlasm(BCM_SOURCES x86_64 aesni-x86_64 aes/asm/aesni-x86_64.pl) -perlasm(BCM_SOURCES x86_64 ghash-ssse3-x86_64 modes/asm/ghash-ssse3-x86_64.pl) -perlasm(BCM_SOURCES x86_64 ghash-x86_64 modes/asm/ghash-x86_64.pl) -perlasm(BCM_SOURCES x86_64 md5-x86_64 md5/asm/md5-x86_64.pl) -perlasm(BCM_SOURCES x86_64 p256_beeu-x86_64-asm ec/asm/p256_beeu-x86_64-asm.pl) -perlasm(BCM_SOURCES x86_64 p256-x86_64-asm ec/asm/p256-x86_64-asm.pl) -perlasm(BCM_SOURCES x86_64 rdrand-x86_64 rand/asm/rdrand-x86_64.pl) -perlasm(BCM_SOURCES x86_64 rsaz-avx2 bn/asm/rsaz-avx2.pl) -perlasm(BCM_SOURCES x86_64 sha1-x86_64 sha/asm/sha1-x86_64.pl) -perlasm(BCM_SOURCES x86_64 sha256-x86_64 sha/asm/sha512-x86_64.pl sha256) -perlasm(BCM_SOURCES x86_64 sha512-x86_64 sha/asm/sha512-x86_64.pl sha512) -perlasm(BCM_SOURCES x86_64 vpaes-x86_64 aes/asm/vpaes-x86_64.pl) -perlasm(BCM_SOURCES x86_64 x86_64-mont bn/asm/x86_64-mont.pl) -perlasm(BCM_SOURCES x86_64 x86_64-mont5 bn/asm/x86_64-mont5.pl) - -if(OPENSSL_ASM) - list(APPEND BCM_SOURCES_ASM_USED ${BCM_SOURCES_ASM}) -endif() -if(OPENSSL_NASM) - list(APPEND BCM_SOURCES_ASM_USED ${BCM_SOURCES_NASM}) -endif() - -if(FIPS_DELOCATE) - if(FIPS_SHARED) - message(FATAL_ERROR "Can't set both delocate and shared mode for FIPS build") - endif() - - add_library( - bcm_c_generated_asm - - STATIC - - bcm.c - ) - add_dependencies(bcm_c_generated_asm boringssl_prefix_symbols) - target_include_directories(bcm_c_generated_asm PRIVATE ../../include) - set_target_properties(bcm_c_generated_asm PROPERTIES COMPILE_OPTIONS "-S") - set_target_properties(bcm_c_generated_asm PROPERTIES POSITION_INDEPENDENT_CODE ON) - - set(TARGET "") - if(CMAKE_ASM_COMPILER_TARGET) - set(TARGET "--target=${CMAKE_ASM_COMPILER_TARGET}") - endif() - - go_executable(delocate boringssl.googlesource.com/boringssl/util/fipstools/delocate) - add_custom_command( - OUTPUT bcm-delocated.S - COMMAND - ./delocate - -a $ - -o bcm-delocated.S - -cc ${CMAKE_ASM_COMPILER} - -cc-flags "${TARGET} ${CMAKE_ASM_FLAGS}" - ${PROJECT_SOURCE_DIR}/include/openssl/arm_arch.h - ${PROJECT_SOURCE_DIR}/include/openssl/asm_base.h - ${PROJECT_SOURCE_DIR}/include/openssl/target.h - ${BCM_SOURCES_ASM_USED} - DEPENDS - bcm_c_generated_asm - delocate - ${BCM_SOURCES_ASM_USED} - ${PROJECT_SOURCE_DIR}/include/openssl/arm_arch.h - ${PROJECT_SOURCE_DIR}/include/openssl/asm_base.h - ${PROJECT_SOURCE_DIR}/include/openssl/target.h - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) - - add_library( - bcm_hashunset - - STATIC - - bcm-delocated.S - ) - set_target_properties(bcm_hashunset PROPERTIES POSITION_INDEPENDENT_CODE ON) - set_target_properties(bcm_hashunset PROPERTIES LINKER_LANGUAGE C) - - go_executable(inject_hash - boringssl.googlesource.com/boringssl/util/fipstools/inject_hash) - add_custom_command( - OUTPUT bcm.o - COMMAND ./inject_hash -o bcm.o -in-archive $ - DEPENDS bcm_hashunset inject_hash - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) - - # The outputs of add_custom_command cannot be referenced outside of the - # CMakeLists.txt that defines it. Thus we have to wrap bcm.o in a custom target - # so that crypto can depend on it. - add_custom_target(bcm_o_target DEPENDS bcm.o) - - add_library( - fipsmodule - - OBJECT - - fips_shared_support.c - ) - add_dependencies(fipsmodule boringssl_prefix_symbols) - target_include_directories(fipsmodule PRIVATE ../../include) - set_target_properties(fipsmodule PROPERTIES LINKER_LANGUAGE C) -elseif(FIPS_SHARED) - if(NOT BUILD_SHARED_LIBS) - message(FATAL_ERROR "FIPS_SHARED set but not BUILD_SHARED_LIBS") - endif() - - add_library( - fipsmodule - - OBJECT - - fips_shared_support.c - ) - add_dependencies(fipsmodule boringssl_prefix_symbols) - target_include_directories(fipsmodule PRIVATE ../../include) - - add_library( - bcm_library - - STATIC - - bcm.c - ${BCM_SOURCES_ASM_USED} - ) - add_dependencies(bcm_library boringssl_prefix_symbols) - target_include_directories(bcm_library PRIVATE ../../include) - - add_custom_command( - OUTPUT bcm.o - COMMAND ${CMAKE_LINKER} -r -T ${CMAKE_CURRENT_SOURCE_DIR}/fips_shared.lds -o bcm.o --whole-archive $ - DEPENDS bcm_library fips_shared.lds - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) - - add_custom_target(bcm_o_target DEPENDS bcm.o) -else() - add_library( - fipsmodule - - OBJECT - - bcm.c - fips_shared_support.c - ${BCM_SOURCES_ASM_USED} - ) - add_dependencies(fipsmodule boringssl_prefix_symbols) - target_include_directories(fipsmodule PRIVATE ../../include) -endif() diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/bytes.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/bytes.c index d7f70dad6a..dcb0afc19a 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/bytes.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/bytes.c @@ -186,7 +186,7 @@ void bn_assert_fits_in_bytes(const BIGNUM *bn, size_t num) { void bn_words_to_big_endian(uint8_t *out, size_t out_len, const BN_ULONG *in, size_t in_len) { // The caller should have selected an output length without truncation. - assert(fits_in_bytes(in, in_len, out_len)); + declassify_assert(fits_in_bytes(in, in_len, out_len)); // We only support little-endian platforms, so the internal representation is // also little-endian as bytes. We can simply copy it in reverse. diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/div.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/div.c index 6c13716920..f524f8939a 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/div.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/div.c @@ -425,7 +425,7 @@ BN_ULONG bn_reduce_once(BN_ULONG *r, const BN_ULONG *a, BN_ULONG carry, // // Although |carry| may be one if it was one on input and |bn_sub_words| // returns zero, this would give |r| > |m|, violating our input assumptions. - assert(carry == 0 || carry == (BN_ULONG)-1); + declassify_assert(carry + 1 <= 1); bn_select_words(r, carry, a /* r < 0 */, r /* r >= 0 */, num); return carry; } @@ -434,7 +434,7 @@ BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m, BN_ULONG *tmp, size_t num) { // See |bn_reduce_once| for why this logic works. carry -= bn_sub_words(tmp, r, m, num); - assert(carry == 0 || carry == (BN_ULONG)-1); + declassify_assert(carry + 1 <= 1); bn_select_words(r, carry, r /* tmp < 0 */, tmp /* tmp >= 0 */, num); return carry; } @@ -504,7 +504,7 @@ int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder, // |divisor_min_bits| bits, the top |divisor_min_bits - 1| can be incorporated // without reductions. This significantly speeds up |RSA_check_key|. For // simplicity, we round down to a whole number of words. - assert(divisor_min_bits <= BN_num_bits(divisor)); + declassify_assert(divisor_min_bits <= BN_num_bits(divisor)); int initial_words = 0; if (divisor_min_bits > 0) { initial_words = (divisor_min_bits - 1) / BN_BITS2; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/div_extra.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/div_extra.c index 141f7da67a..f75c9146c2 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/div_extra.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/div_extra.c @@ -39,7 +39,7 @@ static uint16_t mod_u16(uint32_t n, uint16_t d, uint32_t p, uint32_t m) { // Multiply and subtract to get the remainder. n -= d * t; - assert(n < d); + declassify_assert(n < d); return n; } diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/exponentiation.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/exponentiation.c index 7b24d89509..53c6142bd7 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/exponentiation.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/exponentiation.c @@ -1013,7 +1013,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, // Prepare a^1 in the Montgomery domain. assert(!a->neg); - assert(BN_ucmp(a, m) < 0); + declassify_assert(BN_ucmp(a, m) < 0); if (!BN_to_montgomery(&am, a, mont, ctx) || !bn_resize_words(&am, top)) { goto err; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/gcd_extra.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/gcd_extra.c index b4fe00e3e4..531ff59f39 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/gcd_extra.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/gcd_extra.c @@ -93,7 +93,7 @@ static int bn_gcd_consttime(BIGNUM *r, unsigned *out_shift, const BIGNUM *x, // At least one of |u| and |v| is now even. BN_ULONG u_is_odd = word_is_odd_mask(u->d[0]); BN_ULONG v_is_odd = word_is_odd_mask(v->d[0]); - assert(!(u_is_odd & v_is_odd)); + declassify_assert(!(u_is_odd & v_is_odd)); // If both are even, the final GCD gains a factor of two. shift += 1 & (~u_is_odd & ~v_is_odd); @@ -106,7 +106,7 @@ static int bn_gcd_consttime(BIGNUM *r, unsigned *out_shift, const BIGNUM *x, // One of |u| or |v| is zero at this point. The algorithm usually makes |u| // zero, unless |y| was already zero on input. Fix this by combining the // values. - assert(BN_is_zero(u) || BN_is_zero(v)); + declassify_assert(BN_is_zero(u) | BN_is_zero(v)); for (size_t i = 0; i < width; i++) { v->d[i] |= u->d[i]; } @@ -289,7 +289,7 @@ int bn_mod_inverse_consttime(BIGNUM *r, int *out_no_inverse, const BIGNUM *a, // and |v| is now even. BN_ULONG u_is_even = ~word_is_odd_mask(u->d[0]); BN_ULONG v_is_even = ~word_is_odd_mask(v->d[0]); - assert(u_is_even != v_is_even); + declassify_assert(u_is_even != v_is_even); // Halve the even one and adjust the corresponding coefficient. maybe_rshift1_words(u->d, u_is_even, tmp->d, n_width); @@ -313,8 +313,11 @@ int bn_mod_inverse_consttime(BIGNUM *r, int *out_no_inverse, const BIGNUM *a, maybe_rshift1_words_carry(D->d, D_carry, v_is_even, tmp->d, a_width); } - assert(BN_is_zero(v)); - if (!BN_is_one(u)) { + declassify_assert(BN_is_zero(v)); + // While the inputs and output are secret, this function considers whether the + // input was invertible to be public. It is used as part of RSA key + // generation, where inputs are chosen to already be invertible. + if (constant_time_declassify_int(!BN_is_one(u))) { *out_no_inverse = 1; OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE); goto err; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/montgomery_inv.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/montgomery_inv.c index 068d00b480..499665a5fb 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/montgomery_inv.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/montgomery_inv.c @@ -153,7 +153,7 @@ static uint64_t bn_neg_inv_mod_r_u64(uint64_t n) { // The invariant now shows that u*r - v*n == 1 since r == 2 * alpha. #if BN_BITS2 == 64 && defined(BN_ULLONG) - assert(1 == ((BN_ULLONG)u * 2 * alpha) - ((BN_ULLONG)v * beta)); + declassify_assert(1 == ((BN_ULLONG)u * 2 * alpha) - ((BN_ULLONG)v * beta)); #endif return v; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/mul.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/mul.c index 7537899c0e..07612c5d1f 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/mul.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/mul.c @@ -292,7 +292,7 @@ static void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, } // The product should fit without carries. - assert(c == 0); + declassify_assert(c == 0); } // bn_mul_part_recursive sets |r| to |a| * |b|, using |t| as scratch space. |r| @@ -406,7 +406,7 @@ static void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a, } // The product should fit without carries. - assert(c == 0); + declassify_assert(c == 0); } // bn_mul_impl implements |BN_mul| and |bn_mul_consttime|. Note this function diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/prime.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/prime.c index fb30768310..4722f80349 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/prime.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/prime.c @@ -487,7 +487,10 @@ err: static int bn_trial_division(uint16_t *out, const BIGNUM *bn) { const size_t num_primes = num_trial_division_primes(bn); for (size_t i = 1; i < num_primes; i++) { - if (bn_mod_u16_consttime(bn, kPrimes[i]) == 0) { + // During RSA key generation, |bn| may be secret, but only if |bn| was + // prime, so it is safe to leak failed trial divisions. + if (constant_time_declassify_int(bn_mod_u16_consttime(bn, kPrimes[i]) == + 0)) { *out = kPrimes[i]; return 1; } @@ -573,7 +576,8 @@ int bn_miller_rabin_iteration(const BN_MILLER_RABIN *miller_rabin, // To avoid leaking |a|, we run the loop to |w_bits| and mask off all // iterations once |j| = |a|. for (int j = 1; j < miller_rabin->w_bits; j++) { - if (constant_time_eq_int(j, miller_rabin->a) & ~is_possibly_prime) { + if (constant_time_declassify_w(constant_time_eq_int(j, miller_rabin->a) & + ~is_possibly_prime)) { // If the loop is done and we haven't seen z = 1 or z = w-1 yet, the // value is composite and we can break in variable time. break; @@ -593,12 +597,14 @@ int bn_miller_rabin_iteration(const BN_MILLER_RABIN *miller_rabin, // Step 4.5.3. If z = 1 and the loop is not done, the previous value of z // was not -1. There are no non-trivial square roots of 1 modulo a prime, so // w is composite and we may exit in variable time. - if (BN_equal_consttime(z, miller_rabin->one_mont) & ~is_possibly_prime) { + if (constant_time_declassify_w( + BN_equal_consttime(z, miller_rabin->one_mont) & + ~is_possibly_prime)) { break; } } - *out_is_possibly_prime = is_possibly_prime & 1; + *out_is_possibly_prime = constant_time_declassify_w(is_possibly_prime) & 1; ret = 1; err: @@ -736,8 +742,9 @@ int BN_primality_test(int *out_is_probably_prime, const BIGNUM *w, int checks, crypto_word_t uniform_iterations = 0; // Using |constant_time_lt_w| seems to prevent the compiler from optimizing // this into two jumps. - for (int i = 1; (i <= BN_PRIME_CHECKS_BLINDED) | - constant_time_lt_w(uniform_iterations, checks); + for (int i = 1; constant_time_declassify_w( + (i <= BN_PRIME_CHECKS_BLINDED) | + constant_time_lt_w(uniform_iterations, checks)); i++) { // Step 4.1-4.2 int is_uniform; @@ -766,7 +773,7 @@ int BN_primality_test(int *out_is_probably_prime, const BIGNUM *w, int checks, } } - assert(uniform_iterations >= (crypto_word_t)checks); + declassify_assert(uniform_iterations >= (crypto_word_t)checks); *out_is_probably_prime = 1; ret = 1; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/random.c b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/random.c index 06de274a6e..f57d1c6a38 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/bn/random.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/bn/random.c @@ -339,7 +339,8 @@ int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive, // If the value is not in range, force it to be in range. r->d[0] |= constant_time_select_w(in_range, 0, min_inclusive); r->d[words - 1] &= constant_time_select_w(in_range, BN_MASK2, mask >> 1); - assert(bn_in_range_words(r->d, min_inclusive, max_exclusive->d, words)); + declassify_assert( + bn_in_range_words(r->d, min_inclusive, max_exclusive->d, words)); r->neg = 0; r->width = (int)words; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/cipher/internal.h b/yass/third_party/boringssl/src/crypto/fipsmodule/cipher/internal.h index 6ec9a3b3c8..af902e0ef0 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/cipher/internal.h +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/cipher/internal.h @@ -134,9 +134,6 @@ struct evp_cipher_st { // flags contains the OR of a number of flags. See |EVP_CIPH_*|. uint32_t flags; - // app_data is a pointer to opaque, user data. - void *app_data; - int (*init)(EVP_CIPHER_CTX *ctx, const uint8_t *key, const uint8_t *iv, int enc); diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/ec/ec_key.c b/yass/third_party/boringssl/src/crypto/fipsmodule/ec/ec_key.c index 7b7d396abf..8cdb5c3e75 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/ec/ec_key.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/ec/ec_key.c @@ -242,7 +242,10 @@ int EC_KEY_set_private_key(EC_KEY *key, const BIGNUM *priv_key) { return 0; } if (!ec_bignum_to_scalar(key->group, &scalar->scalar, priv_key) || - ec_scalar_is_zero(key->group, &scalar->scalar)) { + // Zero is not a valid private key, so it is safe to leak the result of + // this comparison. + constant_time_declassify_int( + ec_scalar_is_zero(key->group, &scalar->scalar))) { OPENSSL_PUT_ERROR(EC, EC_R_INVALID_PRIVATE_KEY); ec_wrapped_scalar_free(scalar); return 0; @@ -518,6 +521,11 @@ int EC_KEY_generate_key(EC_KEY *key) { } int EC_KEY_generate_key_fips(EC_KEY *eckey) { + if (eckey == NULL || eckey->group == NULL) { + OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER); + return 0; + } + boringssl_ensure_ecc_self_test(); if (EC_KEY_generate_key(eckey) && EC_KEY_check_fips(eckey)) { diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/ec/scalar.c b/yass/third_party/boringssl/src/crypto/fipsmodule/ec/scalar.c index a86ee0fbbc..b05d50845d 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/ec/scalar.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/ec/scalar.c @@ -23,8 +23,12 @@ int ec_bignum_to_scalar(const EC_GROUP *group, EC_SCALAR *out, const BIGNUM *in) { + // Scalars, which are often secret, must be reduced modulo the order. Those + // that are not will be discarded, so leaking the result of the comparison is + // safe. if (!bn_copy_words(out->words, group->order.N.width, in) || - !bn_less_than_words(out->words, group->order.N.d, group->order.N.width)) { + !constant_time_declassify_int(bn_less_than_words( + out->words, group->order.N.d, group->order.N.width))) { OPENSSL_PUT_ERROR(EC, EC_R_INVALID_SCALAR); return 0; } diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/rsa/rsa.c b/yass/third_party/boringssl/src/crypto/fipsmodule/rsa/rsa.c index 1a9f0507af..b524e77da5 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/rsa/rsa.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/rsa/rsa.c @@ -758,7 +758,8 @@ err: static int check_mod_inverse(int *out_ok, const BIGNUM *a, const BIGNUM *ainv, const BIGNUM *m, unsigned m_min_bits, BN_CTX *ctx) { - if (BN_is_negative(ainv) || BN_cmp(ainv, m) >= 0) { + if (BN_is_negative(ainv) || + constant_time_declassify_int(BN_cmp(ainv, m) >= 0)) { *out_ok = 0; return 1; } @@ -772,7 +773,7 @@ static int check_mod_inverse(int *out_ok, const BIGNUM *a, const BIGNUM *ainv, bn_mul_consttime(tmp, a, ainv, ctx) && bn_div_consttime(NULL, tmp, tmp, m, m_min_bits, ctx); if (ret) { - *out_ok = BN_is_one(tmp); + *out_ok = constant_time_declassify_int(BN_is_one(tmp)); } BN_CTX_end(ctx); return ret; @@ -831,8 +832,10 @@ int RSA_check_key(const RSA *key) { // bounds, to avoid a DoS vector in |bn_mul_consttime| below. Note that // n was bound by |rsa_check_public_key|. This also implicitly checks p and q // are odd, which is a necessary condition for Montgomery reduction. - if (BN_is_negative(key->p) || BN_cmp(key->p, key->n) >= 0 || - BN_is_negative(key->q) || BN_cmp(key->q, key->n) >= 0) { + if (BN_is_negative(key->p) || + constant_time_declassify_int(BN_cmp(key->p, key->n) >= 0) || + BN_is_negative(key->q) || + constant_time_declassify_int(BN_cmp(key->q, key->n) >= 0)) { OPENSSL_PUT_ERROR(RSA, RSA_R_N_NOT_EQUAL_P_Q); goto out; } @@ -863,7 +866,8 @@ int RSA_check_key(const RSA *key) { goto out; } - if (!BN_is_one(&tmp) || !BN_is_one(&de)) { + if (constant_time_declassify_int(!BN_is_one(&tmp)) || + constant_time_declassify_int(!BN_is_one(&de))) { OPENSSL_PUT_ERROR(RSA, RSA_R_D_E_NOT_CONGRUENT_TO_1); goto out; } diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/rsa/rsa_impl.c b/yass/third_party/boringssl/src/crypto/fipsmodule/rsa/rsa_impl.c index f2dc7fe609..b8fbbdcfe7 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/rsa/rsa_impl.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/rsa/rsa_impl.c @@ -795,7 +795,7 @@ static int mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx) { // This is a pre-condition for |mod_montgomery|. It was already checked by the // caller. - assert(BN_ucmp(I, n) < 0); + declassify_assert(BN_ucmp(I, n) < 0); if (// |m1| is the result modulo |q|. !mod_montgomery(r1, I, q, rsa->mont_q, p, ctx) || @@ -831,7 +831,7 @@ static int mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx) { // bound the width slightly higher, so fix it. This trips constant-time checks // because a naive data flow analysis does not realize the excess words are // publicly zero. - assert(BN_cmp(r0, n) < 0); + declassify_assert(BN_cmp(r0, n) < 0); bn_assert_fits_in_bytes(r0, BN_num_bytes(n)); if (!bn_resize_words(r0, n->width)) { goto err; @@ -1003,20 +1003,25 @@ static int generate_prime(BIGNUM *out, int bits, const BIGNUM *e, // retrying. That is, we reject a negligible fraction of primes that are // within the FIPS bound, but we will never accept a prime outside the // bound, ensuring the resulting RSA key is the right size. - if (BN_cmp(out, sqrt2) <= 0) { + // + // Values over the threshold are discarded, so it is safe to leak this + // comparison. + if (constant_time_declassify_int(BN_cmp(out, sqrt2) <= 0)) { continue; } // RSA key generation's bottleneck is discarding composites. If it fails // trial division, do not bother computing a GCD or performing Miller-Rabin. if (!bn_odd_number_is_obviously_composite(out)) { - // Check gcd(out-1, e) is one (steps 4.5 and 5.6). + // Check gcd(out-1, e) is one (steps 4.5 and 5.6). Leaking the final + // result of this comparison is safe because, if not relatively prime, the + // value will be discarded. int relatively_prime; - if (!BN_sub(tmp, out, BN_value_one()) || + if (!bn_usub_consttime(tmp, out, BN_value_one()) || !bn_is_relatively_prime(&relatively_prime, tmp, e, ctx)) { goto err; } - if (relatively_prime) { + if (constant_time_declassify_int(relatively_prime)) { // Test |out| for primality (steps 4.5.1 and 5.6.1). int is_probable_prime; if (!BN_primality_test(&is_probable_prime, out, @@ -1174,8 +1179,9 @@ static int rsa_generate_key_impl(RSA *rsa, int bits, const BIGNUM *e_value, } // Retry if |rsa->d| <= 2^|prime_bits|. See appendix B.3.1's guidance on - // values for d. - } while (BN_cmp(rsa->d, pow2_prime_bits) <= 0); + // values for d. When we retry, p and q are discarded, so it is safe to leak + // this comparison. + } while (constant_time_declassify_int(BN_cmp(rsa->d, pow2_prime_bits) <= 0)); assert(BN_num_bits(pm1) == (unsigned)prime_bits); assert(BN_num_bits(qm1) == (unsigned)prime_bits); @@ -1189,6 +1195,9 @@ static int rsa_generate_key_impl(RSA *rsa, int bits, const BIGNUM *e_value, } bn_set_minimal_width(rsa->n); + // |rsa->n| is computed from the private key, but is public. + bn_declassify(rsa->n); + // Sanity-check that |rsa->n| has the specified size. This is implied by // |generate_prime|'s bounds. if (BN_num_bits(rsa->n) != (unsigned)bits) { @@ -1241,6 +1250,11 @@ static int RSA_generate_key_ex_maybe_fips(RSA *rsa, int bits, int check_fips) { boringssl_ensure_rsa_self_test(); + if (rsa == NULL) { + OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER); + return 0; + } + RSA *tmp = NULL; uint32_t err; int ret = 0; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/service_indicator/service_indicator.c b/yass/third_party/boringssl/src/crypto/fipsmodule/service_indicator/service_indicator.c index 64325bea12..cad355d215 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/service_indicator/service_indicator.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/service_indicator/service_indicator.c @@ -171,7 +171,6 @@ static int is_md_fips_approved_for_signing(int md_type) { // type is FIPS approved for verifying, and zero otherwise. static int is_md_fips_approved_for_verifying(int md_type) { switch (md_type) { - case NID_sha1: case NID_sha224: case NID_sha256: case NID_sha384: @@ -184,7 +183,6 @@ static int is_md_fips_approved_for_verifying(int md_type) { } static void evp_md_ctx_verify_service_indicator(const EVP_MD_CTX *ctx, - int rsa_1024_ok, int (*md_ok)(int md_type)) { if (EVP_MD_CTX_md(ctx) == NULL) { // Signature schemes without a prehash are currently never FIPS approved. @@ -232,8 +230,7 @@ static void evp_md_ctx_verify_service_indicator(const EVP_MD_CTX *ctx, // Check if the MD type and the RSA key size are approved. if (md_ok(md_type) && - ((rsa_1024_ok && pkey_size == 128) || pkey_size == 256 || - pkey_size == 384 || pkey_size == 512)) { + (pkey_size == 256 || pkey_size == 384 || pkey_size == 512)) { FIPS_service_indicator_update_state(); } } else if (pkey_type == EVP_PKEY_EC) { @@ -280,12 +277,12 @@ void EVP_Cipher_verify_service_indicator(const EVP_CIPHER_CTX *ctx) { } void EVP_DigestVerify_verify_service_indicator(const EVP_MD_CTX *ctx) { - return evp_md_ctx_verify_service_indicator(ctx, /*rsa_1024_ok=*/1, + return evp_md_ctx_verify_service_indicator(ctx, is_md_fips_approved_for_verifying); } void EVP_DigestSign_verify_service_indicator(const EVP_MD_CTX *ctx) { - return evp_md_ctx_verify_service_indicator(ctx, /*rsa_1024_ok=*/0, + return evp_md_ctx_verify_service_indicator(ctx, is_md_fips_approved_for_signing); } diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/service_indicator/service_indicator_test.cc b/yass/third_party/boringssl/src/crypto/fipsmodule/service_indicator/service_indicator_test.cc index 9eac62e4a4..e7221fa643 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/service_indicator/service_indicator_test.cc +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/service_indicator/service_indicator_test.cc @@ -1127,43 +1127,43 @@ static const struct RSATestVector kRSATestVectors[] = { FIPSStatus::NOT_APPROVED}, {4096, &EVP_md5, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, - // RSA test cases that are approved. - {1024, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, - {1024, &EVP_sha256, false, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, - {1024, &EVP_sha512, false, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, - {1024, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, - {1024, &EVP_sha256, true, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, + // RSA 1024 is not approved under FIPS 186-5. + {1024, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, + {1024, &EVP_sha256, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, + {1024, &EVP_sha512, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, + {1024, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, + {1024, &EVP_sha256, true, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, // PSS with hashLen == saltLen is not possible for 1024-bit modulus and // SHA-512. - {2048, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, + {2048, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, {2048, &EVP_sha224, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {2048, &EVP_sha256, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {2048, &EVP_sha384, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {2048, &EVP_sha512, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, - {2048, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, + {2048, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, {2048, &EVP_sha224, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {2048, &EVP_sha256, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {2048, &EVP_sha384, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {2048, &EVP_sha512, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, - {3072, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, + {3072, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, {3072, &EVP_sha224, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {3072, &EVP_sha256, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {3072, &EVP_sha384, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {3072, &EVP_sha512, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, - {3072, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, + {3072, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, {3072, &EVP_sha224, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {3072, &EVP_sha256, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {3072, &EVP_sha384, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {3072, &EVP_sha512, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, - {4096, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, + {4096, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, {4096, &EVP_sha224, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {4096, &EVP_sha256, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {4096, &EVP_sha384, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {4096, &EVP_sha512, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, - {4096, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, + {4096, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, {4096, &EVP_sha224, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {4096, &EVP_sha256, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {4096, &EVP_sha384, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, @@ -1359,7 +1359,7 @@ struct ECDSATestVector { int nid; // md_func is the digest to test. const EVP_MD *(*func)(); - // expected to be approved or not for signature generation. + // expected to be approved or not for key generation. FIPSStatus key_check_expect_approved; // expected to be approved or not for signature generation. FIPSStatus sig_gen_expect_approved; @@ -1372,7 +1372,7 @@ static const struct ECDSATestVector kECDSATestVectors[] = { // |EC_GROUP_new_by_curve_name|, and |NID_secp256k1| will only work if // |kCurveSecp256k1Supported| is true. {NID_secp224r1, &EVP_sha1, FIPSStatus::APPROVED, FIPSStatus::NOT_APPROVED, - FIPSStatus::APPROVED}, + FIPSStatus::NOT_APPROVED}, {NID_secp224r1, &EVP_sha224, FIPSStatus::APPROVED, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {NID_secp224r1, &EVP_sha256, FIPSStatus::APPROVED, FIPSStatus::APPROVED, @@ -1383,7 +1383,7 @@ static const struct ECDSATestVector kECDSATestVectors[] = { FIPSStatus::APPROVED}, {NID_X9_62_prime256v1, &EVP_sha1, FIPSStatus::APPROVED, - FIPSStatus::NOT_APPROVED, FIPSStatus::APPROVED}, + FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED}, {NID_X9_62_prime256v1, &EVP_sha224, FIPSStatus::APPROVED, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {NID_X9_62_prime256v1, &EVP_sha256, FIPSStatus::APPROVED, @@ -1394,7 +1394,7 @@ static const struct ECDSATestVector kECDSATestVectors[] = { FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {NID_secp384r1, &EVP_sha1, FIPSStatus::APPROVED, FIPSStatus::NOT_APPROVED, - FIPSStatus::APPROVED}, + FIPSStatus::NOT_APPROVED}, {NID_secp384r1, &EVP_sha224, FIPSStatus::APPROVED, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {NID_secp384r1, &EVP_sha256, FIPSStatus::APPROVED, FIPSStatus::APPROVED, @@ -1405,7 +1405,7 @@ static const struct ECDSATestVector kECDSATestVectors[] = { FIPSStatus::APPROVED}, {NID_secp521r1, &EVP_sha1, FIPSStatus::APPROVED, FIPSStatus::NOT_APPROVED, - FIPSStatus::APPROVED}, + FIPSStatus::NOT_APPROVED}, {NID_secp521r1, &EVP_sha224, FIPSStatus::APPROVED, FIPSStatus::APPROVED, FIPSStatus::APPROVED}, {NID_secp521r1, &EVP_sha256, FIPSStatus::APPROVED, FIPSStatus::APPROVED, @@ -1800,6 +1800,21 @@ TEST_P(KDF_ServiceIndicatorTest, TLSKDF) { EXPECT_EQ(approved, test.expect_approved); } +TEST_P(KDF_ServiceIndicatorTest, TLS13KDF) { + const KDFTestVector &test = GetParam(); + + FIPSStatus approved = FIPSStatus::NOT_APPROVED; + + uint8_t output[32]; + ASSERT_TRUE(CALL_SERVICE_AND_CHECK_APPROVED( + approved, CRYPTO_tls13_hkdf_expand_label( + output, sizeof(output), test.func(), kTLSSecret, + sizeof(kTLSSecret), /*label=*/kTLSSeed1, sizeof(kTLSSeed1), + /*hash=*/kTLSSeed2, sizeof(kTLSSeed2)))); + + EXPECT_EQ(approved, test.expect_approved); +} + TEST(ServiceIndicatorTest, CMAC) { FIPSStatus approved = FIPSStatus::NOT_APPROVED; diff --git a/yass/third_party/boringssl/src/crypto/fipsmodule/tls/kdf.c b/yass/third_party/boringssl/src/crypto/fipsmodule/tls/kdf.c index c4f4976c7f..7a7d12da11 100644 --- a/yass/third_party/boringssl/src/crypto/fipsmodule/tls/kdf.c +++ b/yass/third_party/boringssl/src/crypto/fipsmodule/tls/kdf.c @@ -189,6 +189,7 @@ int CRYPTO_tls13_hkdf_expand_label(uint8_t *out, size_t out_len, uint8_t *hkdf_label = NULL; size_t hkdf_label_len; + FIPS_service_indicator_lock_state(); CBB_zero(&cbb); if (!CBB_init(&cbb, 2 + 1 + sizeof(kProtocolLabel) - 1 + label_len + 1 + hash_len) || @@ -200,12 +201,18 @@ int CRYPTO_tls13_hkdf_expand_label(uint8_t *out, size_t out_len, !CBB_add_bytes(&child, hash, hash_len) || !CBB_finish(&cbb, &hkdf_label, &hkdf_label_len)) { CBB_cleanup(&cbb); + FIPS_service_indicator_unlock_state(); return 0; } const int ret = HKDF_expand(out, out_len, digest, secret, secret_len, hkdf_label, hkdf_label_len); OPENSSL_free(hkdf_label); + + FIPS_service_indicator_unlock_state(); + if (ret) { + TLSKDF_verify_service_indicator(digest); + } return ret; } diff --git a/yass/third_party/boringssl/src/crypto/internal.h b/yass/third_party/boringssl/src/crypto/internal.h index f8762180c1..762eaae8c9 100644 --- a/yass/third_party/boringssl/src/crypto/internal.h +++ b/yass/third_party/boringssl/src/crypto/internal.h @@ -609,6 +609,12 @@ static inline int constant_time_declassify_int(int v) { return value_barrier_u32(v); } +// declassify_assert behaves like |assert| but declassifies the result of +// evaluating |expr|. This allows the assertion to branch on the (presumably +// public) result, but still ensures that values leading up to the computation +// were secret. +#define declassify_assert(expr) assert(constant_time_declassify_int(expr)) + // Thread-safe initialisation. @@ -1205,13 +1211,13 @@ static inline uint64_t CRYPTO_rotr_u64(uint64_t value, int shift) { static inline uint32_t CRYPTO_addc_u32(uint32_t x, uint32_t y, uint32_t carry, uint32_t *out_carry) { - assert(carry <= 1); + declassify_assert(carry <= 1); return CRYPTO_GENERIC_ADDC(x, y, carry, out_carry); } static inline uint64_t CRYPTO_addc_u64(uint64_t x, uint64_t y, uint64_t carry, uint64_t *out_carry) { - assert(carry <= 1); + declassify_assert(carry <= 1); return CRYPTO_GENERIC_ADDC(x, y, carry, out_carry); } @@ -1219,7 +1225,7 @@ static inline uint64_t CRYPTO_addc_u64(uint64_t x, uint64_t y, uint64_t carry, static inline uint32_t CRYPTO_addc_u32(uint32_t x, uint32_t y, uint32_t carry, uint32_t *out_carry) { - assert(carry <= 1); + declassify_assert(carry <= 1); uint64_t ret = carry; ret += (uint64_t)x + y; *out_carry = (uint32_t)(ret >> 32); @@ -1228,7 +1234,7 @@ static inline uint32_t CRYPTO_addc_u32(uint32_t x, uint32_t y, uint32_t carry, static inline uint64_t CRYPTO_addc_u64(uint64_t x, uint64_t y, uint64_t carry, uint64_t *out_carry) { - assert(carry <= 1); + declassify_assert(carry <= 1); #if defined(BORINGSSL_HAS_UINT128) uint128_t ret = carry; ret += (uint128_t)x + y; @@ -1257,13 +1263,13 @@ static inline uint64_t CRYPTO_addc_u64(uint64_t x, uint64_t y, uint64_t carry, static inline uint32_t CRYPTO_subc_u32(uint32_t x, uint32_t y, uint32_t borrow, uint32_t *out_borrow) { - assert(borrow <= 1); + declassify_assert(borrow <= 1); return CRYPTO_GENERIC_SUBC(x, y, borrow, out_borrow); } static inline uint64_t CRYPTO_subc_u64(uint64_t x, uint64_t y, uint64_t borrow, uint64_t *out_borrow) { - assert(borrow <= 1); + declassify_assert(borrow <= 1); return CRYPTO_GENERIC_SUBC(x, y, borrow, out_borrow); } @@ -1271,7 +1277,7 @@ static inline uint64_t CRYPTO_subc_u64(uint64_t x, uint64_t y, uint64_t borrow, static inline uint32_t CRYPTO_subc_u32(uint32_t x, uint32_t y, uint32_t borrow, uint32_t *out_borrow) { - assert(borrow <= 1); + declassify_assert(borrow <= 1); uint32_t ret = x - y - borrow; *out_borrow = (x < y) | ((x == y) & borrow); return ret; @@ -1279,7 +1285,7 @@ static inline uint32_t CRYPTO_subc_u32(uint32_t x, uint32_t y, uint32_t borrow, static inline uint64_t CRYPTO_subc_u64(uint64_t x, uint64_t y, uint64_t borrow, uint64_t *out_borrow) { - assert(borrow <= 1); + declassify_assert(borrow <= 1); uint64_t ret = x - y - borrow; *out_borrow = (x < y) | ((x == y) & borrow); return ret; diff --git a/yass/third_party/boringssl/src/crypto/pkcs8/pkcs12_test.cc b/yass/third_party/boringssl/src/crypto/pkcs8/pkcs12_test.cc index 459339d758..a210089158 100644 --- a/yass/third_party/boringssl/src/crypto/pkcs8/pkcs12_test.cc +++ b/yass/third_party/boringssl/src/crypto/pkcs8/pkcs12_test.cc @@ -25,11 +25,10 @@ #include #include +#include "../test/test_data.h" #include "../test/test_util.h" -std::string GetTestData(const char *path); - // kPassword is the password shared by most of the sample PKCS#12 files. static const char kPassword[] = "foo"; diff --git a/yass/third_party/boringssl/src/crypto/spx/spx.c b/yass/third_party/boringssl/src/crypto/spx/spx.c index b5bec6e877..372001a94c 100644 --- a/yass/third_party/boringssl/src/crypto/spx/spx.c +++ b/yass/third_party/boringssl/src/crypto/spx/spx.c @@ -16,6 +16,7 @@ #include +#define OPENSSL_UNSTABLE_EXPERIMENTAL_SPX #include #include @@ -26,14 +27,14 @@ #include "./spx_util.h" #include "./thash.h" -void spx_generate_key(uint8_t out_public_key[SPX_PUBLIC_KEY_BYTES], +void SPX_generate_key(uint8_t out_public_key[SPX_PUBLIC_KEY_BYTES], uint8_t out_secret_key[SPX_SECRET_KEY_BYTES]) { uint8_t seed[3 * SPX_N]; RAND_bytes(seed, 3 * SPX_N); - spx_generate_key_from_seed(out_public_key, out_secret_key, seed); + SPX_generate_key_from_seed(out_public_key, out_secret_key, seed); } -void spx_generate_key_from_seed(uint8_t out_public_key[SPX_PUBLIC_KEY_BYTES], +void SPX_generate_key_from_seed(uint8_t out_public_key[SPX_PUBLIC_KEY_BYTES], uint8_t out_secret_key[SPX_SECRET_KEY_BYTES], const uint8_t seed[3 * SPX_N]) { // Initialize SK.seed || SK.prf || PK.seed from seed. @@ -51,7 +52,7 @@ void spx_generate_key_from_seed(uint8_t out_public_key[SPX_PUBLIC_KEY_BYTES], memcpy(out_secret_key + 3 * SPX_N, out_public_key + SPX_N, SPX_N); } -void spx_sign(uint8_t out_signature[SPX_SIGNATURE_BYTES], +void SPX_sign(uint8_t out_signature[SPX_SIGNATURE_BYTES], const uint8_t secret_key[SPX_SECRET_KEY_BYTES], const uint8_t *msg, size_t msg_len, int randomized) { uint8_t addr[32] = {0}; @@ -102,7 +103,7 @@ void spx_sign(uint8_t out_signature[SPX_SIGNATURE_BYTES], idx_leaf, sk_seed, pk_seed); } -int spx_verify(const uint8_t signature[SPX_SIGNATURE_BYTES], +int SPX_verify(const uint8_t signature[SPX_SIGNATURE_BYTES], const uint8_t public_key[SPX_SECRET_KEY_BYTES], const uint8_t *msg, size_t msg_len) { uint8_t addr[32] = {0}; diff --git a/yass/third_party/boringssl/src/crypto/spx/spx_test.cc b/yass/third_party/boringssl/src/crypto/spx/spx_test.cc index 313f47d425..e17dce8554 100644 --- a/yass/third_party/boringssl/src/crypto/spx/spx_test.cc +++ b/yass/third_party/boringssl/src/crypto/spx/spx_test.cc @@ -21,12 +21,13 @@ #include #include +#define OPENSSL_UNSTABLE_EXPERIMENTAL_SPX #include #include "../test/file_test.h" #include "../test/test_util.h" - +// suppress warnings for experimental spx api namespace { TEST(SpxTest, KeyGeneration) { @@ -47,7 +48,7 @@ TEST(SpxTest, KeyGeneration) { uint8_t pk[2 * SPX_N]; uint8_t sk[4 * SPX_N]; - spx_generate_key_from_seed(pk, sk, seed); + SPX_generate_key_from_seed(pk, sk, seed); EXPECT_EQ(Bytes(pk), Bytes(expected_pk)); EXPECT_EQ(Bytes(sk), Bytes(expected_sk)); } @@ -75,7 +76,7 @@ TEST(SpxTest, KeyGeneration2) { uint8_t pk[2 * SPX_N]; uint8_t sk[4 * SPX_N]; - spx_generate_key_from_seed(pk, sk, seed); + SPX_generate_key_from_seed(pk, sk, seed); EXPECT_EQ(Bytes(pk), Bytes(expected_pk)); EXPECT_EQ(Bytes(sk), Bytes(expected_sk)); } @@ -83,12 +84,12 @@ TEST(SpxTest, KeyGeneration2) { TEST(SpxTest, BasicSignVerify) { uint8_t pk[2 * SPX_N]; uint8_t sk[4 * SPX_N]; - spx_generate_key(pk, sk); + SPX_generate_key(pk, sk); uint8_t message[] = {0x42}; uint8_t signature[SPX_SIGNATURE_BYTES]; - spx_sign(signature, sk, message, sizeof(message), true); - EXPECT_EQ(spx_verify(signature, pk, message, sizeof(message)), 1); + SPX_sign(signature, sk, message, sizeof(message), true); + EXPECT_EQ(SPX_verify(signature, pk, message, sizeof(message)), 1); } static void SpxFileTest(FileTest *t) { @@ -102,13 +103,13 @@ static void SpxFileTest(FileTest *t) { t->IgnoreAttribute("smlen"); ASSERT_TRUE(t->GetBytes(&signature, "sm")); - EXPECT_EQ(spx_verify(signature.data(), public_key.data(), message.data(), + EXPECT_EQ(SPX_verify(signature.data(), public_key.data(), message.data(), message.size()), 1); message[0] ^= 1; - EXPECT_EQ(spx_verify(signature.data(), public_key.data(), message.data(), + EXPECT_EQ(SPX_verify(signature.data(), public_key.data(), message.data(), message.size()), 0); } @@ -126,7 +127,7 @@ static void SpxFileDeterministicTest(FileTest *t) { expected_signature.resize(SPX_SIGNATURE_BYTES); uint8_t signature[SPX_SIGNATURE_BYTES]; - spx_sign(signature, sk.data(), message.data(), message.size(), false); + SPX_sign(signature, sk.data(), message.data(), message.size(), false); EXPECT_EQ(Bytes(signature), Bytes(expected_signature)); } diff --git a/yass/third_party/boringssl/src/crypto/stack/stack_test.cc b/yass/third_party/boringssl/src/crypto/stack/stack_test.cc index 228182b623..d2342a55be 100644 --- a/yass/third_party/boringssl/src/crypto/stack/stack_test.cc +++ b/yass/third_party/boringssl/src/crypto/stack/stack_test.cc @@ -517,3 +517,15 @@ TEST(StackTest, Sort) { } } } + +TEST(StackTest, NullIsEmpty) { + EXPECT_EQ(0u, sk_TEST_INT_num(nullptr)); + + EXPECT_EQ(nullptr, sk_TEST_INT_value(nullptr, 0)); + EXPECT_EQ(nullptr, sk_TEST_INT_value(nullptr, 1)); + + bssl::UniquePtr value = TEST_INT_new(6); + ASSERT_TRUE(value); + size_t index; + EXPECT_FALSE(sk_TEST_INT_find(nullptr, &index, value.get())); +} diff --git a/yass/third_party/boringssl/src/crypto/test/CMakeLists.txt b/yass/third_party/boringssl/src/crypto/test/CMakeLists.txt deleted file mode 100644 index b52e9de2ad..0000000000 --- a/yass/third_party/boringssl/src/crypto/test/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -if (BORINGSSL_BUILD_TESTS) - -add_library( - test_support_lib - - STATIC - - abi_test.cc - file_test.cc - test_util.cc - wycheproof_util.cc -) - -if (LIBUNWIND_FOUND) - target_compile_options(test_support_lib PRIVATE ${LIBUNWIND_CFLAGS_OTHER}) - target_include_directories(test_support_lib PRIVATE ${LIBUNWIND_INCLUDE_DIRS}) - target_link_libraries(test_support_lib ${LIBUNWIND_LDFLAGS}) -endif() -if(WIN32) - target_link_libraries(test_support_lib dbghelp) -endif() -target_link_libraries(test_support_lib boringssl_gtest crypto) - -add_library(boringssl_gtest_main STATIC gtest_main.cc) -target_link_libraries(boringssl_gtest_main boringssl_gtest crypto test_support_lib) -endif() diff --git a/yass/third_party/boringssl/src/crypto/test/file_test_gtest.cc b/yass/third_party/boringssl/src/crypto/test/file_test_gtest.cc index 2cb0896947..b1d35562a4 100644 --- a/yass/third_party/boringssl/src/crypto/test/file_test_gtest.cc +++ b/yass/third_party/boringssl/src/crypto/test/file_test_gtest.cc @@ -25,8 +25,8 @@ #include +#include "test_data.h" -std::string GetTestData(const char *path); class StringLineReader : public FileTest::LineReader { public: diff --git a/yass/third_party/boringssl/src/crypto/test/test_data.cc b/yass/third_party/boringssl/src/crypto/test/test_data.cc new file mode 100644 index 0000000000..9fe2aaaa06 --- /dev/null +++ b/yass/third_party/boringssl/src/crypto/test/test_data.cc @@ -0,0 +1,51 @@ +/* Copyright (c) 2024, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + +#include "test_data.h" + +#include +#include + +#include "file_util.h" + +#if !defined(BORINGSSL_CUSTOM_GET_TEST_DATA) +std::string GetTestData(const char *path) { + const char *root = getenv("BORINGSSL_TEST_DATA_ROOT"); + root = root != nullptr ? root : "."; + + std::string full_path = root; + full_path.push_back('/'); + full_path.append(path); + + ScopedFILE file(fopen(full_path.c_str(), "rb")); + if (file == nullptr) { + fprintf(stderr, "Could not open '%s'.\n", full_path.c_str()); + abort(); + } + + std::string ret; + for (;;) { + char buf[512]; + size_t n = fread(buf, 1, sizeof(buf), file.get()); + if (n == 0) { + if (feof(file.get())) { + return ret; + } + fprintf(stderr, "Error reading from '%s'.\n", full_path.c_str()); + abort(); + } + ret.append(buf, n); + } +} +#endif // !BORINGSSL_CUSTOM_GET_TEST_DATA diff --git a/yass/third_party/boringssl/src/crypto/test/test_data.h b/yass/third_party/boringssl/src/crypto/test/test_data.h new file mode 100644 index 0000000000..b3f4b6ace3 --- /dev/null +++ b/yass/third_party/boringssl/src/crypto/test/test_data.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2024, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + +#ifndef OPENSSL_HEADER_CRYPTO_TEST_TEST_DATA_H +#define OPENSSL_HEADER_CRYPTO_TEST_TEST_DATA_H + +#include + +// GetTestData returns the test data for |path|, or aborts on error. |path| +// must be a slash-separated path, relative to the BoringSSL source tree. By +// default, this is implemented by reading from the filesystem, relative to +// the BORINGSSL_TEST_DATA_ROOT environment variable, or the current working +// directory if unset. +// +// Callers with more complex needs can build with +// BORINGSSL_CUSTOM_GET_TEST_DATA and then link in an alternate implementation +// of this function. +std::string GetTestData(const char *path); + +#endif // OPENSSL_HEADER_CRYPTO_TEST_TEST_DATA_H diff --git a/yass/third_party/boringssl/src/crypto/x509/by_dir.c b/yass/third_party/boringssl/src/crypto/x509/by_dir.c index d49220d889..5829de46fb 100644 --- a/yass/third_party/boringssl/src/crypto/x509/by_dir.c +++ b/yass/third_party/boringssl/src/crypto/x509/by_dir.c @@ -313,14 +313,20 @@ static int get_cert_by_subject(X509_LOOKUP *xl, int type, X509_NAME *name, k); if (type == X509_LU_X509) { if ((X509_load_cert_file(xl, b->data, ent->dir_type)) == 0) { + // Don't expose the lower level error, All of these boil + // down to "we could not find a CA". + ERR_clear_error(); break; } } else if (type == X509_LU_CRL) { if ((X509_load_crl_file(xl, b->data, ent->dir_type)) == 0) { + // Don't expose the lower level error, All of these boil + // down to "we could not find a CRL". + ERR_clear_error(); break; } } - // else case will caught higher up + // The lack of a CA or CRL will be caught higher up k++; } diff --git a/yass/third_party/boringssl/src/crypto/x509/internal.h b/yass/third_party/boringssl/src/crypto/x509/internal.h index 1cdefafe5d..50180636c1 100644 --- a/yass/third_party/boringssl/src/crypto/x509/internal.h +++ b/yass/third_party/boringssl/src/crypto/x509/internal.h @@ -422,7 +422,7 @@ int x509_print_rsa_pss_params(BIO *bp, const X509_ALGOR *sigalg, int indent, // Signature algorithm functions. // x509_digest_sign_algorithm encodes the signing parameters of |ctx| as an -// AlgorithmIdentifer and saves the result in |algor|. It returns one on +// AlgorithmIdentifier and saves the result in |algor|. It returns one on // success, or zero on error. int x509_digest_sign_algorithm(EVP_MD_CTX *ctx, X509_ALGOR *algor); diff --git a/yass/third_party/boringssl/src/crypto/x509/rsa_pss.c b/yass/third_party/boringssl/src/crypto/x509/rsa_pss.c index 5974bfab51..8e19b8c1ae 100644 --- a/yass/third_party/boringssl/src/crypto/x509/rsa_pss.c +++ b/yass/third_party/boringssl/src/crypto/x509/rsa_pss.c @@ -125,7 +125,11 @@ static int rsa_md_to_algor(X509_ALGOR **palg, const EVP_MD *md) { if (*palg == NULL) { return 0; } - X509_ALGOR_set_md(*palg, md); + if (!X509_ALGOR_set_md(*palg, md)) { + X509_ALGOR_free(*palg); + *palg = NULL; + return 0; + } return 1; } diff --git a/yass/third_party/boringssl/src/crypto/x509/x509_test.cc b/yass/third_party/boringssl/src/crypto/x509/x509_test.cc index c4fd0b8aae..ffbcf7b861 100644 --- a/yass/third_party/boringssl/src/crypto/x509/x509_test.cc +++ b/yass/third_party/boringssl/src/crypto/x509/x509_test.cc @@ -37,6 +37,7 @@ #include "internal.h" #include "../internal.h" #include "../test/file_util.h" +#include "../test/test_data.h" #include "../test/test_util.h" #if defined(OPENSSL_THREADS) @@ -44,8 +45,6 @@ #endif -std::string GetTestData(const char *path); - static const char kCrossSigningRootPEM[] = R"( -----BEGIN CERTIFICATE----- MIICcTCCAdqgAwIBAgIIagJHiPvE0MowDQYJKoZIhvcNAQELBQAwPDEaMBgGA1UE @@ -1789,7 +1788,8 @@ static bool AddSubjectKeyIdentifier(X509 *x509, /*crit=*/0, /*flags=*/0); } -static bool AddAuthorityKeyIdentifer(X509 *x509, bssl::Span key_id) { +static bool AddAuthorityKeyIdentifier(X509 *x509, + bssl::Span key_id) { bssl::UniquePtr akid(AUTHORITY_KEYID_new()); if (akid == nullptr) { return false; @@ -1843,7 +1843,7 @@ static bool AddRevokedSerialU64(X509_CRL *crl, uint64_t serial, return true; } -static bool AddAuthorityKeyIdentifer(X509_CRL *crl, +static bool AddAuthorityKeyIdentifier(X509_CRL *crl, bssl::Span key_id) { bssl::UniquePtr akid(AUTHORITY_KEYID_new()); if (akid == nullptr) { @@ -2813,6 +2813,25 @@ TEST(X509Test, PrettyPrintIntegers) { } } +TEST(X509Test, X509AlgorSetMd) { + bssl::UniquePtr alg(X509_ALGOR_new()); + ASSERT_TRUE(alg); + EXPECT_TRUE(X509_ALGOR_set_md(alg.get(), EVP_sha256())); + const ASN1_OBJECT *obj; + const void *pval; + int ptype = 0; + X509_ALGOR_get0(&obj, &ptype, &pval, alg.get()); + EXPECT_TRUE(obj); + EXPECT_EQ(OBJ_obj2nid(obj), NID_sha256); + EXPECT_EQ(ptype, V_ASN1_NULL); // OpenSSL has V_ASN1_UNDEF + EXPECT_EQ(pval, nullptr); + EXPECT_TRUE(X509_ALGOR_set_md(alg.get(), EVP_md5())); + X509_ALGOR_get0(&obj, &ptype, &pval, alg.get()); + EXPECT_EQ(OBJ_obj2nid(obj), NID_md5); + EXPECT_EQ(ptype, V_ASN1_NULL); + EXPECT_EQ(pval, nullptr); +} + TEST(X509Test, X509NameSet) { bssl::UniquePtr name(X509_NAME_new()); ASSERT_TRUE(name); @@ -8149,9 +8168,16 @@ TEST(X509Test, DirHash) { EXPECT_EQ(X509_V_OK, test_issuer(old_collide_name2)); // Test a certificate not in the store. + ERR_clear_error(); EXPECT_EQ(X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY, test_issuer("Not In Store")); + // Although, internally, this hits the filesystem and finds that a file does + // not exist, there should not be anything on the error queue about a + // missing file. |X509_verify_cert| generally does not use the error queue, + // so it will be empty. See https://crbug.com/boringssl/708. + EXPECT_EQ(ERR_get_error(), 0u); + // Test CRL handling. First, if we cannot find a CRL, verification will // fail. // @@ -8429,11 +8455,11 @@ TEST(X509Test, DuplicateName) { bssl::UniquePtr leaf1 = MakeTestCert("CA", "Leaf", key1.get(), /*is_ca=*/false); ASSERT_TRUE(leaf1); - ASSERT_TRUE(AddAuthorityKeyIdentifer(leaf1.get(), key_id1)); + ASSERT_TRUE(AddAuthorityKeyIdentifier(leaf1.get(), key_id1)); ASSERT_TRUE(X509_sign(leaf1.get(), key1.get(), EVP_sha256())); bssl::UniquePtr crl1 = MakeTestCRL("CA", -1, 1); ASSERT_TRUE(crl1); - ASSERT_TRUE(AddAuthorityKeyIdentifer(crl1.get(), key_id1)); + ASSERT_TRUE(AddAuthorityKeyIdentifier(crl1.get(), key_id1)); ASSERT_TRUE(X509_CRL_sign(crl1.get(), key1.get(), EVP_sha256())); // TODO(davidben): Some state in CRLs does not get correctly set up unless it // is parsed from data. |X509_CRL_sign| should reset it internally. @@ -8451,11 +8477,11 @@ TEST(X509Test, DuplicateName) { bssl::UniquePtr leaf2 = MakeTestCert("CA", "Leaf", key2.get(), /*is_ca=*/false); ASSERT_TRUE(leaf2); - ASSERT_TRUE(AddAuthorityKeyIdentifer(leaf2.get(), key_id2)); + ASSERT_TRUE(AddAuthorityKeyIdentifier(leaf2.get(), key_id2)); ASSERT_TRUE(X509_sign(leaf2.get(), key2.get(), EVP_sha256())); bssl::UniquePtr crl2 = MakeTestCRL("CA", -2, 2); ASSERT_TRUE(crl2); - ASSERT_TRUE(AddAuthorityKeyIdentifer(crl2.get(), key_id2)); + ASSERT_TRUE(AddAuthorityKeyIdentifier(crl2.get(), key_id2)); ASSERT_TRUE(X509_CRL_sign(crl2.get(), key2.get(), EVP_sha256())); // TODO(davidben): Some state in CRLs does not get correctly set up unless it // is parsed from data. |X509_CRL_sign| should reset it internally. diff --git a/yass/third_party/boringssl/src/crypto/x509/x509_trs.c b/yass/third_party/boringssl/src/crypto/x509/x509_trs.c index 87137b15e2..64380c0aa2 100644 --- a/yass/third_party/boringssl/src/crypto/x509/x509_trs.c +++ b/yass/third_party/boringssl/src/crypto/x509/x509_trs.c @@ -67,14 +67,14 @@ typedef struct x509_trust_st X509_TRUST; struct x509_trust_st { int trust; - int (*check_trust)(const X509_TRUST *, X509 *, int); + int (*check_trust)(const X509_TRUST *, X509 *); int nid; } /* X509_TRUST */; -static int trust_1oidany(const X509_TRUST *trust, X509 *x, int flags); -static int trust_compat(const X509_TRUST *trust, X509 *x, int flags); +static int trust_1oidany(const X509_TRUST *trust, X509 *x); +static int trust_compat(const X509_TRUST *trust, X509 *x); -static int obj_trust(int id, X509 *x, int flags); +static int obj_trust(int id, X509 *x); static const X509_TRUST trstandard[] = { {X509_TRUST_COMPAT, trust_compat, 0}, @@ -99,36 +99,36 @@ int X509_check_trust(X509 *x, int id, int flags) { } // We get this as a default value if (id == 0) { - int rv = obj_trust(NID_anyExtendedKeyUsage, x, 0); + int rv = obj_trust(NID_anyExtendedKeyUsage, x); if (rv != X509_TRUST_UNTRUSTED) { return rv; } - return trust_compat(NULL, x, 0); + return trust_compat(NULL, x); } const X509_TRUST *pt = X509_TRUST_get0(id); if (pt == NULL) { // Unknown trust IDs are silently reintrepreted as NIDs. This is unreachable // from the certificate verifier itself, but wpa_supplicant relies on it. // Note this relies on commonly-used NIDs and trust IDs not colliding. - return obj_trust(id, x, flags); + return obj_trust(id, x); } - return pt->check_trust(pt, x, flags); + return pt->check_trust(pt, x); } int X509_is_valid_trust_id(int trust) { return X509_TRUST_get0(trust) != NULL; } -static int trust_1oidany(const X509_TRUST *trust, X509 *x, int flags) { +static int trust_1oidany(const X509_TRUST *trust, X509 *x) { if (x->aux && (x->aux->trust || x->aux->reject)) { - return obj_trust(trust->nid, x, flags); + return obj_trust(trust->nid, x); } // we don't have any trust settings: for compatibility we return trusted // if it is self signed - return trust_compat(trust, x, flags); + return trust_compat(trust, x); } -static int trust_compat(const X509_TRUST *trust, X509 *x, int flags) { +static int trust_compat(const X509_TRUST *trust, X509 *x) { if (!x509v3_cache_extensions(x)) { return X509_TRUST_UNTRUSTED; } @@ -139,7 +139,7 @@ static int trust_compat(const X509_TRUST *trust, X509 *x, int flags) { } } -static int obj_trust(int id, X509 *x, int flags) { +static int obj_trust(int id, X509 *x) { X509_CERT_AUX *ax = x->aux; if (!ax) { return X509_TRUST_UNTRUSTED; diff --git a/yass/third_party/boringssl/src/crypto/x509/x_algor.c b/yass/third_party/boringssl/src/crypto/x509/x_algor.c index 819aee5f35..bdc77ae969 100644 --- a/yass/third_party/boringssl/src/crypto/x509/x_algor.c +++ b/yass/third_party/boringssl/src/crypto/x509/x_algor.c @@ -123,7 +123,7 @@ void X509_ALGOR_get0(const ASN1_OBJECT **out_obj, int *out_param_type, // Set up an X509_ALGOR DigestAlgorithmIdentifier from an EVP_MD -void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md) { +int X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md) { int param_type; if (EVP_MD_flags(md) & EVP_MD_FLAG_DIGALGID_ABSENT) { @@ -132,7 +132,7 @@ void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md) { param_type = V_ASN1_NULL; } - X509_ALGOR_set0(alg, OBJ_nid2obj(EVP_MD_type(md)), param_type, NULL); + return X509_ALGOR_set0(alg, OBJ_nid2obj(EVP_MD_type(md)), param_type, NULL); } // X509_ALGOR_cmp returns 0 if |a| and |b| are equal and non-zero otherwise. diff --git a/yass/third_party/boringssl/src/decrepit/CMakeLists.txt b/yass/third_party/boringssl/src/decrepit/CMakeLists.txt deleted file mode 100644 index 9c042bbea1..0000000000 --- a/yass/third_party/boringssl/src/decrepit/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -add_library( - decrepit - - bio/base64_bio.c - blowfish/blowfish.c - cast/cast.c - cast/cast_tables.c - cfb/cfb.c - des/cfb64ede.c - dh/dh_decrepit.c - dsa/dsa_decrepit.c - evp/dss1.c - evp/evp_do_all.c - obj/obj_decrepit.c - rc4/rc4_decrepit.c - ripemd/ripemd.c - rsa/rsa_decrepit.c - ssl/ssl_decrepit.c - x509/x509_decrepit.c - xts/xts.c -) -target_link_libraries(decrepit crypto ssl) diff --git a/yass/third_party/boringssl/src/decrepit/blowfish/blowfish.c b/yass/third_party/boringssl/src/decrepit/blowfish/blowfish.c index 7c209da5d3..82acb5f892 100644 --- a/yass/third_party/boringssl/src/decrepit/blowfish/blowfish.c +++ b/yass/third_party/boringssl/src/decrepit/blowfish/blowfish.c @@ -594,30 +594,36 @@ static int bf_cfb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, } static const EVP_CIPHER bf_ecb = { - NID_bf_ecb, BF_BLOCK /* block_size */, - 16 /* key_size */, BF_BLOCK /* iv_len */, - sizeof(BF_KEY), EVP_CIPH_ECB_MODE | EVP_CIPH_VARIABLE_LENGTH, - NULL /* app_data */, bf_init_key, - bf_ecb_cipher, NULL /* cleanup */, - NULL /* ctrl */, + .nid = NID_bf_ecb, + .block_size = BF_BLOCK, + .key_len = 16, + .iv_len = BF_BLOCK, + .ctx_size = sizeof(BF_KEY), + .flags = EVP_CIPH_ECB_MODE | EVP_CIPH_VARIABLE_LENGTH, + .init = bf_init_key, + .cipher = bf_ecb_cipher, }; static const EVP_CIPHER bf_cbc = { - NID_bf_cbc, BF_BLOCK /* block_size */, - 16 /* key_size */, BF_BLOCK /* iv_len */, - sizeof(BF_KEY), EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH, - NULL /* app_data */, bf_init_key, - bf_cbc_cipher, NULL /* cleanup */, - NULL /* ctrl */, + .nid = NID_bf_cbc, + .block_size = BF_BLOCK, + .key_len = 16, + .iv_len = BF_BLOCK, + .ctx_size = sizeof(BF_KEY), + .flags = EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH, + .init = bf_init_key, + .cipher = bf_cbc_cipher, }; static const EVP_CIPHER bf_cfb = { - NID_bf_cfb64, 1 /* block_size */, - 16 /* key_size */, BF_BLOCK /* iv_len */, - sizeof(BF_KEY), EVP_CIPH_CFB_MODE | EVP_CIPH_VARIABLE_LENGTH, - NULL /* app_data */, bf_init_key, - bf_cfb_cipher, NULL /* cleanup */, - NULL /* ctrl */, + .nid = NID_bf_cfb64, + .block_size = 1, + .key_len = 16, + .iv_len = BF_BLOCK, + .ctx_size = sizeof(BF_KEY), + .flags = EVP_CIPH_CFB_MODE | EVP_CIPH_VARIABLE_LENGTH, + .init = bf_init_key, + .cipher = bf_cfb_cipher, }; const EVP_CIPHER *EVP_bf_ecb(void) { return &bf_ecb; } diff --git a/yass/third_party/boringssl/src/decrepit/cast/cast.c b/yass/third_party/boringssl/src/decrepit/cast/cast.c index 314e3da21e..f6f1963abd 100644 --- a/yass/third_party/boringssl/src/decrepit/cast/cast.c +++ b/yass/third_party/boringssl/src/decrepit/cast/cast.c @@ -437,21 +437,25 @@ static int cast_cbc_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, } static const EVP_CIPHER cast5_ecb = { - NID_cast5_ecb, CAST_BLOCK, - CAST_KEY_LENGTH, CAST_BLOCK /* iv_len */, - sizeof(CAST_KEY), EVP_CIPH_ECB_MODE | EVP_CIPH_VARIABLE_LENGTH, - NULL /* app_data */, cast_init_key, - cast_ecb_cipher, NULL /* cleanup */, - NULL /* ctrl */, + .nid = NID_cast5_ecb, + .block_size = CAST_BLOCK, + .key_len = CAST_KEY_LENGTH, + .iv_len = CAST_BLOCK, + .ctx_size = sizeof(CAST_KEY), + .flags = EVP_CIPH_ECB_MODE | EVP_CIPH_VARIABLE_LENGTH, + .init = cast_init_key, + .cipher = cast_ecb_cipher, }; static const EVP_CIPHER cast5_cbc = { - NID_cast5_cbc, CAST_BLOCK, - CAST_KEY_LENGTH, CAST_BLOCK /* iv_len */, - sizeof(CAST_KEY), EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH, - NULL /* app_data */, cast_init_key, - cast_cbc_cipher, NULL /* cleanup */, - NULL /* ctrl */, + .nid = NID_cast5_cbc, + .block_size = CAST_BLOCK, + .key_len = CAST_KEY_LENGTH, + .iv_len = CAST_BLOCK, + .ctx_size = sizeof(CAST_KEY), + .flags = EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH, + .init = cast_init_key, + .cipher = cast_cbc_cipher, }; const EVP_CIPHER *EVP_cast5_ecb(void) { return &cast5_ecb; } diff --git a/yass/third_party/boringssl/src/decrepit/cfb/cfb.c b/yass/third_party/boringssl/src/decrepit/cfb/cfb.c index c15292cf02..0ac32288cf 100644 --- a/yass/third_party/boringssl/src/decrepit/cfb/cfb.c +++ b/yass/third_party/boringssl/src/decrepit/cfb/cfb.c @@ -52,24 +52,36 @@ static int aes_cfb128_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, } static const EVP_CIPHER aes_128_cfb128 = { - NID_aes_128_cfb128, 1 /* block_size */, 16 /* key_size */, - 16 /* iv_len */, sizeof(EVP_CFB_CTX), EVP_CIPH_CFB_MODE, - NULL /* app_data */, aes_cfb_init_key, aes_cfb128_cipher, - NULL /* cleanup */, NULL /* ctrl */, + .nid = NID_aes_128_cfb128, + .block_size = 1, + .key_len = 16, + .iv_len = 16, + .ctx_size = sizeof(EVP_CFB_CTX), + .flags = EVP_CIPH_CFB_MODE, + .init = aes_cfb_init_key, + .cipher = aes_cfb128_cipher, }; static const EVP_CIPHER aes_192_cfb128 = { - NID_aes_192_cfb128, 1 /* block_size */, 24 /* key_size */, - 16 /* iv_len */, sizeof(EVP_CFB_CTX), EVP_CIPH_CFB_MODE, - NULL /* app_data */, aes_cfb_init_key, aes_cfb128_cipher, - NULL /* cleanup */, NULL /* ctrl */, + .nid = NID_aes_192_cfb128, + .block_size = 1, + .key_len = 24, + .iv_len = 16, + .ctx_size = sizeof(EVP_CFB_CTX), + .flags = EVP_CIPH_CFB_MODE, + .init = aes_cfb_init_key, + .cipher = aes_cfb128_cipher, }; static const EVP_CIPHER aes_256_cfb128 = { - NID_aes_256_cfb128, 1 /* block_size */, 32 /* key_size */, - 16 /* iv_len */, sizeof(EVP_CFB_CTX), EVP_CIPH_CFB_MODE, - NULL /* app_data */, aes_cfb_init_key, aes_cfb128_cipher, - NULL /* cleanup */, NULL /* ctrl */, + .nid = NID_aes_256_cfb128, + .block_size = 1, + .key_len = 32, + .iv_len = 16, + .ctx_size = sizeof(EVP_CFB_CTX), + .flags = EVP_CIPH_CFB_MODE, + .init = aes_cfb_init_key, + .cipher = aes_cfb128_cipher, }; const EVP_CIPHER *EVP_aes_128_cfb128(void) { return &aes_128_cfb128; } diff --git a/yass/third_party/boringssl/src/decrepit/xts/xts.c b/yass/third_party/boringssl/src/decrepit/xts/xts.c index 8a66f0f224..47c704404e 100644 --- a/yass/third_party/boringssl/src/decrepit/xts/xts.c +++ b/yass/third_party/boringssl/src/decrepit/xts/xts.c @@ -229,11 +229,16 @@ static int aes_xts_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) { } static const EVP_CIPHER aes_256_xts = { - NID_aes_256_xts, 1 /* block_size */, 64 /* key_size (2 AES keys) */, - 16 /* iv_len */, sizeof(EVP_AES_XTS_CTX), - EVP_CIPH_XTS_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_ALWAYS_CALL_INIT | - EVP_CIPH_CTRL_INIT | EVP_CIPH_CUSTOM_COPY, - NULL /* app_data */, aes_xts_init_key, aes_xts_cipher, - NULL /* cleanup */, aes_xts_ctrl}; + .nid = NID_aes_256_xts, + .block_size = 1, + .key_len = 64 /* 2 AES-256 keys */, + .iv_len = 16, + .ctx_size = sizeof(EVP_AES_XTS_CTX), + .flags = EVP_CIPH_XTS_MODE | EVP_CIPH_CUSTOM_IV | + EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT | + EVP_CIPH_CUSTOM_COPY, + .init = aes_xts_init_key, + .cipher = aes_xts_cipher, + .ctrl = aes_xts_ctrl}; const EVP_CIPHER *EVP_aes_256_xts(void) { return &aes_256_xts; } diff --git a/yass/third_party/boringssl/src/gen/README.md b/yass/third_party/boringssl/src/gen/README.md new file mode 100644 index 0000000000..3ab6ec4e1f --- /dev/null +++ b/yass/third_party/boringssl/src/gen/README.md @@ -0,0 +1,26 @@ +# Pre-generated files + +This directory contains a number of pre-generated build artifacts. To simplify +downstream builds, they are checked into the repository, rather than dynamically +generated as part of the build. + +When developing on BoringSSL, if any inputs to these files are modified, callers +must run the following command to update the generated files: + + go run ./util/pregenerate + +To check that files are up-to-date without updating files, run: + + go run ./util/pregenerate -check + +This is run on CI to ensure the generated files remain up-to-date. + +To speed up local iteration, the tool accepts additional arguments to filter the +files generated. For example, if editing `aesni-x86_64.pl`, this +command will only update files with "aesni-x86_64" as a substring. + + go run ./util/pregenerate aesni-x86_64 + +For convenience, all files in this directory, including this README, are managed +by the tool. This means the whole directory may be deleted and regenerated from +scratch at any time. diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-apple.S new file mode 100644 index 0000000000..e1247bc893 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-apple.S @@ -0,0 +1,868 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + +.p2align 5 +_aesni_ctr32_ghash_6x: + + vmovdqu 32(%r11),%xmm2 + subq $6,%rdx + vpxor %xmm4,%xmm4,%xmm4 + vmovdqu 0-128(%rcx),%xmm15 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpaddb %xmm2,%xmm11,%xmm12 + vpaddb %xmm2,%xmm12,%xmm13 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm15,%xmm1,%xmm9 + vmovdqu %xmm4,16+8(%rsp) + jmp L$oop6x + +.p2align 5 +L$oop6x: + addl $100663296,%ebx + jc L$handle_ctr32 + vmovdqu 0-32(%r9),%xmm3 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm15,%xmm10,%xmm10 + vpxor %xmm15,%xmm11,%xmm11 + +L$resume_ctr32: + vmovdqu %xmm1,(%r8) + vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 + vpxor %xmm15,%xmm12,%xmm12 + vmovups 16-128(%rcx),%xmm2 + vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 + + + + + + + + + + + + + + + + + + xorq %r12,%r12 + cmpq %r14,%r15 + + vaesenc %xmm2,%xmm9,%xmm9 + vmovdqu 48+8(%rsp),%xmm0 + vpxor %xmm15,%xmm13,%xmm13 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 + vaesenc %xmm2,%xmm10,%xmm10 + vpxor %xmm15,%xmm14,%xmm14 + setnc %r12b + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vmovdqu 16-32(%r9),%xmm3 + negq %r12 + vaesenc %xmm2,%xmm12,%xmm12 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 + vpxor %xmm4,%xmm8,%xmm8 + vaesenc %xmm2,%xmm13,%xmm13 + vpxor %xmm5,%xmm1,%xmm4 + andq $0x60,%r12 + vmovups 32-128(%rcx),%xmm15 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 + vaesenc %xmm2,%xmm14,%xmm14 + + vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 + leaq (%r14,%r12,1),%r14 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 + vmovdqu 64+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 88(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 80(%r14),%r12 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,32+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,40+8(%rsp) + vmovdqu 48-32(%r9),%xmm5 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 48-128(%rcx),%xmm15 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm3,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 + vaesenc %xmm15,%xmm11,%xmm11 + vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 + vmovdqu 80+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqu 64-32(%r9),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 64-128(%rcx),%xmm15 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 72(%r14),%r13 + vpxor %xmm5,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 64(%r14),%r12 + vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 + vmovdqu 96+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,48+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,56+8(%rsp) + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 96-32(%r9),%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 80-128(%rcx),%xmm15 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 56(%r14),%r13 + vpxor %xmm1,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 + vpxor 112+8(%rsp),%xmm8,%xmm8 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 48(%r14),%r12 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,64+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,72+8(%rsp) + vpxor %xmm3,%xmm4,%xmm4 + vmovdqu 112-32(%r9),%xmm3 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 96-128(%rcx),%xmm15 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 40(%r14),%r13 + vpxor %xmm2,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 32(%r14),%r12 + vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,80+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,88+8(%rsp) + vpxor %xmm5,%xmm6,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor %xmm1,%xmm6,%xmm6 + + vmovups 112-128(%rcx),%xmm15 + vpslldq $8,%xmm6,%xmm5 + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 16(%r11),%xmm3 + + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm5,%xmm4,%xmm4 + movbeq 24(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 16(%r14),%r12 + vpalignr $8,%xmm4,%xmm4,%xmm0 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + movq %r13,96+8(%rsp) + vaesenc %xmm15,%xmm12,%xmm12 + movq %r12,104+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + vmovups 128-128(%rcx),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 144-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm10,%xmm10 + vpsrldq $8,%xmm6,%xmm6 + vaesenc %xmm1,%xmm11,%xmm11 + vpxor %xmm6,%xmm7,%xmm7 + vaesenc %xmm1,%xmm12,%xmm12 + vpxor %xmm0,%xmm4,%xmm4 + movbeq 8(%r14),%r13 + vaesenc %xmm1,%xmm13,%xmm13 + movbeq 0(%r14),%r12 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 160-128(%rcx),%xmm1 + cmpl $11,%r10d + jb L$enc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 176-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 192-128(%rcx),%xmm1 + je L$enc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 208-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 224-128(%rcx),%xmm1 + jmp L$enc_tail + +.p2align 5 +L$handle_ctr32: + vmovdqu (%r11),%xmm0 + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vmovdqu 0-32(%r9),%xmm3 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm15,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm15,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpshufb %xmm0,%xmm14,%xmm14 + vpshufb %xmm0,%xmm1,%xmm1 + jmp L$resume_ctr32 + +.p2align 5 +L$enc_tail: + vaesenc %xmm15,%xmm9,%xmm9 + vmovdqu %xmm7,16+8(%rsp) + vpalignr $8,%xmm4,%xmm4,%xmm8 + vaesenc %xmm15,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + vpxor 0(%rdi),%xmm1,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 16(%rdi),%xmm1,%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 32(%rdi),%xmm1,%xmm5 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 48(%rdi),%xmm1,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 64(%rdi),%xmm1,%xmm7 + vpxor 80(%rdi),%xmm1,%xmm3 + vmovdqu (%r8),%xmm1 + + vaesenclast %xmm2,%xmm9,%xmm9 + vmovdqu 32(%r11),%xmm2 + vaesenclast %xmm0,%xmm10,%xmm10 + vpaddb %xmm2,%xmm1,%xmm0 + movq %r13,112+8(%rsp) + leaq 96(%rdi),%rdi + + prefetcht0 512(%rdi) + prefetcht0 576(%rdi) + vaesenclast %xmm5,%xmm11,%xmm11 + vpaddb %xmm2,%xmm0,%xmm5 + movq %r12,120+8(%rsp) + leaq 96(%rsi),%rsi + vmovdqu 0-128(%rcx),%xmm15 + vaesenclast %xmm6,%xmm12,%xmm12 + vpaddb %xmm2,%xmm5,%xmm6 + vaesenclast %xmm7,%xmm13,%xmm13 + vpaddb %xmm2,%xmm6,%xmm7 + vaesenclast %xmm3,%xmm14,%xmm14 + vpaddb %xmm2,%xmm7,%xmm3 + + addq $0x60,%rax + subq $0x6,%rdx + jc L$6x_done + + vmovups %xmm9,-96(%rsi) + vpxor %xmm15,%xmm1,%xmm9 + vmovups %xmm10,-80(%rsi) + vmovdqa %xmm0,%xmm10 + vmovups %xmm11,-64(%rsi) + vmovdqa %xmm5,%xmm11 + vmovups %xmm12,-48(%rsi) + vmovdqa %xmm6,%xmm12 + vmovups %xmm13,-32(%rsi) + vmovdqa %xmm7,%xmm13 + vmovups %xmm14,-16(%rsi) + vmovdqa %xmm3,%xmm14 + vmovdqu 32+8(%rsp),%xmm7 + jmp L$oop6x + +L$6x_done: + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpxor %xmm4,%xmm8,%xmm8 + + ret + + +.globl _aesni_gcm_decrypt +.private_extern _aesni_gcm_decrypt + +.p2align 5 +_aesni_gcm_decrypt: + + +_CET_ENDBR + xorq %rax,%rax + + + + cmpq $0x60,%rdx + jb L$gcm_dec_abort + + pushq %rbp + + + movq %rsp,%rbp + + pushq %rbx + + + pushq %r12 + + + pushq %r13 + + + pushq %r14 + + + pushq %r15 + + + vzeroupper + + movq 16(%rbp),%r12 + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq L$bswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + vmovdqu (%r12),%xmm8 + andq $-128,%rsp + vmovdqu (%r11),%xmm0 + leaq 128(%rcx),%rcx + leaq 32(%r9),%r9 + movl 240-128(%rcx),%r10d + vpshufb %xmm0,%xmm8,%xmm8 + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc L$dec_no_key_aliasing + cmpq $768,%r15 + jnc L$dec_no_key_aliasing + subq %r15,%rsp +L$dec_no_key_aliasing: + + vmovdqu 80(%rdi),%xmm7 + movq %rdi,%r14 + vmovdqu 64(%rdi),%xmm4 + + + + + + + + leaq -192(%rdi,%rdx,1),%r15 + + vmovdqu 48(%rdi),%xmm5 + shrq $4,%rdx + xorq %rax,%rax + vmovdqu 32(%rdi),%xmm6 + vpshufb %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rdi),%xmm2 + vpshufb %xmm0,%xmm4,%xmm4 + vmovdqu (%rdi),%xmm3 + vpshufb %xmm0,%xmm5,%xmm5 + vmovdqu %xmm4,48(%rsp) + vpshufb %xmm0,%xmm6,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm2,%xmm2 + vmovdqu %xmm6,80(%rsp) + vpshufb %xmm0,%xmm3,%xmm3 + vmovdqu %xmm2,96(%rsp) + vmovdqu %xmm3,112(%rsp) + + call _aesni_ctr32_ghash_6x + + movq 16(%rbp),%r12 + vmovups %xmm9,-96(%rsi) + vmovups %xmm10,-80(%rsi) + vmovups %xmm11,-64(%rsi) + vmovups %xmm12,-48(%rsi) + vmovups %xmm13,-32(%rsi) + vmovups %xmm14,-16(%rsi) + + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,(%r12) + + vzeroupper + leaq -40(%rbp),%rsp + + popq %r15 + + popq %r14 + + popq %r13 + + popq %r12 + + popq %rbx + + popq %rbp + +L$gcm_dec_abort: + ret + + + + +.p2align 5 +_aesni_ctr32_6x: + + vmovdqu 0-128(%rcx),%xmm4 + vmovdqu 32(%r11),%xmm2 + leaq -1(%r10),%r13 + vmovups 16-128(%rcx),%xmm15 + leaq 32-128(%rcx),%r12 + vpxor %xmm4,%xmm1,%xmm9 + addl $100663296,%ebx + jc L$handle_ctr32_2 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddb %xmm2,%xmm11,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddb %xmm2,%xmm12,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp L$oop_ctr32 + +.p2align 4 +L$oop_ctr32: + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + vmovups (%r12),%xmm15 + leaq 16(%r12),%r12 + decl %r13d + jnz L$oop_ctr32 + + vmovdqu (%r12),%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 0(%rdi),%xmm3,%xmm4 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor 16(%rdi),%xmm3,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 32(%rdi),%xmm3,%xmm6 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 48(%rdi),%xmm3,%xmm8 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 64(%rdi),%xmm3,%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 80(%rdi),%xmm3,%xmm3 + leaq 96(%rdi),%rdi + + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm5,%xmm10,%xmm10 + vaesenclast %xmm6,%xmm11,%xmm11 + vaesenclast %xmm8,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + vaesenclast %xmm3,%xmm14,%xmm14 + vmovups %xmm9,0(%rsi) + vmovups %xmm10,16(%rsi) + vmovups %xmm11,32(%rsi) + vmovups %xmm12,48(%rsi) + vmovups %xmm13,64(%rsi) + vmovups %xmm14,80(%rsi) + leaq 96(%rsi),%rsi + + ret +.p2align 5 +L$handle_ctr32_2: + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpshufb %xmm0,%xmm14,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpshufb %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp L$oop_ctr32 + + + +.globl _aesni_gcm_encrypt +.private_extern _aesni_gcm_encrypt + +.p2align 5 +_aesni_gcm_encrypt: + + +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST + + movb $1,_BORINGSSL_function_hit+2(%rip) +#endif + xorq %rax,%rax + + + + + cmpq $288,%rdx + jb L$gcm_enc_abort + + pushq %rbp + + + movq %rsp,%rbp + + pushq %rbx + + + pushq %r12 + + + pushq %r13 + + + pushq %r14 + + + pushq %r15 + + + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq L$bswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + leaq 128(%rcx),%rcx + vmovdqu (%r11),%xmm0 + andq $-128,%rsp + movl 240-128(%rcx),%r10d + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc L$enc_no_key_aliasing + cmpq $768,%r15 + jnc L$enc_no_key_aliasing + subq %r15,%rsp +L$enc_no_key_aliasing: + + movq %rsi,%r14 + + + + + + + + + leaq -192(%rsi,%rdx,1),%r15 + + shrq $4,%rdx + + call _aesni_ctr32_6x + vpshufb %xmm0,%xmm9,%xmm8 + vpshufb %xmm0,%xmm10,%xmm2 + vmovdqu %xmm8,112(%rsp) + vpshufb %xmm0,%xmm11,%xmm4 + vmovdqu %xmm2,96(%rsp) + vpshufb %xmm0,%xmm12,%xmm5 + vmovdqu %xmm4,80(%rsp) + vpshufb %xmm0,%xmm13,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm14,%xmm7 + vmovdqu %xmm6,48(%rsp) + + call _aesni_ctr32_6x + + movq 16(%rbp),%r12 + leaq 32(%r9),%r9 + vmovdqu (%r12),%xmm8 + subq $12,%rdx + movq $192,%rax + vpshufb %xmm0,%xmm8,%xmm8 + + call _aesni_ctr32_ghash_6x + vmovdqu 32(%rsp),%xmm7 + vmovdqu (%r11),%xmm0 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm7,%xmm7,%xmm1 + vmovdqu 32-32(%r9),%xmm15 + vmovups %xmm9,-96(%rsi) + vpshufb %xmm0,%xmm9,%xmm9 + vpxor %xmm7,%xmm1,%xmm1 + vmovups %xmm10,-80(%rsi) + vpshufb %xmm0,%xmm10,%xmm10 + vmovups %xmm11,-64(%rsi) + vpshufb %xmm0,%xmm11,%xmm11 + vmovups %xmm12,-48(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vmovups %xmm13,-32(%rsi) + vpshufb %xmm0,%xmm13,%xmm13 + vmovups %xmm14,-16(%rsi) + vpshufb %xmm0,%xmm14,%xmm14 + vmovdqu %xmm9,16(%rsp) + vmovdqu 48(%rsp),%xmm6 + vmovdqu 16-32(%r9),%xmm0 + vpunpckhqdq %xmm6,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 + vpxor %xmm6,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + + vmovdqu 64(%rsp),%xmm9 + vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm9,%xmm9,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm5,%xmm5 + vpxor %xmm7,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vmovdqu 80(%rsp),%xmm1 + vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm4,%xmm7,%xmm7 + vpunpckhqdq %xmm1,%xmm1,%xmm4 + vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm6,%xmm9,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 96(%rsp),%xmm2 + vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm7,%xmm6,%xmm6 + vpunpckhqdq %xmm2,%xmm2,%xmm7 + vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm5,%xmm4,%xmm4 + + vpxor 112(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 + vmovdqu 112-32(%r9),%xmm0 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm1,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 + vpxor %xmm4,%xmm7,%xmm4 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm1 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 + vpxor %xmm14,%xmm1,%xmm1 + vpxor %xmm5,%xmm6,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 + vmovdqu 32-32(%r9),%xmm15 + vpxor %xmm2,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm6 + + vmovdqu 16-32(%r9),%xmm0 + vpxor %xmm5,%xmm7,%xmm9 + vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 + vpxor %xmm9,%xmm6,%xmm6 + vpunpckhqdq %xmm13,%xmm13,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 + vpxor %xmm13,%xmm2,%xmm2 + vpslldq $8,%xmm6,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + vpxor %xmm9,%xmm5,%xmm8 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm6,%xmm7,%xmm7 + + vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm12,%xmm12,%xmm9 + vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 + vpxor %xmm12,%xmm9,%xmm9 + vpxor %xmm14,%xmm13,%xmm13 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm11,%xmm11,%xmm1 + vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm13,%xmm12,%xmm12 + vxorps 16(%rsp),%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm9,%xmm9 + + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm10,%xmm10,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 + vpxor %xmm10,%xmm2,%xmm2 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpxor %xmm12,%xmm11,%xmm11 + vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + + vxorps %xmm7,%xmm14,%xmm14 + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 + vmovdqu 112-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm11,%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 + vpxor %xmm4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 + vpxor %xmm10,%xmm7,%xmm7 + vpxor %xmm2,%xmm6,%xmm6 + + vpxor %xmm5,%xmm7,%xmm4 + vpxor %xmm4,%xmm6,%xmm6 + vpslldq $8,%xmm6,%xmm1 + vmovdqu 16(%r11),%xmm3 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm1,%xmm5,%xmm8 + vpxor %xmm6,%xmm7,%xmm7 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm2,%xmm8,%xmm8 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm7,%xmm2,%xmm2 + vpxor %xmm2,%xmm8,%xmm8 + movq 16(%rbp),%r12 + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,(%r12) + + vzeroupper + leaq -40(%rbp),%rsp + + popq %r15 + + popq %r14 + + popq %r13 + + popq %r12 + + popq %rbx + + popq %rbp + +L$gcm_enc_abort: + ret + + + +.section __DATA,__const +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$poly: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +L$one_msb: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +L$two_lsb: +.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +L$one_lsb: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-linux.S new file mode 100644 index 0000000000..774a8d12dc --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-linux.S @@ -0,0 +1,883 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.type _aesni_ctr32_ghash_6x,@function +.align 32 +_aesni_ctr32_ghash_6x: +.cfi_startproc + vmovdqu 32(%r11),%xmm2 + subq $6,%rdx + vpxor %xmm4,%xmm4,%xmm4 + vmovdqu 0-128(%rcx),%xmm15 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpaddb %xmm2,%xmm11,%xmm12 + vpaddb %xmm2,%xmm12,%xmm13 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm15,%xmm1,%xmm9 + vmovdqu %xmm4,16+8(%rsp) + jmp .Loop6x + +.align 32 +.Loop6x: + addl $100663296,%ebx + jc .Lhandle_ctr32 + vmovdqu 0-32(%r9),%xmm3 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm15,%xmm10,%xmm10 + vpxor %xmm15,%xmm11,%xmm11 + +.Lresume_ctr32: + vmovdqu %xmm1,(%r8) + vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 + vpxor %xmm15,%xmm12,%xmm12 + vmovups 16-128(%rcx),%xmm2 + vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 + + + + + + + + + + + + + + + + + + xorq %r12,%r12 + cmpq %r14,%r15 + + vaesenc %xmm2,%xmm9,%xmm9 + vmovdqu 48+8(%rsp),%xmm0 + vpxor %xmm15,%xmm13,%xmm13 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 + vaesenc %xmm2,%xmm10,%xmm10 + vpxor %xmm15,%xmm14,%xmm14 + setnc %r12b + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vmovdqu 16-32(%r9),%xmm3 + negq %r12 + vaesenc %xmm2,%xmm12,%xmm12 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 + vpxor %xmm4,%xmm8,%xmm8 + vaesenc %xmm2,%xmm13,%xmm13 + vpxor %xmm5,%xmm1,%xmm4 + andq $0x60,%r12 + vmovups 32-128(%rcx),%xmm15 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 + vaesenc %xmm2,%xmm14,%xmm14 + + vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 + leaq (%r14,%r12,1),%r14 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 + vmovdqu 64+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 88(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 80(%r14),%r12 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,32+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,40+8(%rsp) + vmovdqu 48-32(%r9),%xmm5 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 48-128(%rcx),%xmm15 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm3,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 + vaesenc %xmm15,%xmm11,%xmm11 + vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 + vmovdqu 80+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqu 64-32(%r9),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 64-128(%rcx),%xmm15 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 72(%r14),%r13 + vpxor %xmm5,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 64(%r14),%r12 + vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 + vmovdqu 96+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,48+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,56+8(%rsp) + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 96-32(%r9),%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 80-128(%rcx),%xmm15 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 56(%r14),%r13 + vpxor %xmm1,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 + vpxor 112+8(%rsp),%xmm8,%xmm8 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 48(%r14),%r12 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,64+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,72+8(%rsp) + vpxor %xmm3,%xmm4,%xmm4 + vmovdqu 112-32(%r9),%xmm3 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 96-128(%rcx),%xmm15 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 40(%r14),%r13 + vpxor %xmm2,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 32(%r14),%r12 + vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,80+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,88+8(%rsp) + vpxor %xmm5,%xmm6,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor %xmm1,%xmm6,%xmm6 + + vmovups 112-128(%rcx),%xmm15 + vpslldq $8,%xmm6,%xmm5 + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 16(%r11),%xmm3 + + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm5,%xmm4,%xmm4 + movbeq 24(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 16(%r14),%r12 + vpalignr $8,%xmm4,%xmm4,%xmm0 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + movq %r13,96+8(%rsp) + vaesenc %xmm15,%xmm12,%xmm12 + movq %r12,104+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + vmovups 128-128(%rcx),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 144-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm10,%xmm10 + vpsrldq $8,%xmm6,%xmm6 + vaesenc %xmm1,%xmm11,%xmm11 + vpxor %xmm6,%xmm7,%xmm7 + vaesenc %xmm1,%xmm12,%xmm12 + vpxor %xmm0,%xmm4,%xmm4 + movbeq 8(%r14),%r13 + vaesenc %xmm1,%xmm13,%xmm13 + movbeq 0(%r14),%r12 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 160-128(%rcx),%xmm1 + cmpl $11,%r10d + jb .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 176-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 192-128(%rcx),%xmm1 + je .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 208-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 224-128(%rcx),%xmm1 + jmp .Lenc_tail + +.align 32 +.Lhandle_ctr32: + vmovdqu (%r11),%xmm0 + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vmovdqu 0-32(%r9),%xmm3 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm15,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm15,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpshufb %xmm0,%xmm14,%xmm14 + vpshufb %xmm0,%xmm1,%xmm1 + jmp .Lresume_ctr32 + +.align 32 +.Lenc_tail: + vaesenc %xmm15,%xmm9,%xmm9 + vmovdqu %xmm7,16+8(%rsp) + vpalignr $8,%xmm4,%xmm4,%xmm8 + vaesenc %xmm15,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + vpxor 0(%rdi),%xmm1,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 16(%rdi),%xmm1,%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 32(%rdi),%xmm1,%xmm5 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 48(%rdi),%xmm1,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 64(%rdi),%xmm1,%xmm7 + vpxor 80(%rdi),%xmm1,%xmm3 + vmovdqu (%r8),%xmm1 + + vaesenclast %xmm2,%xmm9,%xmm9 + vmovdqu 32(%r11),%xmm2 + vaesenclast %xmm0,%xmm10,%xmm10 + vpaddb %xmm2,%xmm1,%xmm0 + movq %r13,112+8(%rsp) + leaq 96(%rdi),%rdi + + prefetcht0 512(%rdi) + prefetcht0 576(%rdi) + vaesenclast %xmm5,%xmm11,%xmm11 + vpaddb %xmm2,%xmm0,%xmm5 + movq %r12,120+8(%rsp) + leaq 96(%rsi),%rsi + vmovdqu 0-128(%rcx),%xmm15 + vaesenclast %xmm6,%xmm12,%xmm12 + vpaddb %xmm2,%xmm5,%xmm6 + vaesenclast %xmm7,%xmm13,%xmm13 + vpaddb %xmm2,%xmm6,%xmm7 + vaesenclast %xmm3,%xmm14,%xmm14 + vpaddb %xmm2,%xmm7,%xmm3 + + addq $0x60,%rax + subq $0x6,%rdx + jc .L6x_done + + vmovups %xmm9,-96(%rsi) + vpxor %xmm15,%xmm1,%xmm9 + vmovups %xmm10,-80(%rsi) + vmovdqa %xmm0,%xmm10 + vmovups %xmm11,-64(%rsi) + vmovdqa %xmm5,%xmm11 + vmovups %xmm12,-48(%rsi) + vmovdqa %xmm6,%xmm12 + vmovups %xmm13,-32(%rsi) + vmovdqa %xmm7,%xmm13 + vmovups %xmm14,-16(%rsi) + vmovdqa %xmm3,%xmm14 + vmovdqu 32+8(%rsp),%xmm7 + jmp .Loop6x + +.L6x_done: + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpxor %xmm4,%xmm8,%xmm8 + + ret +.cfi_endproc +.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x +.globl aesni_gcm_decrypt +.hidden aesni_gcm_decrypt +.type aesni_gcm_decrypt,@function +.align 32 +aesni_gcm_decrypt: +.cfi_startproc + +_CET_ENDBR + xorq %rax,%rax + + + + cmpq $0x60,%rdx + jb .Lgcm_dec_abort + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + + movq %rsp,%rbp +.cfi_def_cfa_register %rbp + pushq %rbx +.cfi_offset %rbx,-24 + + pushq %r12 +.cfi_offset %r12,-32 + + pushq %r13 +.cfi_offset %r13,-40 + + pushq %r14 +.cfi_offset %r14,-48 + + pushq %r15 +.cfi_offset %r15,-56 + + vzeroupper + + movq 16(%rbp),%r12 + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + vmovdqu (%r12),%xmm8 + andq $-128,%rsp + vmovdqu (%r11),%xmm0 + leaq 128(%rcx),%rcx + leaq 32(%r9),%r9 + movl 240-128(%rcx),%r10d + vpshufb %xmm0,%xmm8,%xmm8 + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Ldec_no_key_aliasing + cmpq $768,%r15 + jnc .Ldec_no_key_aliasing + subq %r15,%rsp +.Ldec_no_key_aliasing: + + vmovdqu 80(%rdi),%xmm7 + movq %rdi,%r14 + vmovdqu 64(%rdi),%xmm4 + + + + + + + + leaq -192(%rdi,%rdx,1),%r15 + + vmovdqu 48(%rdi),%xmm5 + shrq $4,%rdx + xorq %rax,%rax + vmovdqu 32(%rdi),%xmm6 + vpshufb %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rdi),%xmm2 + vpshufb %xmm0,%xmm4,%xmm4 + vmovdqu (%rdi),%xmm3 + vpshufb %xmm0,%xmm5,%xmm5 + vmovdqu %xmm4,48(%rsp) + vpshufb %xmm0,%xmm6,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm2,%xmm2 + vmovdqu %xmm6,80(%rsp) + vpshufb %xmm0,%xmm3,%xmm3 + vmovdqu %xmm2,96(%rsp) + vmovdqu %xmm3,112(%rsp) + + call _aesni_ctr32_ghash_6x + + movq 16(%rbp),%r12 + vmovups %xmm9,-96(%rsi) + vmovups %xmm10,-80(%rsi) + vmovups %xmm11,-64(%rsi) + vmovups %xmm12,-48(%rsi) + vmovups %xmm13,-32(%rsi) + vmovups %xmm14,-16(%rsi) + + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,(%r12) + + vzeroupper + leaq -40(%rbp),%rsp +.cfi_def_cfa %rsp, 0x38 + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp +.Lgcm_dec_abort: + ret + +.cfi_endproc +.size aesni_gcm_decrypt,.-aesni_gcm_decrypt +.type _aesni_ctr32_6x,@function +.align 32 +_aesni_ctr32_6x: +.cfi_startproc + vmovdqu 0-128(%rcx),%xmm4 + vmovdqu 32(%r11),%xmm2 + leaq -1(%r10),%r13 + vmovups 16-128(%rcx),%xmm15 + leaq 32-128(%rcx),%r12 + vpxor %xmm4,%xmm1,%xmm9 + addl $100663296,%ebx + jc .Lhandle_ctr32_2 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddb %xmm2,%xmm11,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddb %xmm2,%xmm12,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 + +.align 16 +.Loop_ctr32: + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + vmovups (%r12),%xmm15 + leaq 16(%r12),%r12 + decl %r13d + jnz .Loop_ctr32 + + vmovdqu (%r12),%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 0(%rdi),%xmm3,%xmm4 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor 16(%rdi),%xmm3,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 32(%rdi),%xmm3,%xmm6 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 48(%rdi),%xmm3,%xmm8 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 64(%rdi),%xmm3,%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 80(%rdi),%xmm3,%xmm3 + leaq 96(%rdi),%rdi + + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm5,%xmm10,%xmm10 + vaesenclast %xmm6,%xmm11,%xmm11 + vaesenclast %xmm8,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + vaesenclast %xmm3,%xmm14,%xmm14 + vmovups %xmm9,0(%rsi) + vmovups %xmm10,16(%rsi) + vmovups %xmm11,32(%rsi) + vmovups %xmm12,48(%rsi) + vmovups %xmm13,64(%rsi) + vmovups %xmm14,80(%rsi) + leaq 96(%rsi),%rsi + + ret +.align 32 +.Lhandle_ctr32_2: + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpshufb %xmm0,%xmm14,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpshufb %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 +.cfi_endproc +.size _aesni_ctr32_6x,.-_aesni_ctr32_6x + +.globl aesni_gcm_encrypt +.hidden aesni_gcm_encrypt +.type aesni_gcm_encrypt,@function +.align 32 +aesni_gcm_encrypt: +.cfi_startproc + +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+2(%rip) +#endif + xorq %rax,%rax + + + + + cmpq $288,%rdx + jb .Lgcm_enc_abort + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + + movq %rsp,%rbp +.cfi_def_cfa_register %rbp + pushq %rbx +.cfi_offset %rbx,-24 + + pushq %r12 +.cfi_offset %r12,-32 + + pushq %r13 +.cfi_offset %r13,-40 + + pushq %r14 +.cfi_offset %r14,-48 + + pushq %r15 +.cfi_offset %r15,-56 + + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + leaq 128(%rcx),%rcx + vmovdqu (%r11),%xmm0 + andq $-128,%rsp + movl 240-128(%rcx),%r10d + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Lenc_no_key_aliasing + cmpq $768,%r15 + jnc .Lenc_no_key_aliasing + subq %r15,%rsp +.Lenc_no_key_aliasing: + + movq %rsi,%r14 + + + + + + + + + leaq -192(%rsi,%rdx,1),%r15 + + shrq $4,%rdx + + call _aesni_ctr32_6x + vpshufb %xmm0,%xmm9,%xmm8 + vpshufb %xmm0,%xmm10,%xmm2 + vmovdqu %xmm8,112(%rsp) + vpshufb %xmm0,%xmm11,%xmm4 + vmovdqu %xmm2,96(%rsp) + vpshufb %xmm0,%xmm12,%xmm5 + vmovdqu %xmm4,80(%rsp) + vpshufb %xmm0,%xmm13,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm14,%xmm7 + vmovdqu %xmm6,48(%rsp) + + call _aesni_ctr32_6x + + movq 16(%rbp),%r12 + leaq 32(%r9),%r9 + vmovdqu (%r12),%xmm8 + subq $12,%rdx + movq $192,%rax + vpshufb %xmm0,%xmm8,%xmm8 + + call _aesni_ctr32_ghash_6x + vmovdqu 32(%rsp),%xmm7 + vmovdqu (%r11),%xmm0 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm7,%xmm7,%xmm1 + vmovdqu 32-32(%r9),%xmm15 + vmovups %xmm9,-96(%rsi) + vpshufb %xmm0,%xmm9,%xmm9 + vpxor %xmm7,%xmm1,%xmm1 + vmovups %xmm10,-80(%rsi) + vpshufb %xmm0,%xmm10,%xmm10 + vmovups %xmm11,-64(%rsi) + vpshufb %xmm0,%xmm11,%xmm11 + vmovups %xmm12,-48(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vmovups %xmm13,-32(%rsi) + vpshufb %xmm0,%xmm13,%xmm13 + vmovups %xmm14,-16(%rsi) + vpshufb %xmm0,%xmm14,%xmm14 + vmovdqu %xmm9,16(%rsp) + vmovdqu 48(%rsp),%xmm6 + vmovdqu 16-32(%r9),%xmm0 + vpunpckhqdq %xmm6,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 + vpxor %xmm6,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + + vmovdqu 64(%rsp),%xmm9 + vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm9,%xmm9,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm5,%xmm5 + vpxor %xmm7,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vmovdqu 80(%rsp),%xmm1 + vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm4,%xmm7,%xmm7 + vpunpckhqdq %xmm1,%xmm1,%xmm4 + vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm6,%xmm9,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 96(%rsp),%xmm2 + vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm7,%xmm6,%xmm6 + vpunpckhqdq %xmm2,%xmm2,%xmm7 + vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm5,%xmm4,%xmm4 + + vpxor 112(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 + vmovdqu 112-32(%r9),%xmm0 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm1,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 + vpxor %xmm4,%xmm7,%xmm4 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm1 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 + vpxor %xmm14,%xmm1,%xmm1 + vpxor %xmm5,%xmm6,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 + vmovdqu 32-32(%r9),%xmm15 + vpxor %xmm2,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm6 + + vmovdqu 16-32(%r9),%xmm0 + vpxor %xmm5,%xmm7,%xmm9 + vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 + vpxor %xmm9,%xmm6,%xmm6 + vpunpckhqdq %xmm13,%xmm13,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 + vpxor %xmm13,%xmm2,%xmm2 + vpslldq $8,%xmm6,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + vpxor %xmm9,%xmm5,%xmm8 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm6,%xmm7,%xmm7 + + vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm12,%xmm12,%xmm9 + vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 + vpxor %xmm12,%xmm9,%xmm9 + vpxor %xmm14,%xmm13,%xmm13 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm11,%xmm11,%xmm1 + vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm13,%xmm12,%xmm12 + vxorps 16(%rsp),%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm9,%xmm9 + + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm10,%xmm10,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 + vpxor %xmm10,%xmm2,%xmm2 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpxor %xmm12,%xmm11,%xmm11 + vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + + vxorps %xmm7,%xmm14,%xmm14 + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 + vmovdqu 112-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm11,%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 + vpxor %xmm4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 + vpxor %xmm10,%xmm7,%xmm7 + vpxor %xmm2,%xmm6,%xmm6 + + vpxor %xmm5,%xmm7,%xmm4 + vpxor %xmm4,%xmm6,%xmm6 + vpslldq $8,%xmm6,%xmm1 + vmovdqu 16(%r11),%xmm3 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm1,%xmm5,%xmm8 + vpxor %xmm6,%xmm7,%xmm7 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm2,%xmm8,%xmm8 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm7,%xmm2,%xmm2 + vpxor %xmm2,%xmm8,%xmm8 + movq 16(%rbp),%r12 + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,(%r12) + + vzeroupper + leaq -40(%rbp),%rsp +.cfi_def_cfa %rsp, 0x38 + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp +.Lgcm_enc_abort: + ret + +.cfi_endproc +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt +.section .rodata +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lpoly: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.Lone_msb: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Ltwo_lsb: +.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lone_lsb: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-win.asm new file mode 100644 index 0000000000..d7a2665e4a --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-gcm-x86_64-win.asm @@ -0,0 +1,1101 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + + +ALIGN 32 +_aesni_ctr32_ghash_6x: + + vmovdqu xmm2,XMMWORD[32+r11] + sub r8,6 + vpxor xmm4,xmm4,xmm4 + vmovdqu xmm15,XMMWORD[((0-128))+r9] + vpaddb xmm10,xmm1,xmm2 + vpaddb xmm11,xmm10,xmm2 + vpaddb xmm12,xmm11,xmm2 + vpaddb xmm13,xmm12,xmm2 + vpaddb xmm14,xmm13,xmm2 + vpxor xmm9,xmm1,xmm15 + vmovdqu XMMWORD[(16+8)+rsp],xmm4 + jmp NEAR $L$oop6x + +ALIGN 32 +$L$oop6x: + add ebx,100663296 + jc NEAR $L$handle_ctr32 + vmovdqu xmm3,XMMWORD[((0-32))+rsi] + vpaddb xmm1,xmm14,xmm2 + vpxor xmm10,xmm10,xmm15 + vpxor xmm11,xmm11,xmm15 + +$L$resume_ctr32: + vmovdqu XMMWORD[rdi],xmm1 + vpclmulqdq xmm5,xmm7,xmm3,0x10 + vpxor xmm12,xmm12,xmm15 + vmovups xmm2,XMMWORD[((16-128))+r9] + vpclmulqdq xmm6,xmm7,xmm3,0x01 + + + + + + + + + + + + + + + + + + xor r12,r12 + cmp r15,r14 + + vaesenc xmm9,xmm9,xmm2 + vmovdqu xmm0,XMMWORD[((48+8))+rsp] + vpxor xmm13,xmm13,xmm15 + vpclmulqdq xmm1,xmm7,xmm3,0x00 + vaesenc xmm10,xmm10,xmm2 + vpxor xmm14,xmm14,xmm15 + setnc r12b + vpclmulqdq xmm7,xmm7,xmm3,0x11 + vaesenc xmm11,xmm11,xmm2 + vmovdqu xmm3,XMMWORD[((16-32))+rsi] + neg r12 + vaesenc xmm12,xmm12,xmm2 + vpxor xmm6,xmm6,xmm5 + vpclmulqdq xmm5,xmm0,xmm3,0x00 + vpxor xmm8,xmm8,xmm4 + vaesenc xmm13,xmm13,xmm2 + vpxor xmm4,xmm1,xmm5 + and r12,0x60 + vmovups xmm15,XMMWORD[((32-128))+r9] + vpclmulqdq xmm1,xmm0,xmm3,0x10 + vaesenc xmm14,xmm14,xmm2 + + vpclmulqdq xmm2,xmm0,xmm3,0x01 + lea r14,[r12*1+r14] + vaesenc xmm9,xmm9,xmm15 + vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp] + vpclmulqdq xmm3,xmm0,xmm3,0x11 + vmovdqu xmm0,XMMWORD[((64+8))+rsp] + vaesenc xmm10,xmm10,xmm15 + movbe r13,QWORD[88+r14] + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD[80+r14] + vaesenc xmm12,xmm12,xmm15 + mov QWORD[((32+8))+rsp],r13 + vaesenc xmm13,xmm13,xmm15 + mov QWORD[((40+8))+rsp],r12 + vmovdqu xmm5,XMMWORD[((48-32))+rsi] + vaesenc xmm14,xmm14,xmm15 + + vmovups xmm15,XMMWORD[((48-128))+r9] + vpxor xmm6,xmm6,xmm1 + vpclmulqdq xmm1,xmm0,xmm5,0x00 + vaesenc xmm9,xmm9,xmm15 + vpxor xmm6,xmm6,xmm2 + vpclmulqdq xmm2,xmm0,xmm5,0x10 + vaesenc xmm10,xmm10,xmm15 + vpxor xmm7,xmm7,xmm3 + vpclmulqdq xmm3,xmm0,xmm5,0x01 + vaesenc xmm11,xmm11,xmm15 + vpclmulqdq xmm5,xmm0,xmm5,0x11 + vmovdqu xmm0,XMMWORD[((80+8))+rsp] + vaesenc xmm12,xmm12,xmm15 + vaesenc xmm13,xmm13,xmm15 + vpxor xmm4,xmm4,xmm1 + vmovdqu xmm1,XMMWORD[((64-32))+rsi] + vaesenc xmm14,xmm14,xmm15 + + vmovups xmm15,XMMWORD[((64-128))+r9] + vpxor xmm6,xmm6,xmm2 + vpclmulqdq xmm2,xmm0,xmm1,0x00 + vaesenc xmm9,xmm9,xmm15 + vpxor xmm6,xmm6,xmm3 + vpclmulqdq xmm3,xmm0,xmm1,0x10 + vaesenc xmm10,xmm10,xmm15 + movbe r13,QWORD[72+r14] + vpxor xmm7,xmm7,xmm5 + vpclmulqdq xmm5,xmm0,xmm1,0x01 + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD[64+r14] + vpclmulqdq xmm1,xmm0,xmm1,0x11 + vmovdqu xmm0,XMMWORD[((96+8))+rsp] + vaesenc xmm12,xmm12,xmm15 + mov QWORD[((48+8))+rsp],r13 + vaesenc xmm13,xmm13,xmm15 + mov QWORD[((56+8))+rsp],r12 + vpxor xmm4,xmm4,xmm2 + vmovdqu xmm2,XMMWORD[((96-32))+rsi] + vaesenc xmm14,xmm14,xmm15 + + vmovups xmm15,XMMWORD[((80-128))+r9] + vpxor xmm6,xmm6,xmm3 + vpclmulqdq xmm3,xmm0,xmm2,0x00 + vaesenc xmm9,xmm9,xmm15 + vpxor xmm6,xmm6,xmm5 + vpclmulqdq xmm5,xmm0,xmm2,0x10 + vaesenc xmm10,xmm10,xmm15 + movbe r13,QWORD[56+r14] + vpxor xmm7,xmm7,xmm1 + vpclmulqdq xmm1,xmm0,xmm2,0x01 + vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp] + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD[48+r14] + vpclmulqdq xmm2,xmm0,xmm2,0x11 + vaesenc xmm12,xmm12,xmm15 + mov QWORD[((64+8))+rsp],r13 + vaesenc xmm13,xmm13,xmm15 + mov QWORD[((72+8))+rsp],r12 + vpxor xmm4,xmm4,xmm3 + vmovdqu xmm3,XMMWORD[((112-32))+rsi] + vaesenc xmm14,xmm14,xmm15 + + vmovups xmm15,XMMWORD[((96-128))+r9] + vpxor xmm6,xmm6,xmm5 + vpclmulqdq xmm5,xmm8,xmm3,0x10 + vaesenc xmm9,xmm9,xmm15 + vpxor xmm6,xmm6,xmm1 + vpclmulqdq xmm1,xmm8,xmm3,0x01 + vaesenc xmm10,xmm10,xmm15 + movbe r13,QWORD[40+r14] + vpxor xmm7,xmm7,xmm2 + vpclmulqdq xmm2,xmm8,xmm3,0x00 + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD[32+r14] + vpclmulqdq xmm8,xmm8,xmm3,0x11 + vaesenc xmm12,xmm12,xmm15 + mov QWORD[((80+8))+rsp],r13 + vaesenc xmm13,xmm13,xmm15 + mov QWORD[((88+8))+rsp],r12 + vpxor xmm6,xmm6,xmm5 + vaesenc xmm14,xmm14,xmm15 + vpxor xmm6,xmm6,xmm1 + + vmovups xmm15,XMMWORD[((112-128))+r9] + vpslldq xmm5,xmm6,8 + vpxor xmm4,xmm4,xmm2 + vmovdqu xmm3,XMMWORD[16+r11] + + vaesenc xmm9,xmm9,xmm15 + vpxor xmm7,xmm7,xmm8 + vaesenc xmm10,xmm10,xmm15 + vpxor xmm4,xmm4,xmm5 + movbe r13,QWORD[24+r14] + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD[16+r14] + vpalignr xmm0,xmm4,xmm4,8 + vpclmulqdq xmm4,xmm4,xmm3,0x10 + mov QWORD[((96+8))+rsp],r13 + vaesenc xmm12,xmm12,xmm15 + mov QWORD[((104+8))+rsp],r12 + vaesenc xmm13,xmm13,xmm15 + vmovups xmm1,XMMWORD[((128-128))+r9] + vaesenc xmm14,xmm14,xmm15 + + vaesenc xmm9,xmm9,xmm1 + vmovups xmm15,XMMWORD[((144-128))+r9] + vaesenc xmm10,xmm10,xmm1 + vpsrldq xmm6,xmm6,8 + vaesenc xmm11,xmm11,xmm1 + vpxor xmm7,xmm7,xmm6 + vaesenc xmm12,xmm12,xmm1 + vpxor xmm4,xmm4,xmm0 + movbe r13,QWORD[8+r14] + vaesenc xmm13,xmm13,xmm1 + movbe r12,QWORD[r14] + vaesenc xmm14,xmm14,xmm1 + vmovups xmm1,XMMWORD[((160-128))+r9] + cmp r10d,11 + jb NEAR $L$enc_tail + + vaesenc xmm9,xmm9,xmm15 + vaesenc xmm10,xmm10,xmm15 + vaesenc xmm11,xmm11,xmm15 + vaesenc xmm12,xmm12,xmm15 + vaesenc xmm13,xmm13,xmm15 + vaesenc xmm14,xmm14,xmm15 + + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + vmovups xmm15,XMMWORD[((176-128))+r9] + vaesenc xmm14,xmm14,xmm1 + vmovups xmm1,XMMWORD[((192-128))+r9] + je NEAR $L$enc_tail + + vaesenc xmm9,xmm9,xmm15 + vaesenc xmm10,xmm10,xmm15 + vaesenc xmm11,xmm11,xmm15 + vaesenc xmm12,xmm12,xmm15 + vaesenc xmm13,xmm13,xmm15 + vaesenc xmm14,xmm14,xmm15 + + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + vmovups xmm15,XMMWORD[((208-128))+r9] + vaesenc xmm14,xmm14,xmm1 + vmovups xmm1,XMMWORD[((224-128))+r9] + jmp NEAR $L$enc_tail + +ALIGN 32 +$L$handle_ctr32: + vmovdqu xmm0,XMMWORD[r11] + vpshufb xmm6,xmm1,xmm0 + vmovdqu xmm5,XMMWORD[48+r11] + vpaddd xmm10,xmm6,XMMWORD[64+r11] + vpaddd xmm11,xmm6,xmm5 + vmovdqu xmm3,XMMWORD[((0-32))+rsi] + vpaddd xmm12,xmm10,xmm5 + vpshufb xmm10,xmm10,xmm0 + vpaddd xmm13,xmm11,xmm5 + vpshufb xmm11,xmm11,xmm0 + vpxor xmm10,xmm10,xmm15 + vpaddd xmm14,xmm12,xmm5 + vpshufb xmm12,xmm12,xmm0 + vpxor xmm11,xmm11,xmm15 + vpaddd xmm1,xmm13,xmm5 + vpshufb xmm13,xmm13,xmm0 + vpshufb xmm14,xmm14,xmm0 + vpshufb xmm1,xmm1,xmm0 + jmp NEAR $L$resume_ctr32 + +ALIGN 32 +$L$enc_tail: + vaesenc xmm9,xmm9,xmm15 + vmovdqu XMMWORD[(16+8)+rsp],xmm7 + vpalignr xmm8,xmm4,xmm4,8 + vaesenc xmm10,xmm10,xmm15 + vpclmulqdq xmm4,xmm4,xmm3,0x10 + vpxor xmm2,xmm1,XMMWORD[rcx] + vaesenc xmm11,xmm11,xmm15 + vpxor xmm0,xmm1,XMMWORD[16+rcx] + vaesenc xmm12,xmm12,xmm15 + vpxor xmm5,xmm1,XMMWORD[32+rcx] + vaesenc xmm13,xmm13,xmm15 + vpxor xmm6,xmm1,XMMWORD[48+rcx] + vaesenc xmm14,xmm14,xmm15 + vpxor xmm7,xmm1,XMMWORD[64+rcx] + vpxor xmm3,xmm1,XMMWORD[80+rcx] + vmovdqu xmm1,XMMWORD[rdi] + + vaesenclast xmm9,xmm9,xmm2 + vmovdqu xmm2,XMMWORD[32+r11] + vaesenclast xmm10,xmm10,xmm0 + vpaddb xmm0,xmm1,xmm2 + mov QWORD[((112+8))+rsp],r13 + lea rcx,[96+rcx] + + prefetcht0 [512+rcx] + prefetcht0 [576+rcx] + vaesenclast xmm11,xmm11,xmm5 + vpaddb xmm5,xmm0,xmm2 + mov QWORD[((120+8))+rsp],r12 + lea rdx,[96+rdx] + vmovdqu xmm15,XMMWORD[((0-128))+r9] + vaesenclast xmm12,xmm12,xmm6 + vpaddb xmm6,xmm5,xmm2 + vaesenclast xmm13,xmm13,xmm7 + vpaddb xmm7,xmm6,xmm2 + vaesenclast xmm14,xmm14,xmm3 + vpaddb xmm3,xmm7,xmm2 + + add rax,0x60 + sub r8,0x6 + jc NEAR $L$6x_done + + vmovups XMMWORD[(-96)+rdx],xmm9 + vpxor xmm9,xmm1,xmm15 + vmovups XMMWORD[(-80)+rdx],xmm10 + vmovdqa xmm10,xmm0 + vmovups XMMWORD[(-64)+rdx],xmm11 + vmovdqa xmm11,xmm5 + vmovups XMMWORD[(-48)+rdx],xmm12 + vmovdqa xmm12,xmm6 + vmovups XMMWORD[(-32)+rdx],xmm13 + vmovdqa xmm13,xmm7 + vmovups XMMWORD[(-16)+rdx],xmm14 + vmovdqa xmm14,xmm3 + vmovdqu xmm7,XMMWORD[((32+8))+rsp] + jmp NEAR $L$oop6x + +$L$6x_done: + vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp] + vpxor xmm8,xmm8,xmm4 + + ret + + +global aesni_gcm_decrypt + +ALIGN 32 +aesni_gcm_decrypt: + +$L$SEH_begin_aesni_gcm_decrypt_1: +_CET_ENDBR + xor rax,rax + + + + cmp r8,0x60 + jb NEAR $L$gcm_dec_abort + + push rbp + +$L$SEH_prolog_aesni_gcm_decrypt_2: + mov rbp,rsp + + push rbx + +$L$SEH_prolog_aesni_gcm_decrypt_3: + push r12 + +$L$SEH_prolog_aesni_gcm_decrypt_4: + push r13 + +$L$SEH_prolog_aesni_gcm_decrypt_5: + push r14 + +$L$SEH_prolog_aesni_gcm_decrypt_6: + push r15 + +$L$SEH_prolog_aesni_gcm_decrypt_7: + lea rsp,[((-168))+rsp] +$L$SEH_prolog_aesni_gcm_decrypt_8: +$L$SEH_prolog_aesni_gcm_decrypt_9: + + + + mov QWORD[16+rbp],rdi +$L$SEH_prolog_aesni_gcm_decrypt_10: + mov QWORD[24+rbp],rsi +$L$SEH_prolog_aesni_gcm_decrypt_11: + mov rdi,QWORD[48+rbp] + mov rsi,QWORD[56+rbp] + + movaps XMMWORD[(-208)+rbp],xmm6 +$L$SEH_prolog_aesni_gcm_decrypt_12: + movaps XMMWORD[(-192)+rbp],xmm7 +$L$SEH_prolog_aesni_gcm_decrypt_13: + movaps XMMWORD[(-176)+rbp],xmm8 +$L$SEH_prolog_aesni_gcm_decrypt_14: + movaps XMMWORD[(-160)+rbp],xmm9 +$L$SEH_prolog_aesni_gcm_decrypt_15: + movaps XMMWORD[(-144)+rbp],xmm10 +$L$SEH_prolog_aesni_gcm_decrypt_16: + movaps XMMWORD[(-128)+rbp],xmm11 +$L$SEH_prolog_aesni_gcm_decrypt_17: + movaps XMMWORD[(-112)+rbp],xmm12 +$L$SEH_prolog_aesni_gcm_decrypt_18: + movaps XMMWORD[(-96)+rbp],xmm13 +$L$SEH_prolog_aesni_gcm_decrypt_19: + movaps XMMWORD[(-80)+rbp],xmm14 +$L$SEH_prolog_aesni_gcm_decrypt_20: + movaps XMMWORD[(-64)+rbp],xmm15 +$L$SEH_prolog_aesni_gcm_decrypt_21: + vzeroupper + + mov r12,QWORD[64+rbp] + vmovdqu xmm1,XMMWORD[rdi] + add rsp,-128 + mov ebx,DWORD[12+rdi] + lea r11,[$L$bswap_mask] + lea r14,[((-128))+r9] + mov r15,0xf80 + vmovdqu xmm8,XMMWORD[r12] + and rsp,-128 + vmovdqu xmm0,XMMWORD[r11] + lea r9,[128+r9] + lea rsi,[32+rsi] + mov r10d,DWORD[((240-128))+r9] + vpshufb xmm8,xmm8,xmm0 + + and r14,r15 + and r15,rsp + sub r15,r14 + jc NEAR $L$dec_no_key_aliasing + cmp r15,768 + jnc NEAR $L$dec_no_key_aliasing + sub rsp,r15 +$L$dec_no_key_aliasing: + + vmovdqu xmm7,XMMWORD[80+rcx] + mov r14,rcx + vmovdqu xmm4,XMMWORD[64+rcx] + + + + + + + + lea r15,[((-192))+r8*1+rcx] + + vmovdqu xmm5,XMMWORD[48+rcx] + shr r8,4 + xor rax,rax + vmovdqu xmm6,XMMWORD[32+rcx] + vpshufb xmm7,xmm7,xmm0 + vmovdqu xmm2,XMMWORD[16+rcx] + vpshufb xmm4,xmm4,xmm0 + vmovdqu xmm3,XMMWORD[rcx] + vpshufb xmm5,xmm5,xmm0 + vmovdqu XMMWORD[48+rsp],xmm4 + vpshufb xmm6,xmm6,xmm0 + vmovdqu XMMWORD[64+rsp],xmm5 + vpshufb xmm2,xmm2,xmm0 + vmovdqu XMMWORD[80+rsp],xmm6 + vpshufb xmm3,xmm3,xmm0 + vmovdqu XMMWORD[96+rsp],xmm2 + vmovdqu XMMWORD[112+rsp],xmm3 + + call _aesni_ctr32_ghash_6x + + mov r12,QWORD[64+rbp] + vmovups XMMWORD[(-96)+rdx],xmm9 + vmovups XMMWORD[(-80)+rdx],xmm10 + vmovups XMMWORD[(-64)+rdx],xmm11 + vmovups XMMWORD[(-48)+rdx],xmm12 + vmovups XMMWORD[(-32)+rdx],xmm13 + vmovups XMMWORD[(-16)+rdx],xmm14 + + vpshufb xmm8,xmm8,XMMWORD[r11] + vmovdqu XMMWORD[r12],xmm8 + + vzeroupper + movaps xmm6,XMMWORD[((-208))+rbp] + movaps xmm7,XMMWORD[((-192))+rbp] + movaps xmm8,XMMWORD[((-176))+rbp] + movaps xmm9,XMMWORD[((-160))+rbp] + movaps xmm10,XMMWORD[((-144))+rbp] + movaps xmm11,XMMWORD[((-128))+rbp] + movaps xmm12,XMMWORD[((-112))+rbp] + movaps xmm13,XMMWORD[((-96))+rbp] + movaps xmm14,XMMWORD[((-80))+rbp] + movaps xmm15,XMMWORD[((-64))+rbp] + mov rdi,QWORD[16+rbp] + mov rsi,QWORD[24+rbp] + lea rsp,[((-40))+rbp] + + pop r15 + + pop r14 + + pop r13 + + pop r12 + + pop rbx + + pop rbp + +$L$gcm_dec_abort: + ret +$L$SEH_end_aesni_gcm_decrypt_22: + + + +ALIGN 32 +_aesni_ctr32_6x: + + vmovdqu xmm4,XMMWORD[((0-128))+r9] + vmovdqu xmm2,XMMWORD[32+r11] + lea r13,[((-1))+r10] + vmovups xmm15,XMMWORD[((16-128))+r9] + lea r12,[((32-128))+r9] + vpxor xmm9,xmm1,xmm4 + add ebx,100663296 + jc NEAR $L$handle_ctr32_2 + vpaddb xmm10,xmm1,xmm2 + vpaddb xmm11,xmm10,xmm2 + vpxor xmm10,xmm10,xmm4 + vpaddb xmm12,xmm11,xmm2 + vpxor xmm11,xmm11,xmm4 + vpaddb xmm13,xmm12,xmm2 + vpxor xmm12,xmm12,xmm4 + vpaddb xmm14,xmm13,xmm2 + vpxor xmm13,xmm13,xmm4 + vpaddb xmm1,xmm14,xmm2 + vpxor xmm14,xmm14,xmm4 + jmp NEAR $L$oop_ctr32 + +ALIGN 16 +$L$oop_ctr32: + vaesenc xmm9,xmm9,xmm15 + vaesenc xmm10,xmm10,xmm15 + vaesenc xmm11,xmm11,xmm15 + vaesenc xmm12,xmm12,xmm15 + vaesenc xmm13,xmm13,xmm15 + vaesenc xmm14,xmm14,xmm15 + vmovups xmm15,XMMWORD[r12] + lea r12,[16+r12] + dec r13d + jnz NEAR $L$oop_ctr32 + + vmovdqu xmm3,XMMWORD[r12] + vaesenc xmm9,xmm9,xmm15 + vpxor xmm4,xmm3,XMMWORD[rcx] + vaesenc xmm10,xmm10,xmm15 + vpxor xmm5,xmm3,XMMWORD[16+rcx] + vaesenc xmm11,xmm11,xmm15 + vpxor xmm6,xmm3,XMMWORD[32+rcx] + vaesenc xmm12,xmm12,xmm15 + vpxor xmm8,xmm3,XMMWORD[48+rcx] + vaesenc xmm13,xmm13,xmm15 + vpxor xmm2,xmm3,XMMWORD[64+rcx] + vaesenc xmm14,xmm14,xmm15 + vpxor xmm3,xmm3,XMMWORD[80+rcx] + lea rcx,[96+rcx] + + vaesenclast xmm9,xmm9,xmm4 + vaesenclast xmm10,xmm10,xmm5 + vaesenclast xmm11,xmm11,xmm6 + vaesenclast xmm12,xmm12,xmm8 + vaesenclast xmm13,xmm13,xmm2 + vaesenclast xmm14,xmm14,xmm3 + vmovups XMMWORD[rdx],xmm9 + vmovups XMMWORD[16+rdx],xmm10 + vmovups XMMWORD[32+rdx],xmm11 + vmovups XMMWORD[48+rdx],xmm12 + vmovups XMMWORD[64+rdx],xmm13 + vmovups XMMWORD[80+rdx],xmm14 + lea rdx,[96+rdx] + + ret +ALIGN 32 +$L$handle_ctr32_2: + vpshufb xmm6,xmm1,xmm0 + vmovdqu xmm5,XMMWORD[48+r11] + vpaddd xmm10,xmm6,XMMWORD[64+r11] + vpaddd xmm11,xmm6,xmm5 + vpaddd xmm12,xmm10,xmm5 + vpshufb xmm10,xmm10,xmm0 + vpaddd xmm13,xmm11,xmm5 + vpshufb xmm11,xmm11,xmm0 + vpxor xmm10,xmm10,xmm4 + vpaddd xmm14,xmm12,xmm5 + vpshufb xmm12,xmm12,xmm0 + vpxor xmm11,xmm11,xmm4 + vpaddd xmm1,xmm13,xmm5 + vpshufb xmm13,xmm13,xmm0 + vpxor xmm12,xmm12,xmm4 + vpshufb xmm14,xmm14,xmm0 + vpxor xmm13,xmm13,xmm4 + vpshufb xmm1,xmm1,xmm0 + vpxor xmm14,xmm14,xmm4 + jmp NEAR $L$oop_ctr32 + + + +global aesni_gcm_encrypt + +ALIGN 32 +aesni_gcm_encrypt: + +$L$SEH_begin_aesni_gcm_encrypt_1: +_CET_ENDBR +%ifdef BORINGSSL_DISPATCH_TEST +EXTERN BORINGSSL_function_hit + mov BYTE[((BORINGSSL_function_hit+2))],1 +%endif + xor rax,rax + + + + + cmp r8,0x60*3 + jb NEAR $L$gcm_enc_abort + + push rbp + +$L$SEH_prolog_aesni_gcm_encrypt_2: + mov rbp,rsp + + push rbx + +$L$SEH_prolog_aesni_gcm_encrypt_3: + push r12 + +$L$SEH_prolog_aesni_gcm_encrypt_4: + push r13 + +$L$SEH_prolog_aesni_gcm_encrypt_5: + push r14 + +$L$SEH_prolog_aesni_gcm_encrypt_6: + push r15 + +$L$SEH_prolog_aesni_gcm_encrypt_7: + lea rsp,[((-168))+rsp] +$L$SEH_prolog_aesni_gcm_encrypt_8: +$L$SEH_prolog_aesni_gcm_encrypt_9: + + + + mov QWORD[16+rbp],rdi +$L$SEH_prolog_aesni_gcm_encrypt_10: + mov QWORD[24+rbp],rsi +$L$SEH_prolog_aesni_gcm_encrypt_11: + mov rdi,QWORD[48+rbp] + mov rsi,QWORD[56+rbp] + + movaps XMMWORD[(-208)+rbp],xmm6 +$L$SEH_prolog_aesni_gcm_encrypt_12: + movaps XMMWORD[(-192)+rbp],xmm7 +$L$SEH_prolog_aesni_gcm_encrypt_13: + movaps XMMWORD[(-176)+rbp],xmm8 +$L$SEH_prolog_aesni_gcm_encrypt_14: + movaps XMMWORD[(-160)+rbp],xmm9 +$L$SEH_prolog_aesni_gcm_encrypt_15: + movaps XMMWORD[(-144)+rbp],xmm10 +$L$SEH_prolog_aesni_gcm_encrypt_16: + movaps XMMWORD[(-128)+rbp],xmm11 +$L$SEH_prolog_aesni_gcm_encrypt_17: + movaps XMMWORD[(-112)+rbp],xmm12 +$L$SEH_prolog_aesni_gcm_encrypt_18: + movaps XMMWORD[(-96)+rbp],xmm13 +$L$SEH_prolog_aesni_gcm_encrypt_19: + movaps XMMWORD[(-80)+rbp],xmm14 +$L$SEH_prolog_aesni_gcm_encrypt_20: + movaps XMMWORD[(-64)+rbp],xmm15 +$L$SEH_prolog_aesni_gcm_encrypt_21: + vzeroupper + + vmovdqu xmm1,XMMWORD[rdi] + add rsp,-128 + mov ebx,DWORD[12+rdi] + lea r11,[$L$bswap_mask] + lea r14,[((-128))+r9] + mov r15,0xf80 + lea r9,[128+r9] + vmovdqu xmm0,XMMWORD[r11] + and rsp,-128 + mov r10d,DWORD[((240-128))+r9] + + and r14,r15 + and r15,rsp + sub r15,r14 + jc NEAR $L$enc_no_key_aliasing + cmp r15,768 + jnc NEAR $L$enc_no_key_aliasing + sub rsp,r15 +$L$enc_no_key_aliasing: + + mov r14,rdx + + + + + + + + + lea r15,[((-192))+r8*1+rdx] + + shr r8,4 + + call _aesni_ctr32_6x + vpshufb xmm8,xmm9,xmm0 + vpshufb xmm2,xmm10,xmm0 + vmovdqu XMMWORD[112+rsp],xmm8 + vpshufb xmm4,xmm11,xmm0 + vmovdqu XMMWORD[96+rsp],xmm2 + vpshufb xmm5,xmm12,xmm0 + vmovdqu XMMWORD[80+rsp],xmm4 + vpshufb xmm6,xmm13,xmm0 + vmovdqu XMMWORD[64+rsp],xmm5 + vpshufb xmm7,xmm14,xmm0 + vmovdqu XMMWORD[48+rsp],xmm6 + + call _aesni_ctr32_6x + + mov r12,QWORD[64+rbp] + lea rsi,[32+rsi] + vmovdqu xmm8,XMMWORD[r12] + sub r8,12 + mov rax,0x60*2 + vpshufb xmm8,xmm8,xmm0 + + call _aesni_ctr32_ghash_6x + vmovdqu xmm7,XMMWORD[32+rsp] + vmovdqu xmm0,XMMWORD[r11] + vmovdqu xmm3,XMMWORD[((0-32))+rsi] + vpunpckhqdq xmm1,xmm7,xmm7 + vmovdqu xmm15,XMMWORD[((32-32))+rsi] + vmovups XMMWORD[(-96)+rdx],xmm9 + vpshufb xmm9,xmm9,xmm0 + vpxor xmm1,xmm1,xmm7 + vmovups XMMWORD[(-80)+rdx],xmm10 + vpshufb xmm10,xmm10,xmm0 + vmovups XMMWORD[(-64)+rdx],xmm11 + vpshufb xmm11,xmm11,xmm0 + vmovups XMMWORD[(-48)+rdx],xmm12 + vpshufb xmm12,xmm12,xmm0 + vmovups XMMWORD[(-32)+rdx],xmm13 + vpshufb xmm13,xmm13,xmm0 + vmovups XMMWORD[(-16)+rdx],xmm14 + vpshufb xmm14,xmm14,xmm0 + vmovdqu XMMWORD[16+rsp],xmm9 + vmovdqu xmm6,XMMWORD[48+rsp] + vmovdqu xmm0,XMMWORD[((16-32))+rsi] + vpunpckhqdq xmm2,xmm6,xmm6 + vpclmulqdq xmm5,xmm7,xmm3,0x00 + vpxor xmm2,xmm2,xmm6 + vpclmulqdq xmm7,xmm7,xmm3,0x11 + vpclmulqdq xmm1,xmm1,xmm15,0x00 + + vmovdqu xmm9,XMMWORD[64+rsp] + vpclmulqdq xmm4,xmm6,xmm0,0x00 + vmovdqu xmm3,XMMWORD[((48-32))+rsi] + vpxor xmm4,xmm4,xmm5 + vpunpckhqdq xmm5,xmm9,xmm9 + vpclmulqdq xmm6,xmm6,xmm0,0x11 + vpxor xmm5,xmm5,xmm9 + vpxor xmm6,xmm6,xmm7 + vpclmulqdq xmm2,xmm2,xmm15,0x10 + vmovdqu xmm15,XMMWORD[((80-32))+rsi] + vpxor xmm2,xmm2,xmm1 + + vmovdqu xmm1,XMMWORD[80+rsp] + vpclmulqdq xmm7,xmm9,xmm3,0x00 + vmovdqu xmm0,XMMWORD[((64-32))+rsi] + vpxor xmm7,xmm7,xmm4 + vpunpckhqdq xmm4,xmm1,xmm1 + vpclmulqdq xmm9,xmm9,xmm3,0x11 + vpxor xmm4,xmm4,xmm1 + vpxor xmm9,xmm9,xmm6 + vpclmulqdq xmm5,xmm5,xmm15,0x00 + vpxor xmm5,xmm5,xmm2 + + vmovdqu xmm2,XMMWORD[96+rsp] + vpclmulqdq xmm6,xmm1,xmm0,0x00 + vmovdqu xmm3,XMMWORD[((96-32))+rsi] + vpxor xmm6,xmm6,xmm7 + vpunpckhqdq xmm7,xmm2,xmm2 + vpclmulqdq xmm1,xmm1,xmm0,0x11 + vpxor xmm7,xmm7,xmm2 + vpxor xmm1,xmm1,xmm9 + vpclmulqdq xmm4,xmm4,xmm15,0x10 + vmovdqu xmm15,XMMWORD[((128-32))+rsi] + vpxor xmm4,xmm4,xmm5 + + vpxor xmm8,xmm8,XMMWORD[112+rsp] + vpclmulqdq xmm5,xmm2,xmm3,0x00 + vmovdqu xmm0,XMMWORD[((112-32))+rsi] + vpunpckhqdq xmm9,xmm8,xmm8 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm2,xmm2,xmm3,0x11 + vpxor xmm9,xmm9,xmm8 + vpxor xmm2,xmm2,xmm1 + vpclmulqdq xmm7,xmm7,xmm15,0x00 + vpxor xmm4,xmm7,xmm4 + + vpclmulqdq xmm6,xmm8,xmm0,0x00 + vmovdqu xmm3,XMMWORD[((0-32))+rsi] + vpunpckhqdq xmm1,xmm14,xmm14 + vpclmulqdq xmm8,xmm8,xmm0,0x11 + vpxor xmm1,xmm1,xmm14 + vpxor xmm5,xmm6,xmm5 + vpclmulqdq xmm9,xmm9,xmm15,0x10 + vmovdqu xmm15,XMMWORD[((32-32))+rsi] + vpxor xmm7,xmm8,xmm2 + vpxor xmm6,xmm9,xmm4 + + vmovdqu xmm0,XMMWORD[((16-32))+rsi] + vpxor xmm9,xmm7,xmm5 + vpclmulqdq xmm4,xmm14,xmm3,0x00 + vpxor xmm6,xmm6,xmm9 + vpunpckhqdq xmm2,xmm13,xmm13 + vpclmulqdq xmm14,xmm14,xmm3,0x11 + vpxor xmm2,xmm2,xmm13 + vpslldq xmm9,xmm6,8 + vpclmulqdq xmm1,xmm1,xmm15,0x00 + vpxor xmm8,xmm5,xmm9 + vpsrldq xmm6,xmm6,8 + vpxor xmm7,xmm7,xmm6 + + vpclmulqdq xmm5,xmm13,xmm0,0x00 + vmovdqu xmm3,XMMWORD[((48-32))+rsi] + vpxor xmm5,xmm5,xmm4 + vpunpckhqdq xmm9,xmm12,xmm12 + vpclmulqdq xmm13,xmm13,xmm0,0x11 + vpxor xmm9,xmm9,xmm12 + vpxor xmm13,xmm13,xmm14 + vpalignr xmm14,xmm8,xmm8,8 + vpclmulqdq xmm2,xmm2,xmm15,0x10 + vmovdqu xmm15,XMMWORD[((80-32))+rsi] + vpxor xmm2,xmm2,xmm1 + + vpclmulqdq xmm4,xmm12,xmm3,0x00 + vmovdqu xmm0,XMMWORD[((64-32))+rsi] + vpxor xmm4,xmm4,xmm5 + vpunpckhqdq xmm1,xmm11,xmm11 + vpclmulqdq xmm12,xmm12,xmm3,0x11 + vpxor xmm1,xmm1,xmm11 + vpxor xmm12,xmm12,xmm13 + vxorps xmm7,xmm7,XMMWORD[16+rsp] + vpclmulqdq xmm9,xmm9,xmm15,0x00 + vpxor xmm9,xmm9,xmm2 + + vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 + vxorps xmm8,xmm8,xmm14 + + vpclmulqdq xmm5,xmm11,xmm0,0x00 + vmovdqu xmm3,XMMWORD[((96-32))+rsi] + vpxor xmm5,xmm5,xmm4 + vpunpckhqdq xmm2,xmm10,xmm10 + vpclmulqdq xmm11,xmm11,xmm0,0x11 + vpxor xmm2,xmm2,xmm10 + vpalignr xmm14,xmm8,xmm8,8 + vpxor xmm11,xmm11,xmm12 + vpclmulqdq xmm1,xmm1,xmm15,0x10 + vmovdqu xmm15,XMMWORD[((128-32))+rsi] + vpxor xmm1,xmm1,xmm9 + + vxorps xmm14,xmm14,xmm7 + vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 + vxorps xmm8,xmm8,xmm14 + + vpclmulqdq xmm4,xmm10,xmm3,0x00 + vmovdqu xmm0,XMMWORD[((112-32))+rsi] + vpxor xmm4,xmm4,xmm5 + vpunpckhqdq xmm9,xmm8,xmm8 + vpclmulqdq xmm10,xmm10,xmm3,0x11 + vpxor xmm9,xmm9,xmm8 + vpxor xmm10,xmm10,xmm11 + vpclmulqdq xmm2,xmm2,xmm15,0x00 + vpxor xmm2,xmm2,xmm1 + + vpclmulqdq xmm5,xmm8,xmm0,0x00 + vpclmulqdq xmm7,xmm8,xmm0,0x11 + vpxor xmm5,xmm5,xmm4 + vpclmulqdq xmm6,xmm9,xmm15,0x10 + vpxor xmm7,xmm7,xmm10 + vpxor xmm6,xmm6,xmm2 + + vpxor xmm4,xmm7,xmm5 + vpxor xmm6,xmm6,xmm4 + vpslldq xmm1,xmm6,8 + vmovdqu xmm3,XMMWORD[16+r11] + vpsrldq xmm6,xmm6,8 + vpxor xmm8,xmm5,xmm1 + vpxor xmm7,xmm7,xmm6 + + vpalignr xmm2,xmm8,xmm8,8 + vpclmulqdq xmm8,xmm8,xmm3,0x10 + vpxor xmm8,xmm8,xmm2 + + vpalignr xmm2,xmm8,xmm8,8 + vpclmulqdq xmm8,xmm8,xmm3,0x10 + vpxor xmm2,xmm2,xmm7 + vpxor xmm8,xmm8,xmm2 + mov r12,QWORD[64+rbp] + vpshufb xmm8,xmm8,XMMWORD[r11] + vmovdqu XMMWORD[r12],xmm8 + + vzeroupper + movaps xmm6,XMMWORD[((-208))+rbp] + movaps xmm7,XMMWORD[((-192))+rbp] + movaps xmm8,XMMWORD[((-176))+rbp] + movaps xmm9,XMMWORD[((-160))+rbp] + movaps xmm10,XMMWORD[((-144))+rbp] + movaps xmm11,XMMWORD[((-128))+rbp] + movaps xmm12,XMMWORD[((-112))+rbp] + movaps xmm13,XMMWORD[((-96))+rbp] + movaps xmm14,XMMWORD[((-80))+rbp] + movaps xmm15,XMMWORD[((-64))+rbp] + mov rdi,QWORD[16+rbp] + mov rsi,QWORD[24+rbp] + lea rsp,[((-40))+rbp] + + pop r15 + + pop r14 + + pop r13 + + pop r12 + + pop rbx + + pop rbp + +$L$gcm_enc_abort: + ret +$L$SEH_end_aesni_gcm_encrypt_22: + + +section .rdata rdata align=8 +ALIGN 64 +$L$bswap_mask: + DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$poly: + DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +$L$one_msb: + DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +$L$two_lsb: + DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +$L$one_lsb: + DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108 + DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82 + DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 + DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +ALIGN 64 +section .text + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_aesni_gcm_decrypt_1 wrt ..imagebase + DD $L$SEH_end_aesni_gcm_decrypt_22 wrt ..imagebase + DD $L$SEH_info_aesni_gcm_decrypt_0 wrt ..imagebase + + DD $L$SEH_begin_aesni_gcm_encrypt_1 wrt ..imagebase + DD $L$SEH_end_aesni_gcm_encrypt_22 wrt ..imagebase + DD $L$SEH_info_aesni_gcm_encrypt_0 wrt ..imagebase + + +section .xdata rdata align=8 +ALIGN 4 +$L$SEH_info_aesni_gcm_decrypt_0: + DB 1 + DB $L$SEH_prolog_aesni_gcm_decrypt_21-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 33 + DB 213 + DB $L$SEH_prolog_aesni_gcm_decrypt_21-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 248 + DW 9 + DB $L$SEH_prolog_aesni_gcm_decrypt_20-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 232 + DW 8 + DB $L$SEH_prolog_aesni_gcm_decrypt_19-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 216 + DW 7 + DB $L$SEH_prolog_aesni_gcm_decrypt_18-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 200 + DW 6 + DB $L$SEH_prolog_aesni_gcm_decrypt_17-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 184 + DW 5 + DB $L$SEH_prolog_aesni_gcm_decrypt_16-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 168 + DW 4 + DB $L$SEH_prolog_aesni_gcm_decrypt_15-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 152 + DW 3 + DB $L$SEH_prolog_aesni_gcm_decrypt_14-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 136 + DW 2 + DB $L$SEH_prolog_aesni_gcm_decrypt_13-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 120 + DW 1 + DB $L$SEH_prolog_aesni_gcm_decrypt_12-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 104 + DW 0 + DB $L$SEH_prolog_aesni_gcm_decrypt_11-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 100 + DW 29 + DB $L$SEH_prolog_aesni_gcm_decrypt_10-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 116 + DW 28 + DB $L$SEH_prolog_aesni_gcm_decrypt_9-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 3 + DB $L$SEH_prolog_aesni_gcm_decrypt_8-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 1 + DW 21 + DB $L$SEH_prolog_aesni_gcm_decrypt_7-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 240 + DB $L$SEH_prolog_aesni_gcm_decrypt_6-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 224 + DB $L$SEH_prolog_aesni_gcm_decrypt_5-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 208 + DB $L$SEH_prolog_aesni_gcm_decrypt_4-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 192 + DB $L$SEH_prolog_aesni_gcm_decrypt_3-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 48 + DB $L$SEH_prolog_aesni_gcm_decrypt_2-$L$SEH_begin_aesni_gcm_decrypt_1 + DB 80 + +$L$SEH_info_aesni_gcm_encrypt_0: + DB 1 + DB $L$SEH_prolog_aesni_gcm_encrypt_21-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 33 + DB 213 + DB $L$SEH_prolog_aesni_gcm_encrypt_21-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 248 + DW 9 + DB $L$SEH_prolog_aesni_gcm_encrypt_20-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 232 + DW 8 + DB $L$SEH_prolog_aesni_gcm_encrypt_19-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 216 + DW 7 + DB $L$SEH_prolog_aesni_gcm_encrypt_18-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 200 + DW 6 + DB $L$SEH_prolog_aesni_gcm_encrypt_17-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 184 + DW 5 + DB $L$SEH_prolog_aesni_gcm_encrypt_16-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 168 + DW 4 + DB $L$SEH_prolog_aesni_gcm_encrypt_15-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 152 + DW 3 + DB $L$SEH_prolog_aesni_gcm_encrypt_14-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 136 + DW 2 + DB $L$SEH_prolog_aesni_gcm_encrypt_13-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 120 + DW 1 + DB $L$SEH_prolog_aesni_gcm_encrypt_12-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 104 + DW 0 + DB $L$SEH_prolog_aesni_gcm_encrypt_11-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 100 + DW 29 + DB $L$SEH_prolog_aesni_gcm_encrypt_10-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 116 + DW 28 + DB $L$SEH_prolog_aesni_gcm_encrypt_9-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 3 + DB $L$SEH_prolog_aesni_gcm_encrypt_8-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 1 + DW 21 + DB $L$SEH_prolog_aesni_gcm_encrypt_7-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 240 + DB $L$SEH_prolog_aesni_gcm_encrypt_6-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 224 + DB $L$SEH_prolog_aesni_gcm_encrypt_5-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 208 + DB $L$SEH_prolog_aesni_gcm_encrypt_4-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 192 + DB $L$SEH_prolog_aesni_gcm_encrypt_3-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 48 + DB $L$SEH_prolog_aesni_gcm_encrypt_2-$L$SEH_begin_aesni_gcm_encrypt_1 + DB 80 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-x86-apple.S b/yass/third_party/boringssl/src/gen/bcm/aesni-x86-apple.S new file mode 100644 index 0000000000..44676044c6 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-x86-apple.S @@ -0,0 +1,2475 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +#ifdef BORINGSSL_DISPATCH_TEST +#endif +.globl _aes_hw_encrypt +.private_extern _aes_hw_encrypt +.align 4 +_aes_hw_encrypt: +L_aes_hw_encrypt_begin: +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call L000pic +L000pic: + popl %ebx + leal _BORINGSSL_function_hit+1-L000pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L001enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L001enc1_loop_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.globl _aes_hw_decrypt +.private_extern _aes_hw_decrypt +.align 4 +_aes_hw_decrypt: +L_aes_hw_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L002dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L002dec1_loop_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.private_extern __aesni_encrypt2 +.align 4 +__aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L003enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L003enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.private_extern __aesni_decrypt2 +.align 4 +__aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L004dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L004dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.private_extern __aesni_encrypt3 +.align 4 +__aesni_encrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L005enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz L005enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.private_extern __aesni_decrypt3 +.align 4 +__aesni_decrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L006dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz L006dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.private_extern __aesni_encrypt4 +.align 4 +__aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +L007enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz L007enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.private_extern __aesni_decrypt4 +.align 4 +__aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +L008dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz L008dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.private_extern __aesni_encrypt6 +.align 4 +__aesni_encrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp L009_aesni_encrypt6_inner +.align 4,0x90 +L010enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +L009_aesni_encrypt6_inner: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +L_aesni_encrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz L010enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.private_extern __aesni_decrypt6 +.align 4 +__aesni_decrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp L011_aesni_decrypt6_inner +.align 4,0x90 +L012dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +L011_aesni_decrypt6_inner: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +L_aesni_decrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz L012dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.globl _aes_hw_ecb_encrypt +.private_extern _aes_hw_ecb_encrypt +.align 4 +_aes_hw_ecb_encrypt: +L_aes_hw_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz L013ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz L014ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb L015ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp L016ecb_enc_loop6_enter +.align 4,0x90 +L017ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +L016ecb_enc_loop6_enter: + call __aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc L017ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz L013ecb_ret +L015ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb L018ecb_enc_one + movups 16(%esi),%xmm3 + je L019ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb L020ecb_enc_three + movups 48(%esi),%xmm5 + je L021ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L013ecb_ret +.align 4,0x90 +L018ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L022enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L022enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp L013ecb_ret +.align 4,0x90 +L019ecb_enc_two: + call __aesni_encrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L013ecb_ret +.align 4,0x90 +L020ecb_enc_three: + call __aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L013ecb_ret +.align 4,0x90 +L021ecb_enc_four: + call __aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp L013ecb_ret +.align 4,0x90 +L014ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb L023ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp L024ecb_dec_loop6_enter +.align 4,0x90 +L025ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +L024ecb_dec_loop6_enter: + call __aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc L025ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz L013ecb_ret +L023ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb L026ecb_dec_one + movups 16(%esi),%xmm3 + je L027ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb L028ecb_dec_three + movups 48(%esi),%xmm5 + je L029ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L013ecb_ret +.align 4,0x90 +L026ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L030dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L030dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp L013ecb_ret +.align 4,0x90 +L027ecb_dec_two: + call __aesni_decrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L013ecb_ret +.align 4,0x90 +L028ecb_dec_three: + call __aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L013ecb_ret +.align 4,0x90 +L029ecb_dec_four: + call __aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +L013ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aes_hw_ccm64_encrypt_blocks +.private_extern _aes_hw_ccm64_encrypt_blocks +.align 4 +_aes_hw_ccm64_encrypt_blocks: +L_aes_hw_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shll $4,%ecx + movl $16,%ebx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx +.byte 102,15,56,0,253 +L031ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%ebp),%xmm0 +L032ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L032ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 + decl %eax +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) +.byte 102,15,56,0,213 + leal 16(%edi),%edi + jnz L031ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aes_hw_ccm64_decrypt_blocks +.private_extern _aes_hw_ccm64_decrypt_blocks +.align 4 +_aes_hw_ccm64_decrypt_blocks: +L_aes_hw_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L033enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L033enc1_loop_5 +.byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp L034ccm64_dec_outer +.align 4,0x90 +L034ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz L035ccm64_dec_break + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups 32(%ebp),%xmm0 +L036ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L036ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + jmp L034ccm64_dec_outer +.align 4,0x90 +L035ccm64_dec_break: + movl 240(%ebp),%ecx + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +L037enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L037enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aes_hw_ctr32_encrypt_blocks +.private_extern _aes_hw_ctr32_encrypt_blocks +.align 4 +_aes_hw_ctr32_encrypt_blocks: +L_aes_hw_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call L038pic +L038pic: + popl %ebx + leal _BORINGSSL_function_hit+0-L038pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je L039ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,195,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,205,0 + incl %ebx +.byte 102,15,58,34,195,1 + incl %ebp +.byte 102,15,58,34,205,1 + incl %ebx +.byte 102,15,58,34,195,2 + incl %ebp +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 + cmpl $6,%eax + jb L040ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx + movdqa %xmm7,32(%esp) + movl %edx,%ebp + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl $6,%eax + jmp L041ctr32_loop6 +.align 4,0x90 +L041ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 + pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 + pxor %xmm0,%xmm3 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 64(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 48(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + movups %xmm6,64(%edi) + pshufd $192,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + pshufd $128,%xmm0,%xmm3 + subl $6,%eax + jnc L041ctr32_loop6 + addl $6,%eax + jz L042ctr32_ret + movdqu (%ebp),%xmm7 + movl %ebp,%edx + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +L040ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb L043ctr32_one + pshufd $64,%xmm0,%xmm4 + por %xmm7,%xmm3 + je L044ctr32_two + pshufd $192,%xmm1,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb L045ctr32_three + pshufd $128,%xmm1,%xmm6 + por %xmm7,%xmm5 + je L046ctr32_four + por %xmm7,%xmm6 + call __aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L042ctr32_ret +.align 4,0x90 +L039ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +L043ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L047enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L047enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp L042ctr32_ret +.align 4,0x90 +L044ctr32_two: + call __aesni_encrypt2 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L042ctr32_ret +.align 4,0x90 +L045ctr32_three: + call __aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L042ctr32_ret +.align 4,0x90 +L046ctr32_four: + call __aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +L042ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aes_hw_xts_encrypt +.private_extern _aes_hw_xts_encrypt +.align 4 +_aes_hw_xts_encrypt: +L_aes_hw_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L048enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L048enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc L049xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp L050xts_enc_loop6 +.align 4,0x90 +L050xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc L050xts_enc_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +L049xts_enc_short: + addl $96,%eax + jz L051xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb L052xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je L053xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb L054xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je L055xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp L056xts_enc_done +.align 4,0x90 +L052xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L057enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L057enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp L056xts_enc_done +.align 4,0x90 +L053xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_encrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L056xts_enc_done +.align 4,0x90 +L054xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp L056xts_enc_done +.align 4,0x90 +L055xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L056xts_enc_done +.align 4,0x90 +L051xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz L058xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp L059xts_enc_steal +.align 4,0x90 +L056xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz L058xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +L059xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz L059xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L060enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L060enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +L058xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aes_hw_xts_decrypt +.private_extern _aes_hw_xts_decrypt +.align 4 +_aes_hw_xts_decrypt: +L_aes_hw_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L061enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L061enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc L062xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp L063xts_dec_loop6 +.align 4,0x90 +L063xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + call L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc L063xts_dec_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +L062xts_dec_short: + addl $96,%eax + jz L064xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb L065xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je L066xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb L067xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je L068xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp L069xts_dec_done +.align 4,0x90 +L065xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L070dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L070dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp L069xts_dec_done +.align 4,0x90 +L066xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_decrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L069xts_dec_done +.align 4,0x90 +L067xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp L069xts_dec_done +.align 4,0x90 +L068xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L069xts_dec_done +.align 4,0x90 +L064xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz L071xts_dec_ret + movl %eax,112(%esp) + jmp L072xts_dec_only_one_more +.align 4,0x90 +L069xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz L071xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +L072xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L073dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L073dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +L074xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz L074xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L075dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L075dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +L071xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aes_hw_cbc_encrypt +.private_extern _aes_hw_cbc_encrypt +.align 4 +_aes_hw_cbc_encrypt: +L_aes_hw_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz L076cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je L077cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb L078cbc_enc_tail + subl $16,%eax + jmp L079cbc_enc_loop +.align 4,0x90 +L079cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +L080enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L080enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc L079cbc_enc_loop + addl $16,%eax + jnz L078cbc_enc_tail + movaps %xmm2,%xmm7 + pxor %xmm2,%xmm2 + jmp L081cbc_ret +L078cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp L079cbc_enc_loop +.align 4,0x90 +L077cbc_decrypt: + cmpl $80,%eax + jbe L082cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp L083cbc_dec_loop6_enter +.align 4,0x90 +L084cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +L083cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja L084cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle L085cbc_dec_clear_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +L082cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe L086cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe L087cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe L088cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe L089cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 + subl $80,%eax + jmp L090cbc_dec_tail_collected +.align 4,0x90 +L086cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L091dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L091dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp L090cbc_dec_tail_collected +.align 4,0x90 +L087cbc_dec_two: + call __aesni_decrypt2 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp L090cbc_dec_tail_collected +.align 4,0x90 +L088cbc_dec_three: + call __aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp L090cbc_dec_tail_collected +.align 4,0x90 +L089cbc_dec_four: + call __aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 + subl $64,%eax + jmp L090cbc_dec_tail_collected +.align 4,0x90 +L085cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +L090cbc_dec_tail_collected: + andl $15,%eax + jnz L092cbc_dec_tail_partial + movups %xmm2,(%edi) + pxor %xmm0,%xmm0 + jmp L081cbc_ret +.align 4,0x90 +L092cbc_dec_tail_partial: + movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 + movdqa %xmm2,(%esp) +L081cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 + movups %xmm7,(%ebp) + pxor %xmm7,%xmm7 +L076cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.private_extern __aesni_set_encrypt_key +.align 4 +__aesni_set_encrypt_key: + pushl %ebp + pushl %ebx + testl %eax,%eax + jz L093bad_pointer + testl %edx,%edx + jz L093bad_pointer + call L094pic +L094pic: + popl %ebx + leal Lkey_const-L094pic(%ebx),%ebx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp + leal 16(%edx),%edx + andl $268437504,%ebp + cmpl $256,%ecx + je L09514rounds + cmpl $192,%ecx + je L09612rounds + cmpl $128,%ecx + jne L097bad_keybits +.align 4,0x90 +L09810rounds: + cmpl $268435456,%ebp + je L09910rounds_alt + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call L100key_128_cold +.byte 102,15,58,223,200,2 + call L101key_128 +.byte 102,15,58,223,200,4 + call L101key_128 +.byte 102,15,58,223,200,8 + call L101key_128 +.byte 102,15,58,223,200,16 + call L101key_128 +.byte 102,15,58,223,200,32 + call L101key_128 +.byte 102,15,58,223,200,64 + call L101key_128 +.byte 102,15,58,223,200,128 + call L101key_128 +.byte 102,15,58,223,200,27 + call L101key_128 +.byte 102,15,58,223,200,54 + call L101key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + jmp L102good_key +.align 4,0x90 +L101key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +L100key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 4,0x90 +L09910rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +L103loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz L103loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp L102good_key +.align 4,0x90 +L09612rounds: + movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je L10412rounds_alt + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call L105key_192a_cold +.byte 102,15,58,223,202,2 + call L106key_192b +.byte 102,15,58,223,202,4 + call L107key_192a +.byte 102,15,58,223,202,8 + call L106key_192b +.byte 102,15,58,223,202,16 + call L107key_192a +.byte 102,15,58,223,202,32 + call L106key_192b +.byte 102,15,58,223,202,64 + call L107key_192a +.byte 102,15,58,223,202,128 + call L106key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + jmp L102good_key +.align 4,0x90 +L107key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 4,0x90 +L105key_192a_cold: + movaps %xmm2,%xmm5 +L108key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 4,0x90 +L106key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp L108key_192b_warm +.align 4,0x90 +L10412rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +L109loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz L109loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp L102good_key +.align 4,0x90 +L09514rounds: + movups 16(%eax),%xmm2 + leal 16(%edx),%edx + cmpl $268435456,%ebp + je L11014rounds_alt + movl $13,%ecx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call L111key_256a_cold +.byte 102,15,58,223,200,1 + call L112key_256b +.byte 102,15,58,223,202,2 + call L113key_256a +.byte 102,15,58,223,200,2 + call L112key_256b +.byte 102,15,58,223,202,4 + call L113key_256a +.byte 102,15,58,223,200,4 + call L112key_256b +.byte 102,15,58,223,202,8 + call L113key_256a +.byte 102,15,58,223,200,8 + call L112key_256b +.byte 102,15,58,223,202,16 + call L113key_256a +.byte 102,15,58,223,200,16 + call L112key_256b +.byte 102,15,58,223,202,32 + call L113key_256a +.byte 102,15,58,223,200,32 + call L112key_256b +.byte 102,15,58,223,202,64 + call L113key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + jmp L102good_key +.align 4,0x90 +L113key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +L111key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 4,0x90 +L112key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 4,0x90 +L11014rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +L114loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz L115done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp L114loop_key256 +L115done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +L102good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret +.align 2,0x90 +L093bad_pointer: + movl $-1,%eax + popl %ebx + popl %ebp + ret +.align 2,0x90 +L097bad_keybits: + pxor %xmm0,%xmm0 + movl $-2,%eax + popl %ebx + popl %ebp + ret +.globl _aes_hw_set_encrypt_key +.private_extern _aes_hw_set_encrypt_key +.align 4 +_aes_hw_set_encrypt_key: +L_aes_hw_set_encrypt_key_begin: +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call L116pic +L116pic: + popl %ebx + leal _BORINGSSL_function_hit+3-L116pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + ret +.globl _aes_hw_set_decrypt_key +.private_extern _aes_hw_set_decrypt_key +.align 4 +_aes_hw_set_decrypt_key: +L_aes_hw_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz L117dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +L118dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja L118dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorl %eax,%eax +L117dec_key_ret: + ret +.align 6,0x90 +Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-x86-linux.S b/yass/third_party/boringssl/src/gen/bcm/aesni-x86-linux.S new file mode 100644 index 0000000000..54daf1808e --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-x86-linux.S @@ -0,0 +1,2511 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +#ifdef BORINGSSL_DISPATCH_TEST +#endif +.globl aes_hw_encrypt +.hidden aes_hw_encrypt +.type aes_hw_encrypt,@function +.align 16 +aes_hw_encrypt: +.L_aes_hw_encrypt_begin: +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call .L000pic +.L000pic: + popl %ebx + leal BORINGSSL_function_hit+1-.L000pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L001enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L001enc1_loop_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.size aes_hw_encrypt,.-.L_aes_hw_encrypt_begin +.globl aes_hw_decrypt +.hidden aes_hw_decrypt +.type aes_hw_decrypt,@function +.align 16 +aes_hw_decrypt: +.L_aes_hw_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L002dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L002dec1_loop_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.size aes_hw_decrypt,.-.L_aes_hw_decrypt_begin +.hidden _aesni_encrypt2 +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L003enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L003enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.size _aesni_encrypt2,.-_aesni_encrypt2 +.hidden _aesni_decrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L004dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L004dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.size _aesni_decrypt2,.-_aesni_decrypt2 +.hidden _aesni_encrypt3 +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L005enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L005enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.size _aesni_encrypt3,.-_aesni_encrypt3 +.hidden _aesni_decrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L006dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L006dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.size _aesni_decrypt3,.-_aesni_decrypt3 +.hidden _aesni_encrypt4 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L007enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L007enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.size _aesni_encrypt4,.-_aesni_encrypt4 +.hidden _aesni_decrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L008dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L008dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.size _aesni_decrypt4,.-_aesni_decrypt4 +.hidden _aesni_encrypt6 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L009_aesni_encrypt6_inner +.align 16 +.L010enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.L009_aesni_encrypt6_inner: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.L_aesni_encrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L010enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.size _aesni_encrypt6,.-_aesni_encrypt6 +.hidden _aesni_decrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L011_aesni_decrypt6_inner +.align 16 +.L012dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.L011_aesni_decrypt6_inner: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.L_aesni_decrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L012dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.size _aesni_decrypt6,.-_aesni_decrypt6 +.globl aes_hw_ecb_encrypt +.hidden aes_hw_ecb_encrypt +.type aes_hw_ecb_encrypt,@function +.align 16 +aes_hw_ecb_encrypt: +.L_aes_hw_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz .L013ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz .L014ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L015ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L016ecb_enc_loop6_enter +.align 16 +.L017ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L016ecb_enc_loop6_enter: + call _aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L017ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L013ecb_ret +.L015ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L018ecb_enc_one + movups 16(%esi),%xmm3 + je .L019ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L020ecb_enc_three + movups 48(%esi),%xmm5 + je .L021ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L013ecb_ret +.align 16 +.L018ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L022enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L022enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp .L013ecb_ret +.align 16 +.L019ecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L013ecb_ret +.align 16 +.L020ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L013ecb_ret +.align 16 +.L021ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp .L013ecb_ret +.align 16 +.L014ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L023ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L024ecb_dec_loop6_enter +.align 16 +.L025ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L024ecb_dec_loop6_enter: + call _aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L025ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L013ecb_ret +.L023ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L026ecb_dec_one + movups 16(%esi),%xmm3 + je .L027ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L028ecb_dec_three + movups 48(%esi),%xmm5 + je .L029ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L013ecb_ret +.align 16 +.L026ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L030dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L030dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp .L013ecb_ret +.align 16 +.L027ecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L013ecb_ret +.align 16 +.L028ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L013ecb_ret +.align 16 +.L029ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L013ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aes_hw_ecb_encrypt,.-.L_aes_hw_ecb_encrypt_begin +.globl aes_hw_ccm64_encrypt_blocks +.hidden aes_hw_ccm64_encrypt_blocks +.type aes_hw_ccm64_encrypt_blocks,@function +.align 16 +aes_hw_ccm64_encrypt_blocks: +.L_aes_hw_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shll $4,%ecx + movl $16,%ebx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx +.byte 102,15,56,0,253 +.L031ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%ebp),%xmm0 +.L032ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L032ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 + decl %eax +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) +.byte 102,15,56,0,213 + leal 16(%edi),%edi + jnz .L031ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aes_hw_ccm64_encrypt_blocks,.-.L_aes_hw_ccm64_encrypt_blocks_begin +.globl aes_hw_ccm64_decrypt_blocks +.hidden aes_hw_ccm64_decrypt_blocks +.type aes_hw_ccm64_decrypt_blocks,@function +.align 16 +aes_hw_ccm64_decrypt_blocks: +.L_aes_hw_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L033enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L033enc1_loop_5 +.byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp .L034ccm64_dec_outer +.align 16 +.L034ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz .L035ccm64_dec_break + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups 32(%ebp),%xmm0 +.L036ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L036ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + jmp .L034ccm64_dec_outer +.align 16 +.L035ccm64_dec_break: + movl 240(%ebp),%ecx + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +.L037enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L037enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aes_hw_ccm64_decrypt_blocks,.-.L_aes_hw_ccm64_decrypt_blocks_begin +.globl aes_hw_ctr32_encrypt_blocks +.hidden aes_hw_ctr32_encrypt_blocks +.type aes_hw_ctr32_encrypt_blocks,@function +.align 16 +aes_hw_ctr32_encrypt_blocks: +.L_aes_hw_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call .L038pic +.L038pic: + popl %ebx + leal BORINGSSL_function_hit+0-.L038pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je .L039ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,195,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,205,0 + incl %ebx +.byte 102,15,58,34,195,1 + incl %ebp +.byte 102,15,58,34,205,1 + incl %ebx +.byte 102,15,58,34,195,2 + incl %ebp +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 + cmpl $6,%eax + jb .L040ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx + movdqa %xmm7,32(%esp) + movl %edx,%ebp + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl $6,%eax + jmp .L041ctr32_loop6 +.align 16 +.L041ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 + pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 + pxor %xmm0,%xmm3 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 64(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 48(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + movups %xmm6,64(%edi) + pshufd $192,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + pshufd $128,%xmm0,%xmm3 + subl $6,%eax + jnc .L041ctr32_loop6 + addl $6,%eax + jz .L042ctr32_ret + movdqu (%ebp),%xmm7 + movl %ebp,%edx + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +.L040ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb .L043ctr32_one + pshufd $64,%xmm0,%xmm4 + por %xmm7,%xmm3 + je .L044ctr32_two + pshufd $192,%xmm1,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb .L045ctr32_three + pshufd $128,%xmm1,%xmm6 + por %xmm7,%xmm5 + je .L046ctr32_four + por %xmm7,%xmm6 + call _aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L042ctr32_ret +.align 16 +.L039ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +.L043ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L047enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L047enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp .L042ctr32_ret +.align 16 +.L044ctr32_two: + call _aesni_encrypt2 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L042ctr32_ret +.align 16 +.L045ctr32_three: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L042ctr32_ret +.align 16 +.L046ctr32_four: + call _aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L042ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aes_hw_ctr32_encrypt_blocks,.-.L_aes_hw_ctr32_encrypt_blocks_begin +.globl aes_hw_xts_encrypt +.hidden aes_hw_xts_encrypt +.type aes_hw_xts_encrypt,@function +.align 16 +aes_hw_xts_encrypt: +.L_aes_hw_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L048enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L048enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc .L049xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L050xts_enc_loop6 +.align 16 +.L050xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L050xts_enc_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L049xts_enc_short: + addl $96,%eax + jz .L051xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L052xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L053xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L054xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L055xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L056xts_enc_done +.align 16 +.L052xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L057enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L057enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L056xts_enc_done +.align 16 +.L053xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_encrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L056xts_enc_done +.align 16 +.L054xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L056xts_enc_done +.align 16 +.L055xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L056xts_enc_done +.align 16 +.L051xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L058xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp .L059xts_enc_steal +.align 16 +.L056xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L058xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +.L059xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L059xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L060enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L060enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +.L058xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aes_hw_xts_encrypt,.-.L_aes_hw_xts_encrypt_begin +.globl aes_hw_xts_decrypt +.hidden aes_hw_xts_decrypt +.type aes_hw_xts_decrypt,@function +.align 16 +aes_hw_xts_decrypt: +.L_aes_hw_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L061enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L061enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc .L062xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L063xts_dec_loop6 +.align 16 +.L063xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L063xts_dec_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L062xts_dec_short: + addl $96,%eax + jz .L064xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L065xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L066xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L067xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L068xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L069xts_dec_done +.align 16 +.L065xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L070dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L070dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L069xts_dec_done +.align 16 +.L066xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_decrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L069xts_dec_done +.align 16 +.L067xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L069xts_dec_done +.align 16 +.L068xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L069xts_dec_done +.align 16 +.L064xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L071xts_dec_ret + movl %eax,112(%esp) + jmp .L072xts_dec_only_one_more +.align 16 +.L069xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L071xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +.L072xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L073dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L073dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +.L074xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L074xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L075dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L075dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +.L071xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aes_hw_xts_decrypt,.-.L_aes_hw_xts_decrypt_begin +.globl aes_hw_cbc_encrypt +.hidden aes_hw_cbc_encrypt +.type aes_hw_cbc_encrypt,@function +.align 16 +aes_hw_cbc_encrypt: +.L_aes_hw_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz .L076cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je .L077cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb .L078cbc_enc_tail + subl $16,%eax + jmp .L079cbc_enc_loop +.align 16 +.L079cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +.L080enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L080enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc .L079cbc_enc_loop + addl $16,%eax + jnz .L078cbc_enc_tail + movaps %xmm2,%xmm7 + pxor %xmm2,%xmm2 + jmp .L081cbc_ret +.L078cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp .L079cbc_enc_loop +.align 16 +.L077cbc_decrypt: + cmpl $80,%eax + jbe .L082cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp .L083cbc_dec_loop6_enter +.align 16 +.L084cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +.L083cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja .L084cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle .L085cbc_dec_clear_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +.L082cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe .L086cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe .L087cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe .L088cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe .L089cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 + subl $80,%eax + jmp .L090cbc_dec_tail_collected +.align 16 +.L086cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L091dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L091dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp .L090cbc_dec_tail_collected +.align 16 +.L087cbc_dec_two: + call _aesni_decrypt2 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp .L090cbc_dec_tail_collected +.align 16 +.L088cbc_dec_three: + call _aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp .L090cbc_dec_tail_collected +.align 16 +.L089cbc_dec_four: + call _aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 + subl $64,%eax + jmp .L090cbc_dec_tail_collected +.align 16 +.L085cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +.L090cbc_dec_tail_collected: + andl $15,%eax + jnz .L092cbc_dec_tail_partial + movups %xmm2,(%edi) + pxor %xmm0,%xmm0 + jmp .L081cbc_ret +.align 16 +.L092cbc_dec_tail_partial: + movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 + movdqa %xmm2,(%esp) +.L081cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 + movups %xmm7,(%ebp) + pxor %xmm7,%xmm7 +.L076cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aes_hw_cbc_encrypt,.-.L_aes_hw_cbc_encrypt_begin +.hidden _aesni_set_encrypt_key +.type _aesni_set_encrypt_key,@function +.align 16 +_aesni_set_encrypt_key: + pushl %ebp + pushl %ebx + testl %eax,%eax + jz .L093bad_pointer + testl %edx,%edx + jz .L093bad_pointer + call .L094pic +.L094pic: + popl %ebx + leal .Lkey_const-.L094pic(%ebx),%ebx + leal OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp + leal 16(%edx),%edx + andl $268437504,%ebp + cmpl $256,%ecx + je .L09514rounds + cmpl $192,%ecx + je .L09612rounds + cmpl $128,%ecx + jne .L097bad_keybits +.align 16 +.L09810rounds: + cmpl $268435456,%ebp + je .L09910rounds_alt + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call .L100key_128_cold +.byte 102,15,58,223,200,2 + call .L101key_128 +.byte 102,15,58,223,200,4 + call .L101key_128 +.byte 102,15,58,223,200,8 + call .L101key_128 +.byte 102,15,58,223,200,16 + call .L101key_128 +.byte 102,15,58,223,200,32 + call .L101key_128 +.byte 102,15,58,223,200,64 + call .L101key_128 +.byte 102,15,58,223,200,128 + call .L101key_128 +.byte 102,15,58,223,200,27 + call .L101key_128 +.byte 102,15,58,223,200,54 + call .L101key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + jmp .L102good_key +.align 16 +.L101key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.L100key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L09910rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +.L103loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz .L103loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp .L102good_key +.align 16 +.L09612rounds: + movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je .L10412rounds_alt + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call .L105key_192a_cold +.byte 102,15,58,223,202,2 + call .L106key_192b +.byte 102,15,58,223,202,4 + call .L107key_192a +.byte 102,15,58,223,202,8 + call .L106key_192b +.byte 102,15,58,223,202,16 + call .L107key_192a +.byte 102,15,58,223,202,32 + call .L106key_192b +.byte 102,15,58,223,202,64 + call .L107key_192a +.byte 102,15,58,223,202,128 + call .L106key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + jmp .L102good_key +.align 16 +.L107key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 16 +.L105key_192a_cold: + movaps %xmm2,%xmm5 +.L108key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 16 +.L106key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp .L108key_192b_warm +.align 16 +.L10412rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +.L109loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz .L109loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp .L102good_key +.align 16 +.L09514rounds: + movups 16(%eax),%xmm2 + leal 16(%edx),%edx + cmpl $268435456,%ebp + je .L11014rounds_alt + movl $13,%ecx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call .L111key_256a_cold +.byte 102,15,58,223,200,1 + call .L112key_256b +.byte 102,15,58,223,202,2 + call .L113key_256a +.byte 102,15,58,223,200,2 + call .L112key_256b +.byte 102,15,58,223,202,4 + call .L113key_256a +.byte 102,15,58,223,200,4 + call .L112key_256b +.byte 102,15,58,223,202,8 + call .L113key_256a +.byte 102,15,58,223,200,8 + call .L112key_256b +.byte 102,15,58,223,202,16 + call .L113key_256a +.byte 102,15,58,223,200,16 + call .L112key_256b +.byte 102,15,58,223,202,32 + call .L113key_256a +.byte 102,15,58,223,200,32 + call .L112key_256b +.byte 102,15,58,223,202,64 + call .L113key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + jmp .L102good_key +.align 16 +.L113key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +.L111key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L112key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 16 +.L11014rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +.L114loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz .L115done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp .L114loop_key256 +.L115done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +.L102good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret +.align 4 +.L093bad_pointer: + movl $-1,%eax + popl %ebx + popl %ebp + ret +.align 4 +.L097bad_keybits: + pxor %xmm0,%xmm0 + movl $-2,%eax + popl %ebx + popl %ebp + ret +.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key +.globl aes_hw_set_encrypt_key +.hidden aes_hw_set_encrypt_key +.type aes_hw_set_encrypt_key,@function +.align 16 +aes_hw_set_encrypt_key: +.L_aes_hw_set_encrypt_key_begin: +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call .L116pic +.L116pic: + popl %ebx + leal BORINGSSL_function_hit+3-.L116pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + ret +.size aes_hw_set_encrypt_key,.-.L_aes_hw_set_encrypt_key_begin +.globl aes_hw_set_decrypt_key +.hidden aes_hw_set_decrypt_key +.type aes_hw_set_decrypt_key,@function +.align 16 +aes_hw_set_decrypt_key: +.L_aes_hw_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz .L117dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +.L118dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja .L118dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorl %eax,%eax +.L117dec_key_ret: + ret +.size aes_hw_set_decrypt_key,.-.L_aes_hw_set_decrypt_key_begin +.align 64 +.Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-x86-win.asm b/yass/third_party/boringssl/src/gen/bcm/aesni-x86-win.asm new file mode 100644 index 0000000000..19b1d98bea --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-x86-win.asm @@ -0,0 +1,2466 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +%ifdef BORINGSSL_DISPATCH_TEST +extern _BORINGSSL_function_hit +%endif +global _aes_hw_encrypt +align 16 +_aes_hw_encrypt: +L$_aes_hw_encrypt_begin: +%ifdef BORINGSSL_DISPATCH_TEST + push ebx + push edx + call L$000pic +L$000pic: + pop ebx + lea ebx,[(_BORINGSSL_function_hit+1-L$000pic)+ebx] + mov edx,1 + mov BYTE [ebx],dl + pop edx + pop ebx +%endif + mov eax,DWORD [4+esp] + mov edx,DWORD [12+esp] + movups xmm2,[eax] + mov ecx,DWORD [240+edx] + mov eax,DWORD [8+esp] + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$001enc1_loop_1: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$001enc1_loop_1 +db 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups [eax],xmm2 + pxor xmm2,xmm2 + ret +global _aes_hw_decrypt +align 16 +_aes_hw_decrypt: +L$_aes_hw_decrypt_begin: + mov eax,DWORD [4+esp] + mov edx,DWORD [12+esp] + movups xmm2,[eax] + mov ecx,DWORD [240+edx] + mov eax,DWORD [8+esp] + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$002dec1_loop_2: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$002dec1_loop_2 +db 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups [eax],xmm2 + pxor xmm2,xmm2 + ret +align 16 +__aesni_encrypt2: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx + add ecx,16 +L$003enc2_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$003enc2_loop +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,221,208 +db 102,15,56,221,216 + ret +align 16 +__aesni_decrypt2: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx + add ecx,16 +L$004dec2_loop: +db 102,15,56,222,209 +db 102,15,56,222,217 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,222,208 +db 102,15,56,222,216 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$004dec2_loop +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,223,208 +db 102,15,56,223,216 + ret +align 16 +__aesni_encrypt3: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx + add ecx,16 +L$005enc3_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 +db 102,15,56,220,224 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$005enc3_loop +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +db 102,15,56,221,208 +db 102,15,56,221,216 +db 102,15,56,221,224 + ret +align 16 +__aesni_decrypt3: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx + add ecx,16 +L$006dec3_loop: +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,222,208 +db 102,15,56,222,216 +db 102,15,56,222,224 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$006dec3_loop +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +db 102,15,56,223,208 +db 102,15,56,223,216 +db 102,15,56,223,224 + ret +align 16 +__aesni_encrypt4: + movups xmm0,[edx] + movups xmm1,[16+edx] + shl ecx,4 + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx +db 15,31,64,0 + add ecx,16 +L$007enc4_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +db 102,15,56,220,233 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 +db 102,15,56,220,224 +db 102,15,56,220,232 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$007enc4_loop +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +db 102,15,56,220,233 +db 102,15,56,221,208 +db 102,15,56,221,216 +db 102,15,56,221,224 +db 102,15,56,221,232 + ret +align 16 +__aesni_decrypt4: + movups xmm0,[edx] + movups xmm1,[16+edx] + shl ecx,4 + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx +db 15,31,64,0 + add ecx,16 +L$008dec4_loop: +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +db 102,15,56,222,233 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,222,208 +db 102,15,56,222,216 +db 102,15,56,222,224 +db 102,15,56,222,232 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$008dec4_loop +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +db 102,15,56,222,233 +db 102,15,56,223,208 +db 102,15,56,223,216 +db 102,15,56,223,224 +db 102,15,56,223,232 + ret +align 16 +__aesni_encrypt6: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +db 102,15,56,220,209 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +db 102,15,56,220,217 + lea edx,[32+ecx*1+edx] + neg ecx +db 102,15,56,220,225 + pxor xmm7,xmm0 + movups xmm0,[ecx*1+edx] + add ecx,16 + jmp NEAR L$009_aesni_encrypt6_inner +align 16 +L$010enc6_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +L$009_aesni_encrypt6_inner: +db 102,15,56,220,233 +db 102,15,56,220,241 +db 102,15,56,220,249 +L$_aesni_encrypt6_enter: + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 +db 102,15,56,220,224 +db 102,15,56,220,232 +db 102,15,56,220,240 +db 102,15,56,220,248 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$010enc6_loop +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +db 102,15,56,220,233 +db 102,15,56,220,241 +db 102,15,56,220,249 +db 102,15,56,221,208 +db 102,15,56,221,216 +db 102,15,56,221,224 +db 102,15,56,221,232 +db 102,15,56,221,240 +db 102,15,56,221,248 + ret +align 16 +__aesni_decrypt6: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +db 102,15,56,222,209 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +db 102,15,56,222,217 + lea edx,[32+ecx*1+edx] + neg ecx +db 102,15,56,222,225 + pxor xmm7,xmm0 + movups xmm0,[ecx*1+edx] + add ecx,16 + jmp NEAR L$011_aesni_decrypt6_inner +align 16 +L$012dec6_loop: +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +L$011_aesni_decrypt6_inner: +db 102,15,56,222,233 +db 102,15,56,222,241 +db 102,15,56,222,249 +L$_aesni_decrypt6_enter: + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,222,208 +db 102,15,56,222,216 +db 102,15,56,222,224 +db 102,15,56,222,232 +db 102,15,56,222,240 +db 102,15,56,222,248 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$012dec6_loop +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +db 102,15,56,222,233 +db 102,15,56,222,241 +db 102,15,56,222,249 +db 102,15,56,223,208 +db 102,15,56,223,216 +db 102,15,56,223,224 +db 102,15,56,223,232 +db 102,15,56,223,240 +db 102,15,56,223,248 + ret +global _aes_hw_ecb_encrypt +align 16 +_aes_hw_ecb_encrypt: +L$_aes_hw_ecb_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + and eax,-16 + jz NEAR L$013ecb_ret + mov ecx,DWORD [240+edx] + test ebx,ebx + jz NEAR L$014ecb_decrypt + mov ebp,edx + mov ebx,ecx + cmp eax,96 + jb NEAR L$015ecb_enc_tail + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + movdqu xmm5,[48+esi] + movdqu xmm6,[64+esi] + movdqu xmm7,[80+esi] + lea esi,[96+esi] + sub eax,96 + jmp NEAR L$016ecb_enc_loop6_enter +align 16 +L$017ecb_enc_loop6: + movups [edi],xmm2 + movdqu xmm2,[esi] + movups [16+edi],xmm3 + movdqu xmm3,[16+esi] + movups [32+edi],xmm4 + movdqu xmm4,[32+esi] + movups [48+edi],xmm5 + movdqu xmm5,[48+esi] + movups [64+edi],xmm6 + movdqu xmm6,[64+esi] + movups [80+edi],xmm7 + lea edi,[96+edi] + movdqu xmm7,[80+esi] + lea esi,[96+esi] +L$016ecb_enc_loop6_enter: + call __aesni_encrypt6 + mov edx,ebp + mov ecx,ebx + sub eax,96 + jnc NEAR L$017ecb_enc_loop6 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + movups [80+edi],xmm7 + lea edi,[96+edi] + add eax,96 + jz NEAR L$013ecb_ret +L$015ecb_enc_tail: + movups xmm2,[esi] + cmp eax,32 + jb NEAR L$018ecb_enc_one + movups xmm3,[16+esi] + je NEAR L$019ecb_enc_two + movups xmm4,[32+esi] + cmp eax,64 + jb NEAR L$020ecb_enc_three + movups xmm5,[48+esi] + je NEAR L$021ecb_enc_four + movups xmm6,[64+esi] + xorps xmm7,xmm7 + call __aesni_encrypt6 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + jmp NEAR L$013ecb_ret +align 16 +L$018ecb_enc_one: + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$022enc1_loop_3: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$022enc1_loop_3 +db 102,15,56,221,209 + movups [edi],xmm2 + jmp NEAR L$013ecb_ret +align 16 +L$019ecb_enc_two: + call __aesni_encrypt2 + movups [edi],xmm2 + movups [16+edi],xmm3 + jmp NEAR L$013ecb_ret +align 16 +L$020ecb_enc_three: + call __aesni_encrypt3 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + jmp NEAR L$013ecb_ret +align 16 +L$021ecb_enc_four: + call __aesni_encrypt4 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + jmp NEAR L$013ecb_ret +align 16 +L$014ecb_decrypt: + mov ebp,edx + mov ebx,ecx + cmp eax,96 + jb NEAR L$023ecb_dec_tail + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + movdqu xmm5,[48+esi] + movdqu xmm6,[64+esi] + movdqu xmm7,[80+esi] + lea esi,[96+esi] + sub eax,96 + jmp NEAR L$024ecb_dec_loop6_enter +align 16 +L$025ecb_dec_loop6: + movups [edi],xmm2 + movdqu xmm2,[esi] + movups [16+edi],xmm3 + movdqu xmm3,[16+esi] + movups [32+edi],xmm4 + movdqu xmm4,[32+esi] + movups [48+edi],xmm5 + movdqu xmm5,[48+esi] + movups [64+edi],xmm6 + movdqu xmm6,[64+esi] + movups [80+edi],xmm7 + lea edi,[96+edi] + movdqu xmm7,[80+esi] + lea esi,[96+esi] +L$024ecb_dec_loop6_enter: + call __aesni_decrypt6 + mov edx,ebp + mov ecx,ebx + sub eax,96 + jnc NEAR L$025ecb_dec_loop6 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + movups [80+edi],xmm7 + lea edi,[96+edi] + add eax,96 + jz NEAR L$013ecb_ret +L$023ecb_dec_tail: + movups xmm2,[esi] + cmp eax,32 + jb NEAR L$026ecb_dec_one + movups xmm3,[16+esi] + je NEAR L$027ecb_dec_two + movups xmm4,[32+esi] + cmp eax,64 + jb NEAR L$028ecb_dec_three + movups xmm5,[48+esi] + je NEAR L$029ecb_dec_four + movups xmm6,[64+esi] + xorps xmm7,xmm7 + call __aesni_decrypt6 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + jmp NEAR L$013ecb_ret +align 16 +L$026ecb_dec_one: + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$030dec1_loop_4: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$030dec1_loop_4 +db 102,15,56,223,209 + movups [edi],xmm2 + jmp NEAR L$013ecb_ret +align 16 +L$027ecb_dec_two: + call __aesni_decrypt2 + movups [edi],xmm2 + movups [16+edi],xmm3 + jmp NEAR L$013ecb_ret +align 16 +L$028ecb_dec_three: + call __aesni_decrypt3 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + jmp NEAR L$013ecb_ret +align 16 +L$029ecb_dec_four: + call __aesni_decrypt4 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 +L$013ecb_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + pop edi + pop esi + pop ebx + pop ebp + ret +global _aes_hw_ccm64_encrypt_blocks +align 16 +_aes_hw_ccm64_encrypt_blocks: +L$_aes_hw_ccm64_encrypt_blocks_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + mov ecx,DWORD [40+esp] + mov ebp,esp + sub esp,60 + and esp,-16 + mov DWORD [48+esp],ebp + movdqu xmm7,[ebx] + movdqu xmm3,[ecx] + mov ecx,DWORD [240+edx] + mov DWORD [esp],202182159 + mov DWORD [4+esp],134810123 + mov DWORD [8+esp],67438087 + mov DWORD [12+esp],66051 + mov ebx,1 + xor ebp,ebp + mov DWORD [16+esp],ebx + mov DWORD [20+esp],ebp + mov DWORD [24+esp],ebp + mov DWORD [28+esp],ebp + shl ecx,4 + mov ebx,16 + lea ebp,[edx] + movdqa xmm5,[esp] + movdqa xmm2,xmm7 + lea edx,[32+ecx*1+edx] + sub ebx,ecx +db 102,15,56,0,253 +L$031ccm64_enc_outer: + movups xmm0,[ebp] + mov ecx,ebx + movups xmm6,[esi] + xorps xmm2,xmm0 + movups xmm1,[16+ebp] + xorps xmm0,xmm6 + xorps xmm3,xmm0 + movups xmm0,[32+ebp] +L$032ccm64_enc2_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$032ccm64_enc2_loop +db 102,15,56,220,209 +db 102,15,56,220,217 + paddq xmm7,[16+esp] + dec eax +db 102,15,56,221,208 +db 102,15,56,221,216 + lea esi,[16+esi] + xorps xmm6,xmm2 + movdqa xmm2,xmm7 + movups [edi],xmm6 +db 102,15,56,0,213 + lea edi,[16+edi] + jnz NEAR L$031ccm64_enc_outer + mov esp,DWORD [48+esp] + mov edi,DWORD [40+esp] + movups [edi],xmm3 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + pop edi + pop esi + pop ebx + pop ebp + ret +global _aes_hw_ccm64_decrypt_blocks +align 16 +_aes_hw_ccm64_decrypt_blocks: +L$_aes_hw_ccm64_decrypt_blocks_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + mov ecx,DWORD [40+esp] + mov ebp,esp + sub esp,60 + and esp,-16 + mov DWORD [48+esp],ebp + movdqu xmm7,[ebx] + movdqu xmm3,[ecx] + mov ecx,DWORD [240+edx] + mov DWORD [esp],202182159 + mov DWORD [4+esp],134810123 + mov DWORD [8+esp],67438087 + mov DWORD [12+esp],66051 + mov ebx,1 + xor ebp,ebp + mov DWORD [16+esp],ebx + mov DWORD [20+esp],ebp + mov DWORD [24+esp],ebp + mov DWORD [28+esp],ebp + movdqa xmm5,[esp] + movdqa xmm2,xmm7 + mov ebp,edx + mov ebx,ecx +db 102,15,56,0,253 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$033enc1_loop_5: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$033enc1_loop_5 +db 102,15,56,221,209 + shl ebx,4 + mov ecx,16 + movups xmm6,[esi] + paddq xmm7,[16+esp] + lea esi,[16+esi] + sub ecx,ebx + lea edx,[32+ebx*1+ebp] + mov ebx,ecx + jmp NEAR L$034ccm64_dec_outer +align 16 +L$034ccm64_dec_outer: + xorps xmm6,xmm2 + movdqa xmm2,xmm7 + movups [edi],xmm6 + lea edi,[16+edi] +db 102,15,56,0,213 + sub eax,1 + jz NEAR L$035ccm64_dec_break + movups xmm0,[ebp] + mov ecx,ebx + movups xmm1,[16+ebp] + xorps xmm6,xmm0 + xorps xmm2,xmm0 + xorps xmm3,xmm6 + movups xmm0,[32+ebp] +L$036ccm64_dec2_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$036ccm64_dec2_loop + movups xmm6,[esi] + paddq xmm7,[16+esp] +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,221,208 +db 102,15,56,221,216 + lea esi,[16+esi] + jmp NEAR L$034ccm64_dec_outer +align 16 +L$035ccm64_dec_break: + mov ecx,DWORD [240+ebp] + mov edx,ebp + movups xmm0,[edx] + movups xmm1,[16+edx] + xorps xmm6,xmm0 + lea edx,[32+edx] + xorps xmm3,xmm6 +L$037enc1_loop_6: +db 102,15,56,220,217 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$037enc1_loop_6 +db 102,15,56,221,217 + mov esp,DWORD [48+esp] + mov edi,DWORD [40+esp] + movups [edi],xmm3 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + pop edi + pop esi + pop ebx + pop ebp + ret +global _aes_hw_ctr32_encrypt_blocks +align 16 +_aes_hw_ctr32_encrypt_blocks: +L$_aes_hw_ctr32_encrypt_blocks_begin: + push ebp + push ebx + push esi + push edi +%ifdef BORINGSSL_DISPATCH_TEST + push ebx + push edx + call L$038pic +L$038pic: + pop ebx + lea ebx,[(_BORINGSSL_function_hit+0-L$038pic)+ebx] + mov edx,1 + mov BYTE [ebx],dl + pop edx + pop ebx +%endif + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + mov ebp,esp + sub esp,88 + and esp,-16 + mov DWORD [80+esp],ebp + cmp eax,1 + je NEAR L$039ctr32_one_shortcut + movdqu xmm7,[ebx] + mov DWORD [esp],202182159 + mov DWORD [4+esp],134810123 + mov DWORD [8+esp],67438087 + mov DWORD [12+esp],66051 + mov ecx,6 + xor ebp,ebp + mov DWORD [16+esp],ecx + mov DWORD [20+esp],ecx + mov DWORD [24+esp],ecx + mov DWORD [28+esp],ebp +db 102,15,58,22,251,3 +db 102,15,58,34,253,3 + mov ecx,DWORD [240+edx] + bswap ebx + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movdqa xmm2,[esp] +db 102,15,58,34,195,0 + lea ebp,[3+ebx] +db 102,15,58,34,205,0 + inc ebx +db 102,15,58,34,195,1 + inc ebp +db 102,15,58,34,205,1 + inc ebx +db 102,15,58,34,195,2 + inc ebp +db 102,15,58,34,205,2 + movdqa [48+esp],xmm0 +db 102,15,56,0,194 + movdqu xmm6,[edx] + movdqa [64+esp],xmm1 +db 102,15,56,0,202 + pshufd xmm2,xmm0,192 + pshufd xmm3,xmm0,128 + cmp eax,6 + jb NEAR L$040ctr32_tail + pxor xmm7,xmm6 + shl ecx,4 + mov ebx,16 + movdqa [32+esp],xmm7 + mov ebp,edx + sub ebx,ecx + lea edx,[32+ecx*1+edx] + sub eax,6 + jmp NEAR L$041ctr32_loop6 +align 16 +L$041ctr32_loop6: + pshufd xmm4,xmm0,64 + movdqa xmm0,[32+esp] + pshufd xmm5,xmm1,192 + pxor xmm2,xmm0 + pshufd xmm6,xmm1,128 + pxor xmm3,xmm0 + pshufd xmm7,xmm1,64 + movups xmm1,[16+ebp] + pxor xmm4,xmm0 + pxor xmm5,xmm0 +db 102,15,56,220,209 + pxor xmm6,xmm0 + pxor xmm7,xmm0 +db 102,15,56,220,217 + movups xmm0,[32+ebp] + mov ecx,ebx +db 102,15,56,220,225 +db 102,15,56,220,233 +db 102,15,56,220,241 +db 102,15,56,220,249 + call L$_aesni_encrypt6_enter + movups xmm1,[esi] + movups xmm0,[16+esi] + xorps xmm2,xmm1 + movups xmm1,[32+esi] + xorps xmm3,xmm0 + movups [edi],xmm2 + movdqa xmm0,[16+esp] + xorps xmm4,xmm1 + movdqa xmm1,[64+esp] + movups [16+edi],xmm3 + movups [32+edi],xmm4 + paddd xmm1,xmm0 + paddd xmm0,[48+esp] + movdqa xmm2,[esp] + movups xmm3,[48+esi] + movups xmm4,[64+esi] + xorps xmm5,xmm3 + movups xmm3,[80+esi] + lea esi,[96+esi] + movdqa [48+esp],xmm0 +db 102,15,56,0,194 + xorps xmm6,xmm4 + movups [48+edi],xmm5 + xorps xmm7,xmm3 + movdqa [64+esp],xmm1 +db 102,15,56,0,202 + movups [64+edi],xmm6 + pshufd xmm2,xmm0,192 + movups [80+edi],xmm7 + lea edi,[96+edi] + pshufd xmm3,xmm0,128 + sub eax,6 + jnc NEAR L$041ctr32_loop6 + add eax,6 + jz NEAR L$042ctr32_ret + movdqu xmm7,[ebp] + mov edx,ebp + pxor xmm7,[32+esp] + mov ecx,DWORD [240+ebp] +L$040ctr32_tail: + por xmm2,xmm7 + cmp eax,2 + jb NEAR L$043ctr32_one + pshufd xmm4,xmm0,64 + por xmm3,xmm7 + je NEAR L$044ctr32_two + pshufd xmm5,xmm1,192 + por xmm4,xmm7 + cmp eax,4 + jb NEAR L$045ctr32_three + pshufd xmm6,xmm1,128 + por xmm5,xmm7 + je NEAR L$046ctr32_four + por xmm6,xmm7 + call __aesni_encrypt6 + movups xmm1,[esi] + movups xmm0,[16+esi] + xorps xmm2,xmm1 + movups xmm1,[32+esi] + xorps xmm3,xmm0 + movups xmm0,[48+esi] + xorps xmm4,xmm1 + movups xmm1,[64+esi] + xorps xmm5,xmm0 + movups [edi],xmm2 + xorps xmm6,xmm1 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + jmp NEAR L$042ctr32_ret +align 16 +L$039ctr32_one_shortcut: + movups xmm2,[ebx] + mov ecx,DWORD [240+edx] +L$043ctr32_one: + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$047enc1_loop_7: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$047enc1_loop_7 +db 102,15,56,221,209 + movups xmm6,[esi] + xorps xmm6,xmm2 + movups [edi],xmm6 + jmp NEAR L$042ctr32_ret +align 16 +L$044ctr32_two: + call __aesni_encrypt2 + movups xmm5,[esi] + movups xmm6,[16+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups [edi],xmm2 + movups [16+edi],xmm3 + jmp NEAR L$042ctr32_ret +align 16 +L$045ctr32_three: + call __aesni_encrypt3 + movups xmm5,[esi] + movups xmm6,[16+esi] + xorps xmm2,xmm5 + movups xmm7,[32+esi] + xorps xmm3,xmm6 + movups [edi],xmm2 + xorps xmm4,xmm7 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + jmp NEAR L$042ctr32_ret +align 16 +L$046ctr32_four: + call __aesni_encrypt4 + movups xmm6,[esi] + movups xmm7,[16+esi] + movups xmm1,[32+esi] + xorps xmm2,xmm6 + movups xmm0,[48+esi] + xorps xmm3,xmm7 + movups [edi],xmm2 + xorps xmm4,xmm1 + movups [16+edi],xmm3 + xorps xmm5,xmm0 + movups [32+edi],xmm4 + movups [48+edi],xmm5 +L$042ctr32_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + movdqa [32+esp],xmm0 + pxor xmm5,xmm5 + movdqa [48+esp],xmm0 + pxor xmm6,xmm6 + movdqa [64+esp],xmm0 + pxor xmm7,xmm7 + mov esp,DWORD [80+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _aes_hw_xts_encrypt +align 16 +_aes_hw_xts_encrypt: +L$_aes_hw_xts_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov edx,DWORD [36+esp] + mov esi,DWORD [40+esp] + mov ecx,DWORD [240+edx] + movups xmm2,[esi] + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$048enc1_loop_8: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$048enc1_loop_8 +db 102,15,56,221,209 + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebp,esp + sub esp,120 + mov ecx,DWORD [240+edx] + and esp,-16 + mov DWORD [96+esp],135 + mov DWORD [100+esp],0 + mov DWORD [104+esp],1 + mov DWORD [108+esp],0 + mov DWORD [112+esp],eax + mov DWORD [116+esp],ebp + movdqa xmm1,xmm2 + pxor xmm0,xmm0 + movdqa xmm3,[96+esp] + pcmpgtd xmm0,xmm1 + and eax,-16 + mov ebp,edx + mov ebx,ecx + sub eax,96 + jc NEAR L$049xts_enc_short + shl ecx,4 + mov ebx,16 + sub ebx,ecx + lea edx,[32+ecx*1+edx] + jmp NEAR L$050xts_enc_loop6 +align 16 +L$050xts_enc_loop6: + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [16+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [32+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [48+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm7,xmm0,19 + movdqa [64+esp],xmm1 + paddq xmm1,xmm1 + movups xmm0,[ebp] + pand xmm7,xmm3 + movups xmm2,[esi] + pxor xmm7,xmm1 + mov ecx,ebx + movdqu xmm3,[16+esi] + xorps xmm2,xmm0 + movdqu xmm4,[32+esi] + pxor xmm3,xmm0 + movdqu xmm5,[48+esi] + pxor xmm4,xmm0 + movdqu xmm6,[64+esi] + pxor xmm5,xmm0 + movdqu xmm1,[80+esi] + pxor xmm6,xmm0 + lea esi,[96+esi] + pxor xmm2,[esp] + movdqa [80+esp],xmm7 + pxor xmm7,xmm1 + movups xmm1,[16+ebp] + pxor xmm3,[16+esp] + pxor xmm4,[32+esp] +db 102,15,56,220,209 + pxor xmm5,[48+esp] + pxor xmm6,[64+esp] +db 102,15,56,220,217 + pxor xmm7,xmm0 + movups xmm0,[32+ebp] +db 102,15,56,220,225 +db 102,15,56,220,233 +db 102,15,56,220,241 +db 102,15,56,220,249 + call L$_aesni_encrypt6_enter + movdqa xmm1,[80+esp] + pxor xmm0,xmm0 + xorps xmm2,[esp] + pcmpgtd xmm0,xmm1 + xorps xmm3,[16+esp] + movups [edi],xmm2 + xorps xmm4,[32+esp] + movups [16+edi],xmm3 + xorps xmm5,[48+esp] + movups [32+edi],xmm4 + xorps xmm6,[64+esp] + movups [48+edi],xmm5 + xorps xmm7,xmm1 + movups [64+edi],xmm6 + pshufd xmm2,xmm0,19 + movups [80+edi],xmm7 + lea edi,[96+edi] + movdqa xmm3,[96+esp] + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + sub eax,96 + jnc NEAR L$050xts_enc_loop6 + mov ecx,DWORD [240+ebp] + mov edx,ebp + mov ebx,ecx +L$049xts_enc_short: + add eax,96 + jz NEAR L$051xts_enc_done6x + movdqa xmm5,xmm1 + cmp eax,32 + jb NEAR L$052xts_enc_one + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + je NEAR L$053xts_enc_two + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + cmp eax,64 + jb NEAR L$054xts_enc_three + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm7,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + movdqa [esp],xmm5 + movdqa [16+esp],xmm6 + je NEAR L$055xts_enc_four + movdqa [32+esp],xmm7 + pshufd xmm7,xmm0,19 + movdqa [48+esp],xmm1 + paddq xmm1,xmm1 + pand xmm7,xmm3 + pxor xmm7,xmm1 + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + pxor xmm2,[esp] + movdqu xmm5,[48+esi] + pxor xmm3,[16+esp] + movdqu xmm6,[64+esi] + pxor xmm4,[32+esp] + lea esi,[80+esi] + pxor xmm5,[48+esp] + movdqa [64+esp],xmm7 + pxor xmm6,xmm7 + call __aesni_encrypt6 + movaps xmm1,[64+esp] + xorps xmm2,[esp] + xorps xmm3,[16+esp] + xorps xmm4,[32+esp] + movups [edi],xmm2 + xorps xmm5,[48+esp] + movups [16+edi],xmm3 + xorps xmm6,xmm1 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + lea edi,[80+edi] + jmp NEAR L$056xts_enc_done +align 16 +L$052xts_enc_one: + movups xmm2,[esi] + lea esi,[16+esi] + xorps xmm2,xmm5 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$057enc1_loop_9: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$057enc1_loop_9 +db 102,15,56,221,209 + xorps xmm2,xmm5 + movups [edi],xmm2 + lea edi,[16+edi] + movdqa xmm1,xmm5 + jmp NEAR L$056xts_enc_done +align 16 +L$053xts_enc_two: + movaps xmm6,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + lea esi,[32+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + call __aesni_encrypt2 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups [edi],xmm2 + movups [16+edi],xmm3 + lea edi,[32+edi] + movdqa xmm1,xmm6 + jmp NEAR L$056xts_enc_done +align 16 +L$054xts_enc_three: + movaps xmm7,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + movups xmm4,[32+esi] + lea esi,[48+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + call __aesni_encrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + lea edi,[48+edi] + movdqa xmm1,xmm7 + jmp NEAR L$056xts_enc_done +align 16 +L$055xts_enc_four: + movaps xmm6,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + movups xmm4,[32+esi] + xorps xmm2,[esp] + movups xmm5,[48+esi] + lea esi,[64+esi] + xorps xmm3,[16+esp] + xorps xmm4,xmm7 + xorps xmm5,xmm6 + call __aesni_encrypt4 + xorps xmm2,[esp] + xorps xmm3,[16+esp] + xorps xmm4,xmm7 + movups [edi],xmm2 + xorps xmm5,xmm6 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + lea edi,[64+edi] + movdqa xmm1,xmm6 + jmp NEAR L$056xts_enc_done +align 16 +L$051xts_enc_done6x: + mov eax,DWORD [112+esp] + and eax,15 + jz NEAR L$058xts_enc_ret + movdqa xmm5,xmm1 + mov DWORD [112+esp],eax + jmp NEAR L$059xts_enc_steal +align 16 +L$056xts_enc_done: + mov eax,DWORD [112+esp] + pxor xmm0,xmm0 + and eax,15 + jz NEAR L$058xts_enc_ret + pcmpgtd xmm0,xmm1 + mov DWORD [112+esp],eax + pshufd xmm5,xmm0,19 + paddq xmm1,xmm1 + pand xmm5,[96+esp] + pxor xmm5,xmm1 +L$059xts_enc_steal: + movzx ecx,BYTE [esi] + movzx edx,BYTE [edi-16] + lea esi,[1+esi] + mov BYTE [edi-16],cl + mov BYTE [edi],dl + lea edi,[1+edi] + sub eax,1 + jnz NEAR L$059xts_enc_steal + sub edi,DWORD [112+esp] + mov edx,ebp + mov ecx,ebx + movups xmm2,[edi-16] + xorps xmm2,xmm5 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$060enc1_loop_10: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$060enc1_loop_10 +db 102,15,56,221,209 + xorps xmm2,xmm5 + movups [edi-16],xmm2 +L$058xts_enc_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movdqa [esp],xmm0 + pxor xmm3,xmm3 + movdqa [16+esp],xmm0 + pxor xmm4,xmm4 + movdqa [32+esp],xmm0 + pxor xmm5,xmm5 + movdqa [48+esp],xmm0 + pxor xmm6,xmm6 + movdqa [64+esp],xmm0 + pxor xmm7,xmm7 + movdqa [80+esp],xmm0 + mov esp,DWORD [116+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _aes_hw_xts_decrypt +align 16 +_aes_hw_xts_decrypt: +L$_aes_hw_xts_decrypt_begin: + push ebp + push ebx + push esi + push edi + mov edx,DWORD [36+esp] + mov esi,DWORD [40+esp] + mov ecx,DWORD [240+edx] + movups xmm2,[esi] + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$061enc1_loop_11: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$061enc1_loop_11 +db 102,15,56,221,209 + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebp,esp + sub esp,120 + and esp,-16 + xor ebx,ebx + test eax,15 + setnz bl + shl ebx,4 + sub eax,ebx + mov DWORD [96+esp],135 + mov DWORD [100+esp],0 + mov DWORD [104+esp],1 + mov DWORD [108+esp],0 + mov DWORD [112+esp],eax + mov DWORD [116+esp],ebp + mov ecx,DWORD [240+edx] + mov ebp,edx + mov ebx,ecx + movdqa xmm1,xmm2 + pxor xmm0,xmm0 + movdqa xmm3,[96+esp] + pcmpgtd xmm0,xmm1 + and eax,-16 + sub eax,96 + jc NEAR L$062xts_dec_short + shl ecx,4 + mov ebx,16 + sub ebx,ecx + lea edx,[32+ecx*1+edx] + jmp NEAR L$063xts_dec_loop6 +align 16 +L$063xts_dec_loop6: + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [16+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [32+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [48+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm7,xmm0,19 + movdqa [64+esp],xmm1 + paddq xmm1,xmm1 + movups xmm0,[ebp] + pand xmm7,xmm3 + movups xmm2,[esi] + pxor xmm7,xmm1 + mov ecx,ebx + movdqu xmm3,[16+esi] + xorps xmm2,xmm0 + movdqu xmm4,[32+esi] + pxor xmm3,xmm0 + movdqu xmm5,[48+esi] + pxor xmm4,xmm0 + movdqu xmm6,[64+esi] + pxor xmm5,xmm0 + movdqu xmm1,[80+esi] + pxor xmm6,xmm0 + lea esi,[96+esi] + pxor xmm2,[esp] + movdqa [80+esp],xmm7 + pxor xmm7,xmm1 + movups xmm1,[16+ebp] + pxor xmm3,[16+esp] + pxor xmm4,[32+esp] +db 102,15,56,222,209 + pxor xmm5,[48+esp] + pxor xmm6,[64+esp] +db 102,15,56,222,217 + pxor xmm7,xmm0 + movups xmm0,[32+ebp] +db 102,15,56,222,225 +db 102,15,56,222,233 +db 102,15,56,222,241 +db 102,15,56,222,249 + call L$_aesni_decrypt6_enter + movdqa xmm1,[80+esp] + pxor xmm0,xmm0 + xorps xmm2,[esp] + pcmpgtd xmm0,xmm1 + xorps xmm3,[16+esp] + movups [edi],xmm2 + xorps xmm4,[32+esp] + movups [16+edi],xmm3 + xorps xmm5,[48+esp] + movups [32+edi],xmm4 + xorps xmm6,[64+esp] + movups [48+edi],xmm5 + xorps xmm7,xmm1 + movups [64+edi],xmm6 + pshufd xmm2,xmm0,19 + movups [80+edi],xmm7 + lea edi,[96+edi] + movdqa xmm3,[96+esp] + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + sub eax,96 + jnc NEAR L$063xts_dec_loop6 + mov ecx,DWORD [240+ebp] + mov edx,ebp + mov ebx,ecx +L$062xts_dec_short: + add eax,96 + jz NEAR L$064xts_dec_done6x + movdqa xmm5,xmm1 + cmp eax,32 + jb NEAR L$065xts_dec_one + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + je NEAR L$066xts_dec_two + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + cmp eax,64 + jb NEAR L$067xts_dec_three + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm7,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + movdqa [esp],xmm5 + movdqa [16+esp],xmm6 + je NEAR L$068xts_dec_four + movdqa [32+esp],xmm7 + pshufd xmm7,xmm0,19 + movdqa [48+esp],xmm1 + paddq xmm1,xmm1 + pand xmm7,xmm3 + pxor xmm7,xmm1 + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + pxor xmm2,[esp] + movdqu xmm5,[48+esi] + pxor xmm3,[16+esp] + movdqu xmm6,[64+esi] + pxor xmm4,[32+esp] + lea esi,[80+esi] + pxor xmm5,[48+esp] + movdqa [64+esp],xmm7 + pxor xmm6,xmm7 + call __aesni_decrypt6 + movaps xmm1,[64+esp] + xorps xmm2,[esp] + xorps xmm3,[16+esp] + xorps xmm4,[32+esp] + movups [edi],xmm2 + xorps xmm5,[48+esp] + movups [16+edi],xmm3 + xorps xmm6,xmm1 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + lea edi,[80+edi] + jmp NEAR L$069xts_dec_done +align 16 +L$065xts_dec_one: + movups xmm2,[esi] + lea esi,[16+esi] + xorps xmm2,xmm5 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$070dec1_loop_12: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$070dec1_loop_12 +db 102,15,56,223,209 + xorps xmm2,xmm5 + movups [edi],xmm2 + lea edi,[16+edi] + movdqa xmm1,xmm5 + jmp NEAR L$069xts_dec_done +align 16 +L$066xts_dec_two: + movaps xmm6,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + lea esi,[32+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + call __aesni_decrypt2 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups [edi],xmm2 + movups [16+edi],xmm3 + lea edi,[32+edi] + movdqa xmm1,xmm6 + jmp NEAR L$069xts_dec_done +align 16 +L$067xts_dec_three: + movaps xmm7,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + movups xmm4,[32+esi] + lea esi,[48+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + call __aesni_decrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + lea edi,[48+edi] + movdqa xmm1,xmm7 + jmp NEAR L$069xts_dec_done +align 16 +L$068xts_dec_four: + movaps xmm6,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + movups xmm4,[32+esi] + xorps xmm2,[esp] + movups xmm5,[48+esi] + lea esi,[64+esi] + xorps xmm3,[16+esp] + xorps xmm4,xmm7 + xorps xmm5,xmm6 + call __aesni_decrypt4 + xorps xmm2,[esp] + xorps xmm3,[16+esp] + xorps xmm4,xmm7 + movups [edi],xmm2 + xorps xmm5,xmm6 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + lea edi,[64+edi] + movdqa xmm1,xmm6 + jmp NEAR L$069xts_dec_done +align 16 +L$064xts_dec_done6x: + mov eax,DWORD [112+esp] + and eax,15 + jz NEAR L$071xts_dec_ret + mov DWORD [112+esp],eax + jmp NEAR L$072xts_dec_only_one_more +align 16 +L$069xts_dec_done: + mov eax,DWORD [112+esp] + pxor xmm0,xmm0 + and eax,15 + jz NEAR L$071xts_dec_ret + pcmpgtd xmm0,xmm1 + mov DWORD [112+esp],eax + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm3,[96+esp] + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 +L$072xts_dec_only_one_more: + pshufd xmm5,xmm0,19 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm5,xmm3 + pxor xmm5,xmm1 + mov edx,ebp + mov ecx,ebx + movups xmm2,[esi] + xorps xmm2,xmm5 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$073dec1_loop_13: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$073dec1_loop_13 +db 102,15,56,223,209 + xorps xmm2,xmm5 + movups [edi],xmm2 +L$074xts_dec_steal: + movzx ecx,BYTE [16+esi] + movzx edx,BYTE [edi] + lea esi,[1+esi] + mov BYTE [edi],cl + mov BYTE [16+edi],dl + lea edi,[1+edi] + sub eax,1 + jnz NEAR L$074xts_dec_steal + sub edi,DWORD [112+esp] + mov edx,ebp + mov ecx,ebx + movups xmm2,[edi] + xorps xmm2,xmm6 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$075dec1_loop_14: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$075dec1_loop_14 +db 102,15,56,223,209 + xorps xmm2,xmm6 + movups [edi],xmm2 +L$071xts_dec_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movdqa [esp],xmm0 + pxor xmm3,xmm3 + movdqa [16+esp],xmm0 + pxor xmm4,xmm4 + movdqa [32+esp],xmm0 + pxor xmm5,xmm5 + movdqa [48+esp],xmm0 + pxor xmm6,xmm6 + movdqa [64+esp],xmm0 + pxor xmm7,xmm7 + movdqa [80+esp],xmm0 + mov esp,DWORD [116+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _aes_hw_cbc_encrypt +align 16 +_aes_hw_cbc_encrypt: +L$_aes_hw_cbc_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov ebx,esp + mov edi,DWORD [24+esp] + sub ebx,24 + mov eax,DWORD [28+esp] + and ebx,-16 + mov edx,DWORD [32+esp] + mov ebp,DWORD [36+esp] + test eax,eax + jz NEAR L$076cbc_abort + cmp DWORD [40+esp],0 + xchg ebx,esp + movups xmm7,[ebp] + mov ecx,DWORD [240+edx] + mov ebp,edx + mov DWORD [16+esp],ebx + mov ebx,ecx + je NEAR L$077cbc_decrypt + movaps xmm2,xmm7 + cmp eax,16 + jb NEAR L$078cbc_enc_tail + sub eax,16 + jmp NEAR L$079cbc_enc_loop +align 16 +L$079cbc_enc_loop: + movups xmm7,[esi] + lea esi,[16+esi] + movups xmm0,[edx] + movups xmm1,[16+edx] + xorps xmm7,xmm0 + lea edx,[32+edx] + xorps xmm2,xmm7 +L$080enc1_loop_15: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$080enc1_loop_15 +db 102,15,56,221,209 + mov ecx,ebx + mov edx,ebp + movups [edi],xmm2 + lea edi,[16+edi] + sub eax,16 + jnc NEAR L$079cbc_enc_loop + add eax,16 + jnz NEAR L$078cbc_enc_tail + movaps xmm7,xmm2 + pxor xmm2,xmm2 + jmp NEAR L$081cbc_ret +L$078cbc_enc_tail: + mov ecx,eax +dd 2767451785 + mov ecx,16 + sub ecx,eax + xor eax,eax +dd 2868115081 + lea edi,[edi-16] + mov ecx,ebx + mov esi,edi + mov edx,ebp + jmp NEAR L$079cbc_enc_loop +align 16 +L$077cbc_decrypt: + cmp eax,80 + jbe NEAR L$082cbc_dec_tail + movaps [esp],xmm7 + sub eax,80 + jmp NEAR L$083cbc_dec_loop6_enter +align 16 +L$084cbc_dec_loop6: + movaps [esp],xmm0 + movups [edi],xmm7 + lea edi,[16+edi] +L$083cbc_dec_loop6_enter: + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + movdqu xmm5,[48+esi] + movdqu xmm6,[64+esi] + movdqu xmm7,[80+esi] + call __aesni_decrypt6 + movups xmm1,[esi] + movups xmm0,[16+esi] + xorps xmm2,[esp] + xorps xmm3,xmm1 + movups xmm1,[32+esi] + xorps xmm4,xmm0 + movups xmm0,[48+esi] + xorps xmm5,xmm1 + movups xmm1,[64+esi] + xorps xmm6,xmm0 + movups xmm0,[80+esi] + xorps xmm7,xmm1 + movups [edi],xmm2 + movups [16+edi],xmm3 + lea esi,[96+esi] + movups [32+edi],xmm4 + mov ecx,ebx + movups [48+edi],xmm5 + mov edx,ebp + movups [64+edi],xmm6 + lea edi,[80+edi] + sub eax,96 + ja NEAR L$084cbc_dec_loop6 + movaps xmm2,xmm7 + movaps xmm7,xmm0 + add eax,80 + jle NEAR L$085cbc_dec_clear_tail_collected + movups [edi],xmm2 + lea edi,[16+edi] +L$082cbc_dec_tail: + movups xmm2,[esi] + movaps xmm6,xmm2 + cmp eax,16 + jbe NEAR L$086cbc_dec_one + movups xmm3,[16+esi] + movaps xmm5,xmm3 + cmp eax,32 + jbe NEAR L$087cbc_dec_two + movups xmm4,[32+esi] + cmp eax,48 + jbe NEAR L$088cbc_dec_three + movups xmm5,[48+esi] + cmp eax,64 + jbe NEAR L$089cbc_dec_four + movups xmm6,[64+esi] + movaps [esp],xmm7 + movups xmm2,[esi] + xorps xmm7,xmm7 + call __aesni_decrypt6 + movups xmm1,[esi] + movups xmm0,[16+esi] + xorps xmm2,[esp] + xorps xmm3,xmm1 + movups xmm1,[32+esi] + xorps xmm4,xmm0 + movups xmm0,[48+esi] + xorps xmm5,xmm1 + movups xmm7,[64+esi] + xorps xmm6,xmm0 + movups [edi],xmm2 + movups [16+edi],xmm3 + pxor xmm3,xmm3 + movups [32+edi],xmm4 + pxor xmm4,xmm4 + movups [48+edi],xmm5 + pxor xmm5,xmm5 + lea edi,[64+edi] + movaps xmm2,xmm6 + pxor xmm6,xmm6 + sub eax,80 + jmp NEAR L$090cbc_dec_tail_collected +align 16 +L$086cbc_dec_one: + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$091dec1_loop_16: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$091dec1_loop_16 +db 102,15,56,223,209 + xorps xmm2,xmm7 + movaps xmm7,xmm6 + sub eax,16 + jmp NEAR L$090cbc_dec_tail_collected +align 16 +L$087cbc_dec_two: + call __aesni_decrypt2 + xorps xmm2,xmm7 + xorps xmm3,xmm6 + movups [edi],xmm2 + movaps xmm2,xmm3 + pxor xmm3,xmm3 + lea edi,[16+edi] + movaps xmm7,xmm5 + sub eax,32 + jmp NEAR L$090cbc_dec_tail_collected +align 16 +L$088cbc_dec_three: + call __aesni_decrypt3 + xorps xmm2,xmm7 + xorps xmm3,xmm6 + xorps xmm4,xmm5 + movups [edi],xmm2 + movaps xmm2,xmm4 + pxor xmm4,xmm4 + movups [16+edi],xmm3 + pxor xmm3,xmm3 + lea edi,[32+edi] + movups xmm7,[32+esi] + sub eax,48 + jmp NEAR L$090cbc_dec_tail_collected +align 16 +L$089cbc_dec_four: + call __aesni_decrypt4 + movups xmm1,[16+esi] + movups xmm0,[32+esi] + xorps xmm2,xmm7 + movups xmm7,[48+esi] + xorps xmm3,xmm6 + movups [edi],xmm2 + xorps xmm4,xmm1 + movups [16+edi],xmm3 + pxor xmm3,xmm3 + xorps xmm5,xmm0 + movups [32+edi],xmm4 + pxor xmm4,xmm4 + lea edi,[48+edi] + movaps xmm2,xmm5 + pxor xmm5,xmm5 + sub eax,64 + jmp NEAR L$090cbc_dec_tail_collected +align 16 +L$085cbc_dec_clear_tail_collected: + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 +L$090cbc_dec_tail_collected: + and eax,15 + jnz NEAR L$092cbc_dec_tail_partial + movups [edi],xmm2 + pxor xmm0,xmm0 + jmp NEAR L$081cbc_ret +align 16 +L$092cbc_dec_tail_partial: + movaps [esp],xmm2 + pxor xmm0,xmm0 + mov ecx,16 + mov esi,esp + sub ecx,eax +dd 2767451785 + movdqa [esp],xmm2 +L$081cbc_ret: + mov esp,DWORD [16+esp] + mov ebp,DWORD [36+esp] + pxor xmm2,xmm2 + pxor xmm1,xmm1 + movups [ebp],xmm7 + pxor xmm7,xmm7 +L$076cbc_abort: + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +__aesni_set_encrypt_key: + push ebp + push ebx + test eax,eax + jz NEAR L$093bad_pointer + test edx,edx + jz NEAR L$093bad_pointer + call L$094pic +L$094pic: + pop ebx + lea ebx,[(L$key_const-L$094pic)+ebx] + lea ebp,[_OPENSSL_ia32cap_P] + movups xmm0,[eax] + xorps xmm4,xmm4 + mov ebp,DWORD [4+ebp] + lea edx,[16+edx] + and ebp,268437504 + cmp ecx,256 + je NEAR L$09514rounds + cmp ecx,192 + je NEAR L$09612rounds + cmp ecx,128 + jne NEAR L$097bad_keybits +align 16 +L$09810rounds: + cmp ebp,268435456 + je NEAR L$09910rounds_alt + mov ecx,9 + movups [edx-16],xmm0 +db 102,15,58,223,200,1 + call L$100key_128_cold +db 102,15,58,223,200,2 + call L$101key_128 +db 102,15,58,223,200,4 + call L$101key_128 +db 102,15,58,223,200,8 + call L$101key_128 +db 102,15,58,223,200,16 + call L$101key_128 +db 102,15,58,223,200,32 + call L$101key_128 +db 102,15,58,223,200,64 + call L$101key_128 +db 102,15,58,223,200,128 + call L$101key_128 +db 102,15,58,223,200,27 + call L$101key_128 +db 102,15,58,223,200,54 + call L$101key_128 + movups [edx],xmm0 + mov DWORD [80+edx],ecx + jmp NEAR L$102good_key +align 16 +L$101key_128: + movups [edx],xmm0 + lea edx,[16+edx] +L$100key_128_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret +align 16 +L$09910rounds_alt: + movdqa xmm5,[ebx] + mov ecx,8 + movdqa xmm4,[32+ebx] + movdqa xmm2,xmm0 + movdqu [edx-16],xmm0 +L$103loop_key128: +db 102,15,56,0,197 +db 102,15,56,221,196 + pslld xmm4,1 + lea edx,[16+edx] + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu [edx-16],xmm0 + movdqa xmm2,xmm0 + dec ecx + jnz NEAR L$103loop_key128 + movdqa xmm4,[48+ebx] +db 102,15,56,0,197 +db 102,15,56,221,196 + pslld xmm4,1 + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu [edx],xmm0 + movdqa xmm2,xmm0 +db 102,15,56,0,197 +db 102,15,56,221,196 + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu [16+edx],xmm0 + mov ecx,9 + mov DWORD [96+edx],ecx + jmp NEAR L$102good_key +align 16 +L$09612rounds: + movq xmm2,[16+eax] + cmp ebp,268435456 + je NEAR L$10412rounds_alt + mov ecx,11 + movups [edx-16],xmm0 +db 102,15,58,223,202,1 + call L$105key_192a_cold +db 102,15,58,223,202,2 + call L$106key_192b +db 102,15,58,223,202,4 + call L$107key_192a +db 102,15,58,223,202,8 + call L$106key_192b +db 102,15,58,223,202,16 + call L$107key_192a +db 102,15,58,223,202,32 + call L$106key_192b +db 102,15,58,223,202,64 + call L$107key_192a +db 102,15,58,223,202,128 + call L$106key_192b + movups [edx],xmm0 + mov DWORD [48+edx],ecx + jmp NEAR L$102good_key +align 16 +L$107key_192a: + movups [edx],xmm0 + lea edx,[16+edx] +align 16 +L$105key_192a_cold: + movaps xmm5,xmm2 +L$108key_192b_warm: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + ret +align 16 +L$106key_192b: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups [edx],xmm5 + shufps xmm3,xmm2,78 + movups [16+edx],xmm3 + lea edx,[32+edx] + jmp NEAR L$108key_192b_warm +align 16 +L$10412rounds_alt: + movdqa xmm5,[16+ebx] + movdqa xmm4,[32+ebx] + mov ecx,8 + movdqu [edx-16],xmm0 +L$109loop_key192: + movq [edx],xmm2 + movdqa xmm1,xmm2 +db 102,15,56,0,213 +db 102,15,56,221,212 + pslld xmm4,1 + lea edx,[24+edx] + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pshufd xmm3,xmm0,255 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pxor xmm0,xmm2 + pxor xmm2,xmm3 + movdqu [edx-16],xmm0 + dec ecx + jnz NEAR L$109loop_key192 + mov ecx,11 + mov DWORD [32+edx],ecx + jmp NEAR L$102good_key +align 16 +L$09514rounds: + movups xmm2,[16+eax] + lea edx,[16+edx] + cmp ebp,268435456 + je NEAR L$11014rounds_alt + mov ecx,13 + movups [edx-32],xmm0 + movups [edx-16],xmm2 +db 102,15,58,223,202,1 + call L$111key_256a_cold +db 102,15,58,223,200,1 + call L$112key_256b +db 102,15,58,223,202,2 + call L$113key_256a +db 102,15,58,223,200,2 + call L$112key_256b +db 102,15,58,223,202,4 + call L$113key_256a +db 102,15,58,223,200,4 + call L$112key_256b +db 102,15,58,223,202,8 + call L$113key_256a +db 102,15,58,223,200,8 + call L$112key_256b +db 102,15,58,223,202,16 + call L$113key_256a +db 102,15,58,223,200,16 + call L$112key_256b +db 102,15,58,223,202,32 + call L$113key_256a +db 102,15,58,223,200,32 + call L$112key_256b +db 102,15,58,223,202,64 + call L$113key_256a + movups [edx],xmm0 + mov DWORD [16+edx],ecx + xor eax,eax + jmp NEAR L$102good_key +align 16 +L$113key_256a: + movups [edx],xmm2 + lea edx,[16+edx] +L$111key_256a_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret +align 16 +L$112key_256b: + movups [edx],xmm0 + lea edx,[16+edx] + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + ret +align 16 +L$11014rounds_alt: + movdqa xmm5,[ebx] + movdqa xmm4,[32+ebx] + mov ecx,7 + movdqu [edx-32],xmm0 + movdqa xmm1,xmm2 + movdqu [edx-16],xmm2 +L$114loop_key256: +db 102,15,56,0,213 +db 102,15,56,221,212 + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pslld xmm4,1 + pxor xmm0,xmm2 + movdqu [edx],xmm0 + dec ecx + jz NEAR L$115done_key256 + pshufd xmm2,xmm0,255 + pxor xmm3,xmm3 +db 102,15,56,221,211 + movdqa xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm1,xmm3 + pxor xmm2,xmm1 + movdqu [16+edx],xmm2 + lea edx,[32+edx] + movdqa xmm1,xmm2 + jmp NEAR L$114loop_key256 +L$115done_key256: + mov ecx,13 + mov DWORD [16+edx],ecx +L$102good_key: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + xor eax,eax + pop ebx + pop ebp + ret +align 4 +L$093bad_pointer: + mov eax,-1 + pop ebx + pop ebp + ret +align 4 +L$097bad_keybits: + pxor xmm0,xmm0 + mov eax,-2 + pop ebx + pop ebp + ret +global _aes_hw_set_encrypt_key +align 16 +_aes_hw_set_encrypt_key: +L$_aes_hw_set_encrypt_key_begin: +%ifdef BORINGSSL_DISPATCH_TEST + push ebx + push edx + call L$116pic +L$116pic: + pop ebx + lea ebx,[(_BORINGSSL_function_hit+3-L$116pic)+ebx] + mov edx,1 + mov BYTE [ebx],dl + pop edx + pop ebx +%endif + mov eax,DWORD [4+esp] + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + call __aesni_set_encrypt_key + ret +global _aes_hw_set_decrypt_key +align 16 +_aes_hw_set_decrypt_key: +L$_aes_hw_set_decrypt_key_begin: + mov eax,DWORD [4+esp] + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + call __aesni_set_encrypt_key + mov edx,DWORD [12+esp] + shl ecx,4 + test eax,eax + jnz NEAR L$117dec_key_ret + lea eax,[16+ecx*1+edx] + movups xmm0,[edx] + movups xmm1,[eax] + movups [eax],xmm0 + movups [edx],xmm1 + lea edx,[16+edx] + lea eax,[eax-16] +L$118dec_key_inverse: + movups xmm0,[edx] + movups xmm1,[eax] +db 102,15,56,219,192 +db 102,15,56,219,201 + lea edx,[16+edx] + lea eax,[eax-16] + movups [16+eax],xmm0 + movups [edx-16],xmm1 + cmp eax,edx + ja NEAR L$118dec_key_inverse + movups xmm0,[edx] +db 102,15,56,219,192 + movups [edx],xmm0 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + xor eax,eax +L$117dec_key_ret: + ret +align 64 +L$key_const: +dd 202313229,202313229,202313229,202313229 +dd 67569157,67569157,67569157,67569157 +dd 1,1,1,1 +dd 27,27,27,27 +db 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +db 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +db 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +db 115,108,46,111,114,103,62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-apple.S new file mode 100644 index 0000000000..f3505b9529 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-apple.S @@ -0,0 +1,2359 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + +.globl _aes_hw_encrypt +.private_extern _aes_hw_encrypt + +.p2align 4 +_aes_hw_encrypt: + +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST + + movb $1,_BORINGSSL_function_hit+1(%rip) +#endif + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_enc1_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + ret + + + +.globl _aes_hw_decrypt +.private_extern _aes_hw_decrypt + +.p2align 4 +_aes_hw_decrypt: + +_CET_ENDBR + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_dec1_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + ret + + + +.p2align 4 +_aesni_encrypt2: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$enc_loop2: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop2 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret + + + +.p2align 4 +_aesni_decrypt2: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$dec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret + + + +.p2align 4 +_aesni_encrypt3: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$enc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret + + + +.p2align 4 +_aesni_decrypt3: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$dec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret + + + +.p2align 4 +_aesni_encrypt4: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +L$enc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret + + + +.p2align 4 +_aesni_decrypt4: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +L$dec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret + + + +.p2align 4 +_aesni_encrypt6: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$enc_loop6_enter +.p2align 4 +L$enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +L$enc_loop6_enter: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret + + + +.p2align 4 +_aesni_decrypt6: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$dec_loop6_enter +.p2align 4 +L$dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +L$dec_loop6_enter: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret + + + +.p2align 4 +_aesni_encrypt8: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$enc_loop8_inner +.p2align 4 +L$enc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +L$enc_loop8_inner: +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +L$enc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + ret + + + +.p2align 4 +_aesni_decrypt8: + + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$dec_loop8_inner +.p2align 4 +L$dec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +L$dec_loop8_inner: +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +L$dec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + ret + + +.globl _aes_hw_ecb_encrypt +.private_extern _aes_hw_ecb_encrypt + +.p2align 4 +_aes_hw_ecb_encrypt: + +_CET_ENDBR + andq $-16,%rdx + jz L$ecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz L$ecb_decrypt + + cmpq $0x80,%rdx + jb L$ecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $0x80,%rdx + jmp L$ecb_enc_loop8_enter +.p2align 4 +L$ecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $0x80,%rdx + jnc L$ecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $0x80,%rdx + jz L$ecb_ret + +L$ecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $0x20,%rdx + jb L$ecb_enc_one + movups 16(%rdi),%xmm3 + je L$ecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $0x40,%rdx + jb L$ecb_enc_three + movups 48(%rdi),%xmm5 + je L$ecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $0x60,%rdx + jb L$ecb_enc_five + movups 80(%rdi),%xmm7 + je L$ecb_enc_six + movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_3 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp L$ecb_ret + +.p2align 4 +L$ecb_decrypt: + cmpq $0x80,%rdx + jb L$ecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $0x80,%rdx + jmp L$ecb_dec_loop8_enter +.p2align 4 +L$ecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $0x80,%rdx + jnc L$ecb_dec_loop8 + + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movq %r11,%rcx + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movl %r10d,%eax + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 + leaq 128(%rsi),%rsi + addq $0x80,%rdx + jz L$ecb_ret + +L$ecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $0x20,%rdx + jb L$ecb_dec_one + movups 16(%rdi),%xmm3 + je L$ecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $0x40,%rdx + jb L$ecb_dec_three + movups 48(%rdi),%xmm5 + je L$ecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $0x60,%rdx + jb L$ecb_dec_five + movups 80(%rdi),%xmm7 + je L$ecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_4 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + +L$ecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + ret + + +.globl _aes_hw_ctr32_encrypt_blocks +.private_extern _aes_hw_ctr32_encrypt_blocks + +.p2align 4 +_aes_hw_ctr32_encrypt_blocks: + +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST + movb $1,_BORINGSSL_function_hit(%rip) +#endif + cmpq $1,%rdx + jne L$ctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_5: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_5 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp L$ctr32_epilogue + +.p2align 4 +L$ctr32_bulk: + leaq (%rsp),%r11 + + pushq %rbp + + subq $128,%rsp + andq $-16,%rsp + + + + + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%ebp + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %ebp,%eax + xorl %ebp,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %ebp,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %ebp,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl %r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 + movl 240(%rcx),%eax + xorl %ebp,%r9d + bswapl %r10d + movl %r9d,80+12(%rsp) + xorl %ebp,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + xorl %ebp,%r9d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx + jb L$ctr32_tail + + leaq 128(%rcx),%rcx + subq $8,%rdx + jmp L$ctr32_loop8 + +.p2align 5 +L$ctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 +.byte 102,15,56,220,209 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 +.byte 102,15,56,220,217 + bswapl %r9d + movups 32-128(%rcx),%xmm0 +.byte 102,15,56,220,225 + xorl %ebp,%r9d + nop +.byte 102,15,56,220,233 + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %ebp,%r9d + movdqu 0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 160-128(%rcx),%xmm0 + + jb L$ctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je L$ctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp L$ctr32_enc_done + +.p2align 4 +L$ctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + prefetcht0 448(%rdi) + prefetcht0 512(%rdi) + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + + subq $8,%rdx + jnc L$ctr32_loop8 + + addq $8,%rdx + jz L$ctr32_done + leaq -128(%rcx),%rcx + +L$ctr32_tail: + + + leaq 16(%rcx),%rcx + cmpq $4,%rdx + jb L$ctr32_loop3 + je L$ctr32_loop4 + + + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 + + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 + + call L$enc_loop8_enter + + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb L$ctr32_done + + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups %xmm7,80(%rsi) + je L$ctr32_done + + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp L$ctr32_done + +.p2align 5 +L$ctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz L$ctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp L$ctr32_done + +.p2align 5 +L$ctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz L$ctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb L$ctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je L$ctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) + +L$ctr32_done: + xorps %xmm0,%xmm0 + xorl %ebp,%ebp + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 + movq -8(%r11),%rbp + + leaq (%r11),%rsp + +L$ctr32_epilogue: + ret + + +.globl _aes_hw_cbc_encrypt +.private_extern _aes_hw_cbc_encrypt + +.p2align 4 +_aes_hw_cbc_encrypt: + +_CET_ENDBR + testq %rdx,%rdx + jz L$cbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz L$cbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb L$cbc_enc_tail + subq $16,%rdx + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +L$oop_enc1_6: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_6 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc L$cbc_enc_loop + addq $16,%rdx + jnz L$cbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + jmp L$cbc_ret + +L$cbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp L$cbc_enc_loop + +.p2align 4 +L$cbc_decrypt: + cmpq $16,%rdx + jne L$cbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_7: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_7 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp L$cbc_ret +.p2align 4 +L$cbc_decrypt_bulk: + leaq (%rsp),%r11 + + pushq %rbp + + subq $16,%rsp + andq $-16,%rsp + movq %rcx,%rbp + movups (%r8),%xmm10 + movl %r10d,%eax + cmpq $0x50,%rdx + jbe L$cbc_dec_tail + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + cmpq $0x70,%rdx + jbe L$cbc_dec_six_or_seven + + subq $0x70,%rdx + leaq 112(%rcx),%rcx + jmp L$cbc_dec_loop8_enter +.p2align 4 +L$cbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +L$cbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + movq $-1,%rbp + cmpq $0x70,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor %xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + adcq $0,%rbp + andq $128,%rbp +.byte 102,68,15,56,222,201 + addq %rdi,%rbp + movups 48-112(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb L$cbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je L$cbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_done: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 0(%rbp),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%rbp),%xmm12 + movdqu 32(%rbp),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%rbp),%xmm14 + movdqu 64(%rbp),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%rbp),%xmm1 + movups -112(%rcx),%xmm0 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm1,%xmm7 + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + + subq $0x80,%rdx + ja L$cbc_dec_loop8 + + movaps %xmm9,%xmm2 + leaq -112(%rcx),%rcx + addq $0x70,%rdx + jle L$cbc_dec_clear_tail_collected + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $0x50,%rdx + jbe L$cbc_dec_tail + + movaps %xmm11,%xmm2 +L$cbc_dec_six_or_seven: + cmpq $0x60,%rdx + ja L$cbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + leaq 96(%rsi),%rsi + movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp L$cbc_dec_tail_collected + +L$cbc_dec_tail: + movups (%rdi),%xmm2 + subq $0x10,%rdx + jbe L$cbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm2,%xmm11 + subq $0x10,%rdx + jbe L$cbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm3,%xmm12 + subq $0x10,%rdx + jbe L$cbc_dec_three + + movups 48(%rdi),%xmm5 + movaps %xmm4,%xmm13 + subq $0x10,%rdx + jbe L$cbc_dec_four + + movups 64(%rdi),%xmm6 + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + subq $0x10,%rdx + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_one: + movaps %xmm2,%xmm11 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_8: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_8 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_two: + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leaq 16(%rsi),%rsi + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_three: + movaps %xmm4,%xmm13 + call _aesni_decrypt3 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 + leaq 32(%rsi),%rsi + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_four: + movaps %xmm5,%xmm14 + call _aesni_decrypt4 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 + leaq 48(%rsi),%rsi + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 +L$cbc_dec_tail_collected: + movups %xmm10,(%r8) + andq $15,%rdx + jnz L$cbc_dec_tail_partial + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp L$cbc_dec_ret +.p2align 4 +L$cbc_dec_tail_partial: + movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq (%rsp),%rsi +.long 0x9066A4F3 + movdqa %xmm2,(%rsp) + +L$cbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movq -8(%r11),%rbp + + leaq (%r11),%rsp + +L$cbc_ret: + ret + + +.globl _aes_hw_set_decrypt_key +.private_extern _aes_hw_set_decrypt_key + +.p2align 4 +_aes_hw_set_decrypt_key: + +_CET_ENDBR +.byte 0x48,0x83,0xEC,0x08 + + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz L$dec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +L$dec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja L$dec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + pxor %xmm1,%xmm1 + movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 +L$dec_key_ret: + addq $8,%rsp + + ret + +L$SEH_end_set_decrypt_key: + +.globl _aes_hw_set_encrypt_key +.private_extern _aes_hw_set_encrypt_key + +.p2align 4 +_aes_hw_set_encrypt_key: +__aesni_set_encrypt_key: + +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST + movb $1,_BORINGSSL_function_hit+3(%rip) +#endif +.byte 0x48,0x83,0xEC,0x08 + + movq $-1,%rax + testq %rdi,%rdi + jz L$enc_key_ret + testq %rdx,%rdx + jz L$enc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq _OPENSSL_ia32cap_P(%rip),%r10 + movl 4(%r10),%r10d + andl $268437504,%r10d + leaq 16(%rdx),%rax + cmpl $256,%esi + je L$14rounds + cmpl $192,%esi + je L$12rounds + cmpl $128,%esi + jne L$bad_keybits + +L$10rounds: + movl $9,%esi + cmpl $268435456,%r10d + je L$10rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call L$key_expansion_128_cold +.byte 102,15,58,223,200,2 + call L$key_expansion_128 +.byte 102,15,58,223,200,4 + call L$key_expansion_128 +.byte 102,15,58,223,200,8 + call L$key_expansion_128 +.byte 102,15,58,223,200,16 + call L$key_expansion_128 +.byte 102,15,58,223,200,32 + call L$key_expansion_128 +.byte 102,15,58,223,200,64 + call L$key_expansion_128 +.byte 102,15,58,223,200,128 + call L$key_expansion_128 +.byte 102,15,58,223,200,27 + call L$key_expansion_128 +.byte 102,15,58,223,200,54 + call L$key_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$10rounds_alt: + movdqa L$key_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa L$key_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp L$oop_key128 + +.p2align 4 +L$oop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz L$oop_key128 + + movdqa L$key_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + cmpl $268435456,%r10d + je L$12rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call L$key_expansion_192a_cold +.byte 102,15,58,223,202,2 + call L$key_expansion_192b +.byte 102,15,58,223,202,4 + call L$key_expansion_192a +.byte 102,15,58,223,202,8 + call L$key_expansion_192b +.byte 102,15,58,223,202,16 + call L$key_expansion_192a +.byte 102,15,58,223,202,32 + call L$key_expansion_192b +.byte 102,15,58,223,202,64 + call L$key_expansion_192a +.byte 102,15,58,223,202,128 + call L$key_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$12rounds_alt: + movdqa L$key_rotate192(%rip),%xmm5 + movdqa L$key_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp L$oop_key192 + +.p2align 4 +L$oop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $0xff,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz L$oop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + cmpl $268435456,%r10d + je L$14rounds_alt + + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call L$key_expansion_256a_cold +.byte 102,15,58,223,200,1 + call L$key_expansion_256b +.byte 102,15,58,223,202,2 + call L$key_expansion_256a +.byte 102,15,58,223,200,2 + call L$key_expansion_256b +.byte 102,15,58,223,202,4 + call L$key_expansion_256a +.byte 102,15,58,223,200,4 + call L$key_expansion_256b +.byte 102,15,58,223,202,8 + call L$key_expansion_256a +.byte 102,15,58,223,200,8 + call L$key_expansion_256b +.byte 102,15,58,223,202,16 + call L$key_expansion_256a +.byte 102,15,58,223,200,16 + call L$key_expansion_256b +.byte 102,15,58,223,202,32 + call L$key_expansion_256a +.byte 102,15,58,223,200,32 + call L$key_expansion_256b +.byte 102,15,58,223,202,64 + call L$key_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$14rounds_alt: + movdqa L$key_rotate(%rip),%xmm5 + movdqa L$key_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp L$oop_key256 + +.p2align 4 +L$oop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz L$done_key256 + + pshufd $0xff,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp L$oop_key256 + +L$done_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$bad_keybits: + movq $-2,%rax +L$enc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + addq $8,%rsp + + ret + +L$SEH_end_set_encrypt_key: + +.p2align 4 +L$key_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret + +.p2align 4 +L$key_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_192a_cold: + movaps %xmm2,%xmm5 +L$key_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret + +.p2align 4 +L$key_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp L$key_expansion_192b_warm + +.p2align 4 +L$key_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +L$key_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret + +.p2align 4 +L$key_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret + + +.section __DATA,__const +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$increment32: +.long 6,6,6,0 +L$increment64: +.long 1,0,0,0 +L$xts_magic: +.long 0x87,0,1,0 +L$increment1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +L$key_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +L$key_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +L$key_rcon1: +.long 1,1,1,1 +L$key_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-linux.S new file mode 100644 index 0000000000..68742fbb2c --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-linux.S @@ -0,0 +1,2361 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P +.globl aes_hw_encrypt +.hidden aes_hw_encrypt +.type aes_hw_encrypt,@function +.align 16 +aes_hw_encrypt: +.cfi_startproc +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+1(%rip) +#endif + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_enc1_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + ret +.cfi_endproc +.size aes_hw_encrypt,.-aes_hw_encrypt + +.globl aes_hw_decrypt +.hidden aes_hw_decrypt +.type aes_hw_decrypt,@function +.align 16 +aes_hw_decrypt: +.cfi_startproc +_CET_ENDBR + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_dec1_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + ret +.cfi_endproc +.size aes_hw_decrypt, .-aes_hw_decrypt +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop2: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop2 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.cfi_endproc +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.cfi_endproc +.size _aesni_decrypt2,.-_aesni_decrypt2 +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.cfi_endproc +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.cfi_endproc +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +.Lenc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.cfi_endproc +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +.Ldec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.cfi_endproc +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Lenc_loop6_enter +.align 16 +.Lenc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.Lenc_loop6_enter: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.cfi_endproc +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Ldec_loop6_enter +.align 16 +.Ldec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.Ldec_loop6_enter: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.cfi_endproc +.size _aesni_decrypt6,.-_aesni_decrypt6 +.type _aesni_encrypt8,@function +.align 16 +_aesni_encrypt8: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Lenc_loop8_inner +.align 16 +.Lenc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.Lenc_loop8_inner: +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.Lenc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + ret +.cfi_endproc +.size _aesni_encrypt8,.-_aesni_encrypt8 +.type _aesni_decrypt8,@function +.align 16 +_aesni_decrypt8: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Ldec_loop8_inner +.align 16 +.Ldec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.Ldec_loop8_inner: +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.Ldec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + ret +.cfi_endproc +.size _aesni_decrypt8,.-_aesni_decrypt8 +.globl aes_hw_ecb_encrypt +.hidden aes_hw_ecb_encrypt +.type aes_hw_ecb_encrypt,@function +.align 16 +aes_hw_ecb_encrypt: +.cfi_startproc +_CET_ENDBR + andq $-16,%rdx + jz .Lecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz .Lecb_decrypt + + cmpq $0x80,%rdx + jb .Lecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $0x80,%rdx + jmp .Lecb_enc_loop8_enter +.align 16 +.Lecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $0x80,%rdx + jnc .Lecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $0x80,%rdx + jz .Lecb_ret + +.Lecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $0x20,%rdx + jb .Lecb_enc_one + movups 16(%rdi),%xmm3 + je .Lecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $0x40,%rdx + jb .Lecb_enc_three + movups 48(%rdi),%xmm5 + je .Lecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $0x60,%rdx + jb .Lecb_enc_five + movups 80(%rdi),%xmm7 + je .Lecb_enc_six + movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_3 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp .Lecb_ret + +.align 16 +.Lecb_decrypt: + cmpq $0x80,%rdx + jb .Lecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $0x80,%rdx + jmp .Lecb_dec_loop8_enter +.align 16 +.Lecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $0x80,%rdx + jnc .Lecb_dec_loop8 + + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movq %r11,%rcx + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movl %r10d,%eax + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 + leaq 128(%rsi),%rsi + addq $0x80,%rdx + jz .Lecb_ret + +.Lecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $0x20,%rdx + jb .Lecb_dec_one + movups 16(%rdi),%xmm3 + je .Lecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $0x40,%rdx + jb .Lecb_dec_three + movups 48(%rdi),%xmm5 + je .Lecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $0x60,%rdx + jb .Lecb_dec_five + movups 80(%rdi),%xmm7 + je .Lecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp .Lecb_ret +.align 16 +.Lecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_4 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lecb_ret +.align 16 +.Lecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + jmp .Lecb_ret +.align 16 +.Lecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + jmp .Lecb_ret +.align 16 +.Lecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + jmp .Lecb_ret +.align 16 +.Lecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + jmp .Lecb_ret +.align 16 +.Lecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + +.Lecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + ret +.cfi_endproc +.size aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt +.globl aes_hw_ctr32_encrypt_blocks +.hidden aes_hw_ctr32_encrypt_blocks +.type aes_hw_ctr32_encrypt_blocks,@function +.align 16 +aes_hw_ctr32_encrypt_blocks: +.cfi_startproc +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST + movb $1,BORINGSSL_function_hit(%rip) +#endif + cmpq $1,%rdx + jne .Lctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_5: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_5 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp .Lctr32_epilogue + +.align 16 +.Lctr32_bulk: + leaq (%rsp),%r11 +.cfi_def_cfa_register %r11 + pushq %rbp +.cfi_offset %rbp,-16 + subq $128,%rsp + andq $-16,%rsp + + + + + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%ebp + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %ebp,%eax + xorl %ebp,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %ebp,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %ebp,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl %r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 + movl 240(%rcx),%eax + xorl %ebp,%r9d + bswapl %r10d + movl %r9d,80+12(%rsp) + xorl %ebp,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + xorl %ebp,%r9d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx + jb .Lctr32_tail + + leaq 128(%rcx),%rcx + subq $8,%rdx + jmp .Lctr32_loop8 + +.align 32 +.Lctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 +.byte 102,15,56,220,209 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 +.byte 102,15,56,220,217 + bswapl %r9d + movups 32-128(%rcx),%xmm0 +.byte 102,15,56,220,225 + xorl %ebp,%r9d + nop +.byte 102,15,56,220,233 + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %ebp,%r9d + movdqu 0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 160-128(%rcx),%xmm0 + + jb .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp .Lctr32_enc_done + +.align 16 +.Lctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + prefetcht0 448(%rdi) + prefetcht0 512(%rdi) + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + + subq $8,%rdx + jnc .Lctr32_loop8 + + addq $8,%rdx + jz .Lctr32_done + leaq -128(%rcx),%rcx + +.Lctr32_tail: + + + leaq 16(%rcx),%rcx + cmpq $4,%rdx + jb .Lctr32_loop3 + je .Lctr32_loop4 + + + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 + + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 + + call .Lenc_loop8_enter + + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb .Lctr32_done + + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups %xmm7,80(%rsi) + je .Lctr32_done + + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz .Lctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz .Lctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb .Lctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je .Lctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) + +.Lctr32_done: + xorps %xmm0,%xmm0 + xorl %ebp,%ebp + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 + movq -8(%r11),%rbp +.cfi_restore %rbp + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lctr32_epilogue: + ret +.cfi_endproc +.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks +.globl aes_hw_cbc_encrypt +.hidden aes_hw_cbc_encrypt +.type aes_hw_cbc_encrypt,@function +.align 16 +aes_hw_cbc_encrypt: +.cfi_startproc +_CET_ENDBR + testq %rdx,%rdx + jz .Lcbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz .Lcbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb .Lcbc_enc_tail + subq $16,%rdx + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_6: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_6 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + +.align 16 +.Lcbc_decrypt: + cmpq $16,%rdx + jne .Lcbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_7: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_7 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lcbc_ret +.align 16 +.Lcbc_decrypt_bulk: + leaq (%rsp),%r11 +.cfi_def_cfa_register %r11 + pushq %rbp +.cfi_offset %rbp,-16 + subq $16,%rsp + andq $-16,%rsp + movq %rcx,%rbp + movups (%r8),%xmm10 + movl %r10d,%eax + cmpq $0x50,%rdx + jbe .Lcbc_dec_tail + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + cmpq $0x70,%rdx + jbe .Lcbc_dec_six_or_seven + + subq $0x70,%rdx + leaq 112(%rcx),%rcx + jmp .Lcbc_dec_loop8_enter +.align 16 +.Lcbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + movq $-1,%rbp + cmpq $0x70,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor %xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + adcq $0,%rbp + andq $128,%rbp +.byte 102,68,15,56,222,201 + addq %rdi,%rbp + movups 48-112(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_done: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 0(%rbp),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%rbp),%xmm12 + movdqu 32(%rbp),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%rbp),%xmm14 + movdqu 64(%rbp),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%rbp),%xmm1 + movups -112(%rcx),%xmm0 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm1,%xmm7 + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + + subq $0x80,%rdx + ja .Lcbc_dec_loop8 + + movaps %xmm9,%xmm2 + leaq -112(%rcx),%rcx + addq $0x70,%rdx + jle .Lcbc_dec_clear_tail_collected + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $0x50,%rdx + jbe .Lcbc_dec_tail + + movaps %xmm11,%xmm2 +.Lcbc_dec_six_or_seven: + cmpq $0x60,%rdx + ja .Lcbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + leaq 96(%rsi),%rsi + movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp .Lcbc_dec_tail_collected + +.Lcbc_dec_tail: + movups (%rdi),%xmm2 + subq $0x10,%rdx + jbe .Lcbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm2,%xmm11 + subq $0x10,%rdx + jbe .Lcbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm3,%xmm12 + subq $0x10,%rdx + jbe .Lcbc_dec_three + + movups 48(%rdi),%xmm5 + movaps %xmm4,%xmm13 + subq $0x10,%rdx + jbe .Lcbc_dec_four + + movups 64(%rdi),%xmm6 + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + subq $0x10,%rdx + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_one: + movaps %xmm2,%xmm11 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_8: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_8 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_two: + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leaq 16(%rsi),%rsi + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_three: + movaps %xmm4,%xmm13 + call _aesni_decrypt3 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 + leaq 32(%rsi),%rsi + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_four: + movaps %xmm5,%xmm14 + call _aesni_decrypt4 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 + leaq 48(%rsi),%rsi + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 +.Lcbc_dec_tail_collected: + movups %xmm10,(%r8) + andq $15,%rdx + jnz .Lcbc_dec_tail_partial + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lcbc_dec_ret +.align 16 +.Lcbc_dec_tail_partial: + movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq (%rsp),%rsi +.long 0x9066A4F3 + movdqa %xmm2,(%rsp) + +.Lcbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movq -8(%r11),%rbp +.cfi_restore %rbp + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lcbc_ret: + ret +.cfi_endproc +.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt +.globl aes_hw_set_decrypt_key +.hidden aes_hw_set_decrypt_key +.type aes_hw_set_decrypt_key,@function +.align 16 +aes_hw_set_decrypt_key: +.cfi_startproc +_CET_ENDBR +.byte 0x48,0x83,0xEC,0x08 +.cfi_adjust_cfa_offset 8 + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz .Ldec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +.Ldec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja .Ldec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + pxor %xmm1,%xmm1 + movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 +.Ldec_key_ret: + addq $8,%rsp +.cfi_adjust_cfa_offset -8 + ret +.cfi_endproc +.LSEH_end_set_decrypt_key: +.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key +.globl aes_hw_set_encrypt_key +.hidden aes_hw_set_encrypt_key +.type aes_hw_set_encrypt_key,@function +.align 16 +aes_hw_set_encrypt_key: +__aesni_set_encrypt_key: +.cfi_startproc +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST + movb $1,BORINGSSL_function_hit+3(%rip) +#endif +.byte 0x48,0x83,0xEC,0x08 +.cfi_adjust_cfa_offset 8 + movq $-1,%rax + testq %rdi,%rdi + jz .Lenc_key_ret + testq %rdx,%rdx + jz .Lenc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq OPENSSL_ia32cap_P(%rip),%r10 + movl 4(%r10),%r10d + andl $268437504,%r10d + leaq 16(%rdx),%rax + cmpl $256,%esi + je .L14rounds + cmpl $192,%esi + je .L12rounds + cmpl $128,%esi + jne .Lbad_keybits + +.L10rounds: + movl $9,%esi + cmpl $268435456,%r10d + je .L10rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call .Lkey_expansion_128_cold +.byte 102,15,58,223,200,2 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,4 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,8 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,16 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,32 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,64 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,128 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,27 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,54 + call .Lkey_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L10rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa .Lkey_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp .Loop_key128 + +.align 16 +.Loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz .Loop_key128 + + movdqa .Lkey_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + cmpl $268435456,%r10d + je .L12rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_192a_cold +.byte 102,15,58,223,202,2 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,8 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,32 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,128 + call .Lkey_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L12rounds_alt: + movdqa .Lkey_rotate192(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp .Loop_key192 + +.align 16 +.Loop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $0xff,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz .Loop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + cmpl $268435456,%r10d + je .L14rounds_alt + + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_256a_cold +.byte 102,15,58,223,200,1 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,2 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,2 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,4 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,8 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,8 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,16 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,32 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,32 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L14rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp .Loop_key256 + +.align 16 +.Loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz .Ldone_key256 + + pshufd $0xff,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp .Loop_key256 + +.Ldone_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.Lbad_keybits: + movq $-2,%rax +.Lenc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + addq $8,%rsp +.cfi_adjust_cfa_offset -8 + ret +.cfi_endproc +.LSEH_end_set_encrypt_key: + +.align 16 +.Lkey_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret + +.align 16 +.Lkey_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2,%xmm5 +.Lkey_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret + +.align 16 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.align 16 +.Lkey_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret + +.align 16 +.Lkey_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key +.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key +.section .rodata +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: +.long 6,6,6,0 +.Lincrement64: +.long 1,0,0,0 +.Lxts_magic: +.long 0x87,0,1,0 +.Lincrement1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Lkey_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +.Lkey_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +.Lkey_rcon1: +.long 1,1,1,1 +.Lkey_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-win.asm new file mode 100644 index 0000000000..6c5d9ad23c --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesni-x86_64-win.asm @@ -0,0 +1,2676 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P +global aes_hw_encrypt + +ALIGN 16 +aes_hw_encrypt: + +_CET_ENDBR +%ifdef BORINGSSL_DISPATCH_TEST +EXTERN BORINGSSL_function_hit + mov BYTE[((BORINGSSL_function_hit+1))],1 +%endif + movups xmm2,XMMWORD[rcx] + mov eax,DWORD[240+r8] + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[16+r8] + lea r8,[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_1: + DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[r8] + lea r8,[16+r8] + jnz NEAR $L$oop_enc1_1 + DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups XMMWORD[rdx],xmm2 + pxor xmm2,xmm2 + ret + + + +global aes_hw_decrypt + +ALIGN 16 +aes_hw_decrypt: + +_CET_ENDBR + movups xmm2,XMMWORD[rcx] + mov eax,DWORD[240+r8] + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[16+r8] + lea r8,[32+r8] + xorps xmm2,xmm0 +$L$oop_dec1_2: + DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[r8] + lea r8,[16+r8] + jnz NEAR $L$oop_dec1_2 + DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups XMMWORD[rdx],xmm2 + pxor xmm2,xmm2 + ret + + + +ALIGN 16 +_aesni_encrypt2: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop2: + DB 102,15,56,220,209 + DB 102,15,56,220,217 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,220,208 + DB 102,15,56,220,216 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop2 + + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,221,208 + DB 102,15,56,221,216 + ret + + + +ALIGN 16 +_aesni_decrypt2: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop2: + DB 102,15,56,222,209 + DB 102,15,56,222,217 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,222,208 + DB 102,15,56,222,216 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop2 + + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,223,208 + DB 102,15,56,223,216 + ret + + + +ALIGN 16 +_aesni_encrypt3: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop3: + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,220,208 + DB 102,15,56,220,216 + DB 102,15,56,220,224 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop3 + + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,221,208 + DB 102,15,56,221,216 + DB 102,15,56,221,224 + ret + + + +ALIGN 16 +_aesni_decrypt3: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop3: + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop3 + + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,223,208 + DB 102,15,56,223,216 + DB 102,15,56,223,224 + ret + + + +ALIGN 16 +_aesni_encrypt4: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + DB 0x0f,0x1f,0x00 + add rax,16 + +$L$enc_loop4: + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,220,208 + DB 102,15,56,220,216 + DB 102,15,56,220,224 + DB 102,15,56,220,232 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop4 + + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + DB 102,15,56,221,208 + DB 102,15,56,221,216 + DB 102,15,56,221,224 + DB 102,15,56,221,232 + ret + + + +ALIGN 16 +_aesni_decrypt4: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + DB 0x0f,0x1f,0x00 + add rax,16 + +$L$dec_loop4: + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop4 + + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,223,208 + DB 102,15,56,223,216 + DB 102,15,56,223,224 + DB 102,15,56,223,232 + ret + + + +ALIGN 16 +_aesni_encrypt6: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + DB 102,15,56,220,209 + lea rcx,[32+rax*1+rcx] + neg rax + DB 102,15,56,220,217 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + DB 102,15,56,220,225 + pxor xmm7,xmm0 + movups xmm0,XMMWORD[rax*1+rcx] + add rax,16 + jmp NEAR $L$enc_loop6_enter +ALIGN 16 +$L$enc_loop6: + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 +$L$enc_loop6_enter: + DB 102,15,56,220,233 + DB 102,15,56,220,241 + DB 102,15,56,220,249 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,220,208 + DB 102,15,56,220,216 + DB 102,15,56,220,224 + DB 102,15,56,220,232 + DB 102,15,56,220,240 + DB 102,15,56,220,248 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop6 + + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,15,56,221,208 + DB 102,15,56,221,216 + DB 102,15,56,221,224 + DB 102,15,56,221,232 + DB 102,15,56,221,240 + DB 102,15,56,221,248 + ret + + + +ALIGN 16 +_aesni_decrypt6: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + DB 102,15,56,222,209 + lea rcx,[32+rax*1+rcx] + neg rax + DB 102,15,56,222,217 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + DB 102,15,56,222,225 + pxor xmm7,xmm0 + movups xmm0,XMMWORD[rax*1+rcx] + add rax,16 + jmp NEAR $L$dec_loop6_enter +ALIGN 16 +$L$dec_loop6: + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 +$L$dec_loop6_enter: + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + DB 102,15,56,222,240 + DB 102,15,56,222,248 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop6 + + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,15,56,223,208 + DB 102,15,56,223,216 + DB 102,15,56,223,224 + DB 102,15,56,223,232 + DB 102,15,56,223,240 + DB 102,15,56,223,248 + ret + + + +ALIGN 16 +_aesni_encrypt8: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + lea rcx,[32+rax*1+rcx] + neg rax + DB 102,15,56,220,209 + pxor xmm7,xmm0 + pxor xmm8,xmm0 + DB 102,15,56,220,217 + pxor xmm9,xmm0 + movups xmm0,XMMWORD[rax*1+rcx] + add rax,16 + jmp NEAR $L$enc_loop8_inner +ALIGN 16 +$L$enc_loop8: + DB 102,15,56,220,209 + DB 102,15,56,220,217 +$L$enc_loop8_inner: + DB 102,15,56,220,225 + DB 102,15,56,220,233 + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 +$L$enc_loop8_enter: + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,220,208 + DB 102,15,56,220,216 + DB 102,15,56,220,224 + DB 102,15,56,220,232 + DB 102,15,56,220,240 + DB 102,15,56,220,248 + DB 102,68,15,56,220,192 + DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop8 + + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 + DB 102,15,56,221,208 + DB 102,15,56,221,216 + DB 102,15,56,221,224 + DB 102,15,56,221,232 + DB 102,15,56,221,240 + DB 102,15,56,221,248 + DB 102,68,15,56,221,192 + DB 102,68,15,56,221,200 + ret + + + +ALIGN 16 +_aesni_decrypt8: + + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + lea rcx,[32+rax*1+rcx] + neg rax + DB 102,15,56,222,209 + pxor xmm7,xmm0 + pxor xmm8,xmm0 + DB 102,15,56,222,217 + pxor xmm9,xmm0 + movups xmm0,XMMWORD[rax*1+rcx] + add rax,16 + jmp NEAR $L$dec_loop8_inner +ALIGN 16 +$L$dec_loop8: + DB 102,15,56,222,209 + DB 102,15,56,222,217 +$L$dec_loop8_inner: + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,68,15,56,222,193 + DB 102,68,15,56,222,201 +$L$dec_loop8_enter: + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + DB 102,15,56,222,240 + DB 102,15,56,222,248 + DB 102,68,15,56,222,192 + DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop8 + + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,68,15,56,222,193 + DB 102,68,15,56,222,201 + DB 102,15,56,223,208 + DB 102,15,56,223,216 + DB 102,15,56,223,224 + DB 102,15,56,223,232 + DB 102,15,56,223,240 + DB 102,15,56,223,248 + DB 102,68,15,56,223,192 + DB 102,68,15,56,223,200 + ret + + +global aes_hw_ecb_encrypt + +ALIGN 16 +aes_hw_ecb_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes_hw_ecb_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + lea rsp,[((-88))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 +$L$ecb_enc_body: + and rdx,-16 + jz NEAR $L$ecb_ret + + mov eax,DWORD[240+rcx] + movups xmm0,XMMWORD[rcx] + mov r11,rcx + mov r10d,eax + test r8d,r8d + jz NEAR $L$ecb_decrypt + + cmp rdx,0x80 + jb NEAR $L$ecb_enc_tail + + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqu xmm4,XMMWORD[32+rdi] + movdqu xmm5,XMMWORD[48+rdi] + movdqu xmm6,XMMWORD[64+rdi] + movdqu xmm7,XMMWORD[80+rdi] + movdqu xmm8,XMMWORD[96+rdi] + movdqu xmm9,XMMWORD[112+rdi] + lea rdi,[128+rdi] + sub rdx,0x80 + jmp NEAR $L$ecb_enc_loop8_enter +ALIGN 16 +$L$ecb_enc_loop8: + movups XMMWORD[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD[rdi] + mov eax,r10d + movups XMMWORD[16+rsi],xmm3 + movdqu xmm3,XMMWORD[16+rdi] + movups XMMWORD[32+rsi],xmm4 + movdqu xmm4,XMMWORD[32+rdi] + movups XMMWORD[48+rsi],xmm5 + movdqu xmm5,XMMWORD[48+rdi] + movups XMMWORD[64+rsi],xmm6 + movdqu xmm6,XMMWORD[64+rdi] + movups XMMWORD[80+rsi],xmm7 + movdqu xmm7,XMMWORD[80+rdi] + movups XMMWORD[96+rsi],xmm8 + movdqu xmm8,XMMWORD[96+rdi] + movups XMMWORD[112+rsi],xmm9 + lea rsi,[128+rsi] + movdqu xmm9,XMMWORD[112+rdi] + lea rdi,[128+rdi] +$L$ecb_enc_loop8_enter: + + call _aesni_encrypt8 + + sub rdx,0x80 + jnc NEAR $L$ecb_enc_loop8 + + movups XMMWORD[rsi],xmm2 + mov rcx,r11 + movups XMMWORD[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + movups XMMWORD[64+rsi],xmm6 + movups XMMWORD[80+rsi],xmm7 + movups XMMWORD[96+rsi],xmm8 + movups XMMWORD[112+rsi],xmm9 + lea rsi,[128+rsi] + add rdx,0x80 + jz NEAR $L$ecb_ret + +$L$ecb_enc_tail: + movups xmm2,XMMWORD[rdi] + cmp rdx,0x20 + jb NEAR $L$ecb_enc_one + movups xmm3,XMMWORD[16+rdi] + je NEAR $L$ecb_enc_two + movups xmm4,XMMWORD[32+rdi] + cmp rdx,0x40 + jb NEAR $L$ecb_enc_three + movups xmm5,XMMWORD[48+rdi] + je NEAR $L$ecb_enc_four + movups xmm6,XMMWORD[64+rdi] + cmp rdx,0x60 + jb NEAR $L$ecb_enc_five + movups xmm7,XMMWORD[80+rdi] + je NEAR $L$ecb_enc_six + movdqu xmm8,XMMWORD[96+rdi] + xorps xmm9,xmm9 + call _aesni_encrypt8 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + movups XMMWORD[64+rsi],xmm6 + movups XMMWORD[80+rsi],xmm7 + movups XMMWORD[96+rsi],xmm8 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_one: + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_3: + DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_3 + DB 102,15,56,221,209 + movups XMMWORD[rsi],xmm2 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_two: + call _aesni_encrypt2 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_three: + call _aesni_encrypt3 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_four: + call _aesni_encrypt4 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_five: + xorps xmm7,xmm7 + call _aesni_encrypt6 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + movups XMMWORD[64+rsi],xmm6 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_six: + call _aesni_encrypt6 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + movups XMMWORD[64+rsi],xmm6 + movups XMMWORD[80+rsi],xmm7 + jmp NEAR $L$ecb_ret + +ALIGN 16 +$L$ecb_decrypt: + cmp rdx,0x80 + jb NEAR $L$ecb_dec_tail + + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqu xmm4,XMMWORD[32+rdi] + movdqu xmm5,XMMWORD[48+rdi] + movdqu xmm6,XMMWORD[64+rdi] + movdqu xmm7,XMMWORD[80+rdi] + movdqu xmm8,XMMWORD[96+rdi] + movdqu xmm9,XMMWORD[112+rdi] + lea rdi,[128+rdi] + sub rdx,0x80 + jmp NEAR $L$ecb_dec_loop8_enter +ALIGN 16 +$L$ecb_dec_loop8: + movups XMMWORD[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD[rdi] + mov eax,r10d + movups XMMWORD[16+rsi],xmm3 + movdqu xmm3,XMMWORD[16+rdi] + movups XMMWORD[32+rsi],xmm4 + movdqu xmm4,XMMWORD[32+rdi] + movups XMMWORD[48+rsi],xmm5 + movdqu xmm5,XMMWORD[48+rdi] + movups XMMWORD[64+rsi],xmm6 + movdqu xmm6,XMMWORD[64+rdi] + movups XMMWORD[80+rsi],xmm7 + movdqu xmm7,XMMWORD[80+rdi] + movups XMMWORD[96+rsi],xmm8 + movdqu xmm8,XMMWORD[96+rdi] + movups XMMWORD[112+rsi],xmm9 + lea rsi,[128+rsi] + movdqu xmm9,XMMWORD[112+rdi] + lea rdi,[128+rdi] +$L$ecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups xmm0,XMMWORD[r11] + sub rdx,0x80 + jnc NEAR $L$ecb_dec_loop8 + + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + mov rcx,r11 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + mov eax,r10d + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + movups XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + movups XMMWORD[80+rsi],xmm7 + pxor xmm7,xmm7 + movups XMMWORD[96+rsi],xmm8 + pxor xmm8,xmm8 + movups XMMWORD[112+rsi],xmm9 + pxor xmm9,xmm9 + lea rsi,[128+rsi] + add rdx,0x80 + jz NEAR $L$ecb_ret + +$L$ecb_dec_tail: + movups xmm2,XMMWORD[rdi] + cmp rdx,0x20 + jb NEAR $L$ecb_dec_one + movups xmm3,XMMWORD[16+rdi] + je NEAR $L$ecb_dec_two + movups xmm4,XMMWORD[32+rdi] + cmp rdx,0x40 + jb NEAR $L$ecb_dec_three + movups xmm5,XMMWORD[48+rdi] + je NEAR $L$ecb_dec_four + movups xmm6,XMMWORD[64+rdi] + cmp rdx,0x60 + jb NEAR $L$ecb_dec_five + movups xmm7,XMMWORD[80+rdi] + je NEAR $L$ecb_dec_six + movups xmm8,XMMWORD[96+rdi] + movups xmm0,XMMWORD[rcx] + xorps xmm9,xmm9 + call _aesni_decrypt8 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + movups XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + movups XMMWORD[80+rsi],xmm7 + pxor xmm7,xmm7 + movups XMMWORD[96+rsi],xmm8 + pxor xmm8,xmm8 + pxor xmm9,xmm9 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_one: + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_4: + DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_4 + DB 102,15,56,223,209 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_two: + call _aesni_decrypt2 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_three: + call _aesni_decrypt3 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_four: + call _aesni_decrypt4 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_five: + xorps xmm7,xmm7 + call _aesni_decrypt6 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + movups XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_six: + call _aesni_decrypt6 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + movups XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + movups XMMWORD[80+rsi],xmm7 + pxor xmm7,xmm7 + +$L$ecb_ret: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + movaps xmm6,XMMWORD[rsp] + movaps XMMWORD[rsp],xmm0 + movaps xmm7,XMMWORD[16+rsp] + movaps XMMWORD[16+rsp],xmm0 + movaps xmm8,XMMWORD[32+rsp] + movaps XMMWORD[32+rsp],xmm0 + movaps xmm9,XMMWORD[48+rsp] + movaps XMMWORD[48+rsp],xmm0 + lea rsp,[88+rsp] +$L$ecb_enc_ret: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes_hw_ecb_encrypt: +global aes_hw_ctr32_encrypt_blocks + +ALIGN 16 +aes_hw_ctr32_encrypt_blocks: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes_hw_ctr32_encrypt_blocks: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR +%ifdef BORINGSSL_DISPATCH_TEST + mov BYTE[BORINGSSL_function_hit],1 +%endif + cmp rdx,1 + jne NEAR $L$ctr32_bulk + + + + movups xmm2,XMMWORD[r8] + movups xmm3,XMMWORD[rdi] + mov edx,DWORD[240+rcx] + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_5: + DB 102,15,56,220,209 + dec edx + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_5 + DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + xorps xmm2,xmm3 + pxor xmm3,xmm3 + movups XMMWORD[rsi],xmm2 + xorps xmm2,xmm2 + jmp NEAR $L$ctr32_epilogue + +ALIGN 16 +$L$ctr32_bulk: + lea r11,[rsp] + + push rbp + + sub rsp,288 + and rsp,-16 + movaps XMMWORD[(-168)+r11],xmm6 + movaps XMMWORD[(-152)+r11],xmm7 + movaps XMMWORD[(-136)+r11],xmm8 + movaps XMMWORD[(-120)+r11],xmm9 + movaps XMMWORD[(-104)+r11],xmm10 + movaps XMMWORD[(-88)+r11],xmm11 + movaps XMMWORD[(-72)+r11],xmm12 + movaps XMMWORD[(-56)+r11],xmm13 + movaps XMMWORD[(-40)+r11],xmm14 + movaps XMMWORD[(-24)+r11],xmm15 +$L$ctr32_body: + + + + + movdqu xmm2,XMMWORD[r8] + movdqu xmm0,XMMWORD[rcx] + mov r8d,DWORD[12+r8] + pxor xmm2,xmm0 + mov ebp,DWORD[12+rcx] + movdqa XMMWORD[rsp],xmm2 + bswap r8d + movdqa xmm3,xmm2 + movdqa xmm4,xmm2 + movdqa xmm5,xmm2 + movdqa XMMWORD[64+rsp],xmm2 + movdqa XMMWORD[80+rsp],xmm2 + movdqa XMMWORD[96+rsp],xmm2 + mov r10,rdx + movdqa XMMWORD[112+rsp],xmm2 + + lea rax,[1+r8] + lea rdx,[2+r8] + bswap eax + bswap edx + xor eax,ebp + xor edx,ebp +DB 102,15,58,34,216,3 + lea rax,[3+r8] + movdqa XMMWORD[16+rsp],xmm3 +DB 102,15,58,34,226,3 + bswap eax + mov rdx,r10 + lea r10,[4+r8] + movdqa XMMWORD[32+rsp],xmm4 + xor eax,ebp + bswap r10d +DB 102,15,58,34,232,3 + xor r10d,ebp + movdqa XMMWORD[48+rsp],xmm5 + lea r9,[5+r8] + mov DWORD[((64+12))+rsp],r10d + bswap r9d + lea r10,[6+r8] + mov eax,DWORD[240+rcx] + xor r9d,ebp + bswap r10d + mov DWORD[((80+12))+rsp],r9d + xor r10d,ebp + lea r9,[7+r8] + mov DWORD[((96+12))+rsp],r10d + bswap r9d + xor r9d,ebp + mov DWORD[((112+12))+rsp],r9d + + movups xmm1,XMMWORD[16+rcx] + + movdqa xmm6,XMMWORD[64+rsp] + movdqa xmm7,XMMWORD[80+rsp] + + cmp rdx,8 + jb NEAR $L$ctr32_tail + + lea rcx,[128+rcx] + sub rdx,8 + jmp NEAR $L$ctr32_loop8 + +ALIGN 32 +$L$ctr32_loop8: + add r8d,8 + movdqa xmm8,XMMWORD[96+rsp] + DB 102,15,56,220,209 + mov r9d,r8d + movdqa xmm9,XMMWORD[112+rsp] + DB 102,15,56,220,217 + bswap r9d + movups xmm0,XMMWORD[((32-128))+rcx] + DB 102,15,56,220,225 + xor r9d,ebp + nop + DB 102,15,56,220,233 + mov DWORD[((0+12))+rsp],r9d + lea r9,[1+r8] + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((48-128))+rcx] + bswap r9d + DB 102,15,56,220,208 + DB 102,15,56,220,216 + xor r9d,ebp + DB 0x66,0x90 + DB 102,15,56,220,224 + DB 102,15,56,220,232 + mov DWORD[((16+12))+rsp],r9d + lea r9,[2+r8] + DB 102,15,56,220,240 + DB 102,15,56,220,248 + DB 102,68,15,56,220,192 + DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((64-128))+rcx] + bswap r9d + DB 102,15,56,220,209 + DB 102,15,56,220,217 + xor r9d,ebp + DB 0x66,0x90 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + mov DWORD[((32+12))+rsp],r9d + lea r9,[3+r8] + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((80-128))+rcx] + bswap r9d + DB 102,15,56,220,208 + DB 102,15,56,220,216 + xor r9d,ebp + DB 0x66,0x90 + DB 102,15,56,220,224 + DB 102,15,56,220,232 + mov DWORD[((48+12))+rsp],r9d + lea r9,[4+r8] + DB 102,15,56,220,240 + DB 102,15,56,220,248 + DB 102,68,15,56,220,192 + DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((96-128))+rcx] + bswap r9d + DB 102,15,56,220,209 + DB 102,15,56,220,217 + xor r9d,ebp + DB 0x66,0x90 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + mov DWORD[((64+12))+rsp],r9d + lea r9,[5+r8] + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((112-128))+rcx] + bswap r9d + DB 102,15,56,220,208 + DB 102,15,56,220,216 + xor r9d,ebp + DB 0x66,0x90 + DB 102,15,56,220,224 + DB 102,15,56,220,232 + mov DWORD[((80+12))+rsp],r9d + lea r9,[6+r8] + DB 102,15,56,220,240 + DB 102,15,56,220,248 + DB 102,68,15,56,220,192 + DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((128-128))+rcx] + bswap r9d + DB 102,15,56,220,209 + DB 102,15,56,220,217 + xor r9d,ebp + DB 0x66,0x90 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + mov DWORD[((96+12))+rsp],r9d + lea r9,[7+r8] + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((144-128))+rcx] + bswap r9d + DB 102,15,56,220,208 + DB 102,15,56,220,216 + DB 102,15,56,220,224 + xor r9d,ebp + movdqu xmm10,XMMWORD[rdi] + DB 102,15,56,220,232 + mov DWORD[((112+12))+rsp],r9d + cmp eax,11 + DB 102,15,56,220,240 + DB 102,15,56,220,248 + DB 102,68,15,56,220,192 + DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((160-128))+rcx] + + jb NEAR $L$ctr32_enc_done + + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((176-128))+rcx] + + DB 102,15,56,220,208 + DB 102,15,56,220,216 + DB 102,15,56,220,224 + DB 102,15,56,220,232 + DB 102,15,56,220,240 + DB 102,15,56,220,248 + DB 102,68,15,56,220,192 + DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((192-128))+rcx] + je NEAR $L$ctr32_enc_done + + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((208-128))+rcx] + + DB 102,15,56,220,208 + DB 102,15,56,220,216 + DB 102,15,56,220,224 + DB 102,15,56,220,232 + DB 102,15,56,220,240 + DB 102,15,56,220,248 + DB 102,68,15,56,220,192 + DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((224-128))+rcx] + jmp NEAR $L$ctr32_enc_done + +ALIGN 16 +$L$ctr32_enc_done: + movdqu xmm11,XMMWORD[16+rdi] + pxor xmm10,xmm0 + movdqu xmm12,XMMWORD[32+rdi] + pxor xmm11,xmm0 + movdqu xmm13,XMMWORD[48+rdi] + pxor xmm12,xmm0 + movdqu xmm14,XMMWORD[64+rdi] + pxor xmm13,xmm0 + movdqu xmm15,XMMWORD[80+rdi] + pxor xmm14,xmm0 + prefetcht0 [448+rdi] + prefetcht0 [512+rdi] + pxor xmm15,xmm0 + DB 102,15,56,220,209 + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + DB 102,15,56,220,241 + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + DB 102,68,15,56,220,201 + movdqu xmm1,XMMWORD[96+rdi] + lea rdi,[128+rdi] + + DB 102,65,15,56,221,210 + pxor xmm1,xmm0 + movdqu xmm10,XMMWORD[((112-128))+rdi] + DB 102,65,15,56,221,219 + pxor xmm10,xmm0 + movdqa xmm11,XMMWORD[rsp] + DB 102,65,15,56,221,228 + DB 102,65,15,56,221,237 + movdqa xmm12,XMMWORD[16+rsp] + movdqa xmm13,XMMWORD[32+rsp] + DB 102,65,15,56,221,246 + DB 102,65,15,56,221,255 + movdqa xmm14,XMMWORD[48+rsp] + movdqa xmm15,XMMWORD[64+rsp] + DB 102,68,15,56,221,193 + movdqa xmm0,XMMWORD[80+rsp] + movups xmm1,XMMWORD[((16-128))+rcx] + DB 102,69,15,56,221,202 + + movups XMMWORD[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD[80+rsi],xmm7 + movdqa xmm7,xmm0 + movups XMMWORD[96+rsi],xmm8 + movups XMMWORD[112+rsi],xmm9 + lea rsi,[128+rsi] + + sub rdx,8 + jnc NEAR $L$ctr32_loop8 + + add rdx,8 + jz NEAR $L$ctr32_done + lea rcx,[((-128))+rcx] + +$L$ctr32_tail: + + + lea rcx,[16+rcx] + cmp rdx,4 + jb NEAR $L$ctr32_loop3 + je NEAR $L$ctr32_loop4 + + + shl eax,4 + movdqa xmm8,XMMWORD[96+rsp] + pxor xmm9,xmm9 + + movups xmm0,XMMWORD[16+rcx] + DB 102,15,56,220,209 + DB 102,15,56,220,217 + lea rcx,[((32-16))+rax*1+rcx] + neg rax + DB 102,15,56,220,225 + add rax,16 + movups xmm10,XMMWORD[rdi] + DB 102,15,56,220,233 + DB 102,15,56,220,241 + movups xmm11,XMMWORD[16+rdi] + movups xmm12,XMMWORD[32+rdi] + DB 102,15,56,220,249 + DB 102,68,15,56,220,193 + + call $L$enc_loop8_enter + + movdqu xmm13,XMMWORD[48+rdi] + pxor xmm2,xmm10 + movdqu xmm10,XMMWORD[64+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm6,xmm10 + movdqu XMMWORD[48+rsi],xmm5 + movdqu XMMWORD[64+rsi],xmm6 + cmp rdx,6 + jb NEAR $L$ctr32_done + + movups xmm11,XMMWORD[80+rdi] + xorps xmm7,xmm11 + movups XMMWORD[80+rsi],xmm7 + je NEAR $L$ctr32_done + + movups xmm12,XMMWORD[96+rdi] + xorps xmm8,xmm12 + movups XMMWORD[96+rsi],xmm8 + jmp NEAR $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop4: + DB 102,15,56,220,209 + lea rcx,[16+rcx] + dec eax + DB 102,15,56,220,217 + DB 102,15,56,220,225 + DB 102,15,56,220,233 + movups xmm1,XMMWORD[rcx] + jnz NEAR $L$ctr32_loop4 + DB 102,15,56,221,209 + DB 102,15,56,221,217 + movups xmm10,XMMWORD[rdi] + movups xmm11,XMMWORD[16+rdi] + DB 102,15,56,221,225 + DB 102,15,56,221,233 + movups xmm12,XMMWORD[32+rdi] + movups xmm13,XMMWORD[48+rdi] + + xorps xmm2,xmm10 + movups XMMWORD[rsi],xmm2 + xorps xmm3,xmm11 + movups XMMWORD[16+rsi],xmm3 + pxor xmm4,xmm12 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm5,xmm13 + movdqu XMMWORD[48+rsi],xmm5 + jmp NEAR $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop3: + DB 102,15,56,220,209 + lea rcx,[16+rcx] + dec eax + DB 102,15,56,220,217 + DB 102,15,56,220,225 + movups xmm1,XMMWORD[rcx] + jnz NEAR $L$ctr32_loop3 + DB 102,15,56,221,209 + DB 102,15,56,221,217 + DB 102,15,56,221,225 + + movups xmm10,XMMWORD[rdi] + xorps xmm2,xmm10 + movups XMMWORD[rsi],xmm2 + cmp rdx,2 + jb NEAR $L$ctr32_done + + movups xmm11,XMMWORD[16+rdi] + xorps xmm3,xmm11 + movups XMMWORD[16+rsi],xmm3 + je NEAR $L$ctr32_done + + movups xmm12,XMMWORD[32+rdi] + xorps xmm4,xmm12 + movups XMMWORD[32+rsi],xmm4 + +$L$ctr32_done: + xorps xmm0,xmm0 + xor ebp,ebp + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + movaps xmm6,XMMWORD[((-168))+r11] + movaps XMMWORD[(-168)+r11],xmm0 + movaps xmm7,XMMWORD[((-152))+r11] + movaps XMMWORD[(-152)+r11],xmm0 + movaps xmm8,XMMWORD[((-136))+r11] + movaps XMMWORD[(-136)+r11],xmm0 + movaps xmm9,XMMWORD[((-120))+r11] + movaps XMMWORD[(-120)+r11],xmm0 + movaps xmm10,XMMWORD[((-104))+r11] + movaps XMMWORD[(-104)+r11],xmm0 + movaps xmm11,XMMWORD[((-88))+r11] + movaps XMMWORD[(-88)+r11],xmm0 + movaps xmm12,XMMWORD[((-72))+r11] + movaps XMMWORD[(-72)+r11],xmm0 + movaps xmm13,XMMWORD[((-56))+r11] + movaps XMMWORD[(-56)+r11],xmm0 + movaps xmm14,XMMWORD[((-40))+r11] + movaps XMMWORD[(-40)+r11],xmm0 + movaps xmm15,XMMWORD[((-24))+r11] + movaps XMMWORD[(-24)+r11],xmm0 + movaps XMMWORD[rsp],xmm0 + movaps XMMWORD[16+rsp],xmm0 + movaps XMMWORD[32+rsp],xmm0 + movaps XMMWORD[48+rsp],xmm0 + movaps XMMWORD[64+rsp],xmm0 + movaps XMMWORD[80+rsp],xmm0 + movaps XMMWORD[96+rsp],xmm0 + movaps XMMWORD[112+rsp],xmm0 + mov rbp,QWORD[((-8))+r11] + + lea rsp,[r11] + +$L$ctr32_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes_hw_ctr32_encrypt_blocks: +global aes_hw_cbc_encrypt + +ALIGN 16 +aes_hw_cbc_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes_hw_cbc_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + test rdx,rdx + jz NEAR $L$cbc_ret + + mov r10d,DWORD[240+rcx] + mov r11,rcx + test r9d,r9d + jz NEAR $L$cbc_decrypt + + movups xmm2,XMMWORD[r8] + mov eax,r10d + cmp rdx,16 + jb NEAR $L$cbc_enc_tail + sub rdx,16 + jmp NEAR $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop: + movups xmm3,XMMWORD[rdi] + lea rdi,[16+rdi] + + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + xorps xmm3,xmm0 + lea rcx,[32+rcx] + xorps xmm2,xmm3 +$L$oop_enc1_6: + DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_6 + DB 102,15,56,221,209 + mov eax,r10d + mov rcx,r11 + movups XMMWORD[rsi],xmm2 + lea rsi,[16+rsi] + sub rdx,16 + jnc NEAR $L$cbc_enc_loop + add rdx,16 + jnz NEAR $L$cbc_enc_tail + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups XMMWORD[r8],xmm2 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + jmp NEAR $L$cbc_ret + +$L$cbc_enc_tail: + mov rcx,rdx + xchg rsi,rdi + DD 0x9066A4F3 + mov ecx,16 + sub rcx,rdx + xor eax,eax + DD 0x9066AAF3 + lea rdi,[((-16))+rdi] + mov eax,r10d + mov rsi,rdi + mov rcx,r11 + xor rdx,rdx + jmp NEAR $L$cbc_enc_loop + +ALIGN 16 +$L$cbc_decrypt: + cmp rdx,16 + jne NEAR $L$cbc_decrypt_bulk + + + + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[r8] + movdqa xmm4,xmm2 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_7: + DB 102,15,56,222,209 + dec r10d + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_7 + DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movdqu XMMWORD[r8],xmm4 + xorps xmm2,xmm3 + pxor xmm3,xmm3 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + jmp NEAR $L$cbc_ret +ALIGN 16 +$L$cbc_decrypt_bulk: + lea r11,[rsp] + + push rbp + + sub rsp,176 + and rsp,-16 + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$cbc_decrypt_body: + mov rbp,rcx + movups xmm10,XMMWORD[r8] + mov eax,r10d + cmp rdx,0x50 + jbe NEAR $L$cbc_dec_tail + + movups xmm0,XMMWORD[rcx] + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD[80+rdi] + movdqa xmm15,xmm6 + cmp rdx,0x70 + jbe NEAR $L$cbc_dec_six_or_seven + + sub rdx,0x70 + lea rcx,[112+rcx] + jmp NEAR $L$cbc_dec_loop8_enter +ALIGN 16 +$L$cbc_dec_loop8: + movups XMMWORD[rsi],xmm9 + lea rsi,[16+rsi] +$L$cbc_dec_loop8_enter: + movdqu xmm8,XMMWORD[96+rdi] + pxor xmm2,xmm0 + movdqu xmm9,XMMWORD[112+rdi] + pxor xmm3,xmm0 + movups xmm1,XMMWORD[((16-112))+rcx] + pxor xmm4,xmm0 + mov rbp,-1 + cmp rdx,0x70 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + pxor xmm7,xmm0 + pxor xmm8,xmm0 + + DB 102,15,56,222,209 + pxor xmm9,xmm0 + movups xmm0,XMMWORD[((32-112))+rcx] + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,68,15,56,222,193 + adc rbp,0 + and rbp,128 + DB 102,68,15,56,222,201 + add rbp,rdi + movups xmm1,XMMWORD[((48-112))+rcx] + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + DB 102,15,56,222,240 + DB 102,15,56,222,248 + DB 102,68,15,56,222,192 + DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((64-112))+rcx] + nop + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,68,15,56,222,193 + DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((80-112))+rcx] + nop + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + DB 102,15,56,222,240 + DB 102,15,56,222,248 + DB 102,68,15,56,222,192 + DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((96-112))+rcx] + nop + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,68,15,56,222,193 + DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((112-112))+rcx] + nop + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + DB 102,15,56,222,240 + DB 102,15,56,222,248 + DB 102,68,15,56,222,192 + DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((128-112))+rcx] + nop + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,68,15,56,222,193 + DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((144-112))+rcx] + cmp eax,11 + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + DB 102,15,56,222,240 + DB 102,15,56,222,248 + DB 102,68,15,56,222,192 + DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((160-112))+rcx] + jb NEAR $L$cbc_dec_done + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,68,15,56,222,193 + DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((176-112))+rcx] + nop + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + DB 102,15,56,222,240 + DB 102,15,56,222,248 + DB 102,68,15,56,222,192 + DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((192-112))+rcx] + je NEAR $L$cbc_dec_done + DB 102,15,56,222,209 + DB 102,15,56,222,217 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + DB 102,68,15,56,222,193 + DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((208-112))+rcx] + nop + DB 102,15,56,222,208 + DB 102,15,56,222,216 + DB 102,15,56,222,224 + DB 102,15,56,222,232 + DB 102,15,56,222,240 + DB 102,15,56,222,248 + DB 102,68,15,56,222,192 + DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((224-112))+rcx] + jmp NEAR $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_done: + DB 102,15,56,222,209 + DB 102,15,56,222,217 + pxor xmm10,xmm0 + pxor xmm11,xmm0 + DB 102,15,56,222,225 + DB 102,15,56,222,233 + pxor xmm12,xmm0 + pxor xmm13,xmm0 + DB 102,15,56,222,241 + DB 102,15,56,222,249 + pxor xmm14,xmm0 + pxor xmm15,xmm0 + DB 102,68,15,56,222,193 + DB 102,68,15,56,222,201 + movdqu xmm1,XMMWORD[80+rdi] + + DB 102,65,15,56,223,210 + movdqu xmm10,XMMWORD[96+rdi] + pxor xmm1,xmm0 + DB 102,65,15,56,223,219 + pxor xmm10,xmm0 + movdqu xmm0,XMMWORD[112+rdi] + DB 102,65,15,56,223,228 + lea rdi,[128+rdi] + movdqu xmm11,XMMWORD[rbp] + DB 102,65,15,56,223,237 + DB 102,65,15,56,223,246 + movdqu xmm12,XMMWORD[16+rbp] + movdqu xmm13,XMMWORD[32+rbp] + DB 102,65,15,56,223,255 + DB 102,68,15,56,223,193 + movdqu xmm14,XMMWORD[48+rbp] + movdqu xmm15,XMMWORD[64+rbp] + DB 102,69,15,56,223,202 + movdqa xmm10,xmm0 + movdqu xmm1,XMMWORD[80+rbp] + movups xmm0,XMMWORD[((-112))+rcx] + + movups XMMWORD[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD[80+rsi],xmm7 + movdqa xmm7,xmm1 + movups XMMWORD[96+rsi],xmm8 + lea rsi,[112+rsi] + + sub rdx,0x80 + ja NEAR $L$cbc_dec_loop8 + + movaps xmm2,xmm9 + lea rcx,[((-112))+rcx] + add rdx,0x70 + jle NEAR $L$cbc_dec_clear_tail_collected + movups XMMWORD[rsi],xmm9 + lea rsi,[16+rsi] + cmp rdx,0x50 + jbe NEAR $L$cbc_dec_tail + + movaps xmm2,xmm11 +$L$cbc_dec_six_or_seven: + cmp rdx,0x60 + ja NEAR $L$cbc_dec_seven + + movaps xmm8,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + lea rsi,[80+rsi] + movdqa xmm2,xmm7 + pxor xmm7,xmm7 + jmp NEAR $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_seven: + movups xmm8,XMMWORD[96+rdi] + xorps xmm9,xmm9 + call _aesni_decrypt8 + movups xmm9,XMMWORD[80+rdi] + pxor xmm2,xmm10 + movups xmm10,XMMWORD[96+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + pxor xmm8,xmm9 + movdqu XMMWORD[80+rsi],xmm7 + pxor xmm7,xmm7 + lea rsi,[96+rsi] + movdqa xmm2,xmm8 + pxor xmm8,xmm8 + pxor xmm9,xmm9 + jmp NEAR $L$cbc_dec_tail_collected + +$L$cbc_dec_tail: + movups xmm2,XMMWORD[rdi] + sub rdx,0x10 + jbe NEAR $L$cbc_dec_one + + movups xmm3,XMMWORD[16+rdi] + movaps xmm11,xmm2 + sub rdx,0x10 + jbe NEAR $L$cbc_dec_two + + movups xmm4,XMMWORD[32+rdi] + movaps xmm12,xmm3 + sub rdx,0x10 + jbe NEAR $L$cbc_dec_three + + movups xmm5,XMMWORD[48+rdi] + movaps xmm13,xmm4 + sub rdx,0x10 + jbe NEAR $L$cbc_dec_four + + movups xmm6,XMMWORD[64+rdi] + movaps xmm14,xmm5 + movaps xmm15,xmm6 + xorps xmm7,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm15 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + lea rsi,[64+rsi] + movdqa xmm2,xmm6 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + sub rdx,0x10 + jmp NEAR $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_one: + movaps xmm11,xmm2 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_8: + DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_8 + DB 102,15,56,223,209 + xorps xmm2,xmm10 + movaps xmm10,xmm11 + jmp NEAR $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_two: + movaps xmm12,xmm3 + call _aesni_decrypt2 + pxor xmm2,xmm10 + movaps xmm10,xmm12 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + movdqa xmm2,xmm3 + pxor xmm3,xmm3 + lea rsi,[16+rsi] + jmp NEAR $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_three: + movaps xmm13,xmm4 + call _aesni_decrypt3 + pxor xmm2,xmm10 + movaps xmm10,xmm13 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movdqa xmm2,xmm4 + pxor xmm4,xmm4 + lea rsi,[32+rsi] + jmp NEAR $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_four: + movaps xmm14,xmm5 + call _aesni_decrypt4 + pxor xmm2,xmm10 + movaps xmm10,xmm14 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movdqa xmm2,xmm5 + pxor xmm5,xmm5 + lea rsi,[48+rsi] + jmp NEAR $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_clear_tail_collected: + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 +$L$cbc_dec_tail_collected: + movups XMMWORD[r8],xmm10 + and rdx,15 + jnz NEAR $L$cbc_dec_tail_partial + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + jmp NEAR $L$cbc_dec_ret +ALIGN 16 +$L$cbc_dec_tail_partial: + movaps XMMWORD[rsp],xmm2 + pxor xmm2,xmm2 + mov rcx,16 + mov rdi,rsi + sub rcx,rdx + lea rsi,[rsp] + DD 0x9066A4F3 + movdqa XMMWORD[rsp],xmm2 + +$L$cbc_dec_ret: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + movaps xmm6,XMMWORD[16+rsp] + movaps XMMWORD[16+rsp],xmm0 + movaps xmm7,XMMWORD[32+rsp] + movaps XMMWORD[32+rsp],xmm0 + movaps xmm8,XMMWORD[48+rsp] + movaps XMMWORD[48+rsp],xmm0 + movaps xmm9,XMMWORD[64+rsp] + movaps XMMWORD[64+rsp],xmm0 + movaps xmm10,XMMWORD[80+rsp] + movaps XMMWORD[80+rsp],xmm0 + movaps xmm11,XMMWORD[96+rsp] + movaps XMMWORD[96+rsp],xmm0 + movaps xmm12,XMMWORD[112+rsp] + movaps XMMWORD[112+rsp],xmm0 + movaps xmm13,XMMWORD[128+rsp] + movaps XMMWORD[128+rsp],xmm0 + movaps xmm14,XMMWORD[144+rsp] + movaps XMMWORD[144+rsp],xmm0 + movaps xmm15,XMMWORD[160+rsp] + movaps XMMWORD[160+rsp],xmm0 + mov rbp,QWORD[((-8))+r11] + + lea rsp,[r11] + +$L$cbc_ret: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes_hw_cbc_encrypt: +global aes_hw_set_decrypt_key + +ALIGN 16 +aes_hw_set_decrypt_key: + +_CET_ENDBR + DB 0x48,0x83,0xEC,0x08 + + call __aesni_set_encrypt_key + shl edx,4 + test eax,eax + jnz NEAR $L$dec_key_ret + lea rcx,[16+rdx*1+r8] + + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[rcx] + movups XMMWORD[rcx],xmm0 + movups XMMWORD[r8],xmm1 + lea r8,[16+r8] + lea rcx,[((-16))+rcx] + +$L$dec_key_inverse: + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[rcx] + DB 102,15,56,219,192 + DB 102,15,56,219,201 + lea r8,[16+r8] + lea rcx,[((-16))+rcx] + movups XMMWORD[16+rcx],xmm0 + movups XMMWORD[(-16)+r8],xmm1 + cmp rcx,r8 + ja NEAR $L$dec_key_inverse + + movups xmm0,XMMWORD[r8] + DB 102,15,56,219,192 + pxor xmm1,xmm1 + movups XMMWORD[rcx],xmm0 + pxor xmm0,xmm0 +$L$dec_key_ret: + add rsp,8 + + ret + +$L$SEH_end_set_decrypt_key: + +global aes_hw_set_encrypt_key + +ALIGN 16 +aes_hw_set_encrypt_key: +__aesni_set_encrypt_key: + +_CET_ENDBR +%ifdef BORINGSSL_DISPATCH_TEST + mov BYTE[((BORINGSSL_function_hit+3))],1 +%endif + DB 0x48,0x83,0xEC,0x08 + + mov rax,-1 + test rcx,rcx + jz NEAR $L$enc_key_ret + test r8,r8 + jz NEAR $L$enc_key_ret + + movups xmm0,XMMWORD[rcx] + xorps xmm4,xmm4 + lea r10,[OPENSSL_ia32cap_P] + mov r10d,DWORD[4+r10] + and r10d,268437504 + lea rax,[16+r8] + cmp edx,256 + je NEAR $L$14rounds + cmp edx,192 + je NEAR $L$12rounds + cmp edx,128 + jne NEAR $L$bad_keybits + +$L$10rounds: + mov edx,9 + cmp r10d,268435456 + je NEAR $L$10rounds_alt + + movups XMMWORD[r8],xmm0 + DB 102,15,58,223,200,1 + call $L$key_expansion_128_cold + DB 102,15,58,223,200,2 + call $L$key_expansion_128 + DB 102,15,58,223,200,4 + call $L$key_expansion_128 + DB 102,15,58,223,200,8 + call $L$key_expansion_128 + DB 102,15,58,223,200,16 + call $L$key_expansion_128 + DB 102,15,58,223,200,32 + call $L$key_expansion_128 + DB 102,15,58,223,200,64 + call $L$key_expansion_128 + DB 102,15,58,223,200,128 + call $L$key_expansion_128 + DB 102,15,58,223,200,27 + call $L$key_expansion_128 + DB 102,15,58,223,200,54 + call $L$key_expansion_128 + movups XMMWORD[rax],xmm0 + mov DWORD[80+rax],edx + xor eax,eax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$10rounds_alt: + movdqa xmm5,XMMWORD[$L$key_rotate] + mov r10d,8 + movdqa xmm4,XMMWORD[$L$key_rcon1] + movdqa xmm2,xmm0 + movdqu XMMWORD[r8],xmm0 + jmp NEAR $L$oop_key128 + +ALIGN 16 +$L$oop_key128: +DB 102,15,56,0,197 + DB 102,15,56,221,196 + pslld xmm4,1 + lea rax,[16+rax] + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD[(-16)+rax],xmm0 + movdqa xmm2,xmm0 + + dec r10d + jnz NEAR $L$oop_key128 + + movdqa xmm4,XMMWORD[$L$key_rcon1b] + +DB 102,15,56,0,197 + DB 102,15,56,221,196 + pslld xmm4,1 + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD[rax],xmm0 + + movdqa xmm2,xmm0 +DB 102,15,56,0,197 + DB 102,15,56,221,196 + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD[16+rax],xmm0 + + mov DWORD[96+rax],edx + xor eax,eax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$12rounds: + movq xmm2,QWORD[16+rcx] + mov edx,11 + cmp r10d,268435456 + je NEAR $L$12rounds_alt + + movups XMMWORD[r8],xmm0 + DB 102,15,58,223,202,1 + call $L$key_expansion_192a_cold + DB 102,15,58,223,202,2 + call $L$key_expansion_192b + DB 102,15,58,223,202,4 + call $L$key_expansion_192a + DB 102,15,58,223,202,8 + call $L$key_expansion_192b + DB 102,15,58,223,202,16 + call $L$key_expansion_192a + DB 102,15,58,223,202,32 + call $L$key_expansion_192b + DB 102,15,58,223,202,64 + call $L$key_expansion_192a + DB 102,15,58,223,202,128 + call $L$key_expansion_192b + movups XMMWORD[rax],xmm0 + mov DWORD[48+rax],edx + xor rax,rax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$12rounds_alt: + movdqa xmm5,XMMWORD[$L$key_rotate192] + movdqa xmm4,XMMWORD[$L$key_rcon1] + mov r10d,8 + movdqu XMMWORD[r8],xmm0 + jmp NEAR $L$oop_key192 + +ALIGN 16 +$L$oop_key192: + movq QWORD[rax],xmm2 + movdqa xmm1,xmm2 +DB 102,15,56,0,213 + DB 102,15,56,221,212 + pslld xmm4,1 + lea rax,[24+rax] + + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + + pshufd xmm3,xmm0,0xff + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + + pxor xmm0,xmm2 + pxor xmm2,xmm3 + movdqu XMMWORD[(-16)+rax],xmm0 + + dec r10d + jnz NEAR $L$oop_key192 + + mov DWORD[32+rax],edx + xor eax,eax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$14rounds: + movups xmm2,XMMWORD[16+rcx] + mov edx,13 + lea rax,[16+rax] + cmp r10d,268435456 + je NEAR $L$14rounds_alt + + movups XMMWORD[r8],xmm0 + movups XMMWORD[16+r8],xmm2 + DB 102,15,58,223,202,1 + call $L$key_expansion_256a_cold + DB 102,15,58,223,200,1 + call $L$key_expansion_256b + DB 102,15,58,223,202,2 + call $L$key_expansion_256a + DB 102,15,58,223,200,2 + call $L$key_expansion_256b + DB 102,15,58,223,202,4 + call $L$key_expansion_256a + DB 102,15,58,223,200,4 + call $L$key_expansion_256b + DB 102,15,58,223,202,8 + call $L$key_expansion_256a + DB 102,15,58,223,200,8 + call $L$key_expansion_256b + DB 102,15,58,223,202,16 + call $L$key_expansion_256a + DB 102,15,58,223,200,16 + call $L$key_expansion_256b + DB 102,15,58,223,202,32 + call $L$key_expansion_256a + DB 102,15,58,223,200,32 + call $L$key_expansion_256b + DB 102,15,58,223,202,64 + call $L$key_expansion_256a + movups XMMWORD[rax],xmm0 + mov DWORD[16+rax],edx + xor rax,rax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$14rounds_alt: + movdqa xmm5,XMMWORD[$L$key_rotate] + movdqa xmm4,XMMWORD[$L$key_rcon1] + mov r10d,7 + movdqu XMMWORD[r8],xmm0 + movdqa xmm1,xmm2 + movdqu XMMWORD[16+r8],xmm2 + jmp NEAR $L$oop_key256 + +ALIGN 16 +$L$oop_key256: +DB 102,15,56,0,213 + DB 102,15,56,221,212 + + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pslld xmm4,1 + + pxor xmm0,xmm2 + movdqu XMMWORD[rax],xmm0 + + dec r10d + jz NEAR $L$done_key256 + + pshufd xmm2,xmm0,0xff + pxor xmm3,xmm3 + DB 102,15,56,221,211 + + movdqa xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm1,xmm3 + + pxor xmm2,xmm1 + movdqu XMMWORD[16+rax],xmm2 + lea rax,[32+rax] + movdqa xmm1,xmm2 + + jmp NEAR $L$oop_key256 + +$L$done_key256: + mov DWORD[16+rax],edx + xor eax,eax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$bad_keybits: + mov rax,-2 +$L$enc_key_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + add rsp,8 + + ret + +$L$SEH_end_set_encrypt_key: + +ALIGN 16 +$L$key_expansion_128: + movups XMMWORD[rax],xmm0 + lea rax,[16+rax] +$L$key_expansion_128_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret + +ALIGN 16 +$L$key_expansion_192a: + movups XMMWORD[rax],xmm0 + lea rax,[16+rax] +$L$key_expansion_192a_cold: + movaps xmm5,xmm2 +$L$key_expansion_192b_warm: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + ret + +ALIGN 16 +$L$key_expansion_192b: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups XMMWORD[rax],xmm5 + shufps xmm3,xmm2,78 + movups XMMWORD[16+rax],xmm3 + lea rax,[32+rax] + jmp NEAR $L$key_expansion_192b_warm + +ALIGN 16 +$L$key_expansion_256a: + movups XMMWORD[rax],xmm2 + lea rax,[16+rax] +$L$key_expansion_256a_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret + +ALIGN 16 +$L$key_expansion_256b: + movups XMMWORD[rax],xmm0 + lea rax,[16+rax] + + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + ret + + +section .rdata rdata align=8 +ALIGN 64 +$L$bswap_mask: + DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$increment32: + DD 6,6,6,0 +$L$increment64: + DD 1,0,0,0 +$L$xts_magic: + DD 0x87,0,1,0 +$L$increment1: + DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +$L$key_rotate: + DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +$L$key_rotate192: + DD 0x04070605,0x04070605,0x04070605,0x04070605 +$L$key_rcon1: + DD 1,1,1,1 +$L$key_rcon1b: + DD 0x1b,0x1b,0x1b,0x1b + + DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 + DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 + DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 + DB 115,108,46,111,114,103,62,0 +ALIGN 64 +section .text + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +ecb_ccm64_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[rax] + lea rdi,[512+r8] + mov ecx,8 + DD 0xa548f3fc + lea rax,[88+rax] + + jmp NEAR $L$common_seh_tail + + + +ALIGN 16 +ctr_xts_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[208+r8] + + lea rsi,[((-168))+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + + mov rbp,QWORD[((-8))+rax] + mov QWORD[160+r8],rbp + jmp NEAR $L$common_seh_tail + + + +ALIGN 16 +cbc_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[152+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$cbc_decrypt_bulk] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[120+r8] + + lea r10,[$L$cbc_decrypt_body] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + lea r10,[$L$cbc_ret] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[16+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + + mov rax,QWORD[208+r8] + + mov rbp,QWORD[((-8))+rax] + mov QWORD[160+r8],rbp + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_aes_hw_ecb_encrypt wrt ..imagebase + DD $L$SEH_end_aes_hw_ecb_encrypt wrt ..imagebase + DD $L$SEH_info_ecb wrt ..imagebase + + DD $L$SEH_begin_aes_hw_ctr32_encrypt_blocks wrt ..imagebase + DD $L$SEH_end_aes_hw_ctr32_encrypt_blocks wrt ..imagebase + DD $L$SEH_info_ctr32 wrt ..imagebase + DD $L$SEH_begin_aes_hw_cbc_encrypt wrt ..imagebase + DD $L$SEH_end_aes_hw_cbc_encrypt wrt ..imagebase + DD $L$SEH_info_cbc wrt ..imagebase + + DD aes_hw_set_decrypt_key wrt ..imagebase + DD $L$SEH_end_set_decrypt_key wrt ..imagebase + DD $L$SEH_info_key wrt ..imagebase + + DD aes_hw_set_encrypt_key wrt ..imagebase + DD $L$SEH_end_set_encrypt_key wrt ..imagebase + DD $L$SEH_info_key wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_ecb: + DB 9,0,0,0 + DD ecb_ccm64_se_handler wrt ..imagebase + DD $L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase +$L$SEH_info_ctr32: + DB 9,0,0,0 + DD ctr_xts_se_handler wrt ..imagebase + DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase +$L$SEH_info_cbc: + DB 9,0,0,0 + DD cbc_se_handler wrt ..imagebase +$L$SEH_info_key: + DB 0x01,0x04,0x01,0x00 + DB 0x04,0x02,0x00,0x00 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/aesv8-armv7-linux.S b/yass/third_party/boringssl/src/gen/bcm/aesv8-armv7-linux.S new file mode 100644 index 0000000000..420af9b6ea --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesv8-armv7-linux.S @@ -0,0 +1,789 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +#include + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-) +.fpu neon +.code 32 +#undef __thumb2__ +.align 5 +.Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.text + +.globl aes_hw_set_encrypt_key +.hidden aes_hw_set_encrypt_key +.type aes_hw_set_encrypt_key,%function +.align 5 +aes_hw_set_encrypt_key: +.Lenc_key: + mov r3,#-1 + cmp r0,#0 + beq .Lenc_key_abort + cmp r2,#0 + beq .Lenc_key_abort + mov r3,#-2 + cmp r1,#128 + blt .Lenc_key_abort + cmp r1,#256 + bgt .Lenc_key_abort + tst r1,#0x3f + bne .Lenc_key_abort + + adr r3,.Lrcon + cmp r1,#192 + + veor q0,q0,q0 + vld1.8 {q3},[r0]! + mov r1,#8 @ reuse r1 + vld1.32 {q1,q2},[r3]! + + blt .Loop128 + beq .L192 + b .L256 + +.align 4 +.Loop128: + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + bne .Loop128 + + vld1.32 {q1},[r3] + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + veor q3,q3,q10 + vst1.32 {q3},[r2] + add r2,r2,#0x50 + + mov r12,#10 + b .Ldone + +.align 4 +.L192: + vld1.8 {d16},[r0]! + vmov.i8 q10,#8 @ borrow q10 + vst1.32 {q3},[r2]! + vsub.i8 q2,q2,q10 @ adjust the mask + +.Loop192: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {d16},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + + vdup.32 q9,d7[1] + veor q9,q9,q8 + veor q10,q10,q1 + vext.8 q8,q0,q8,#12 + vshl.u8 q1,q1,#1 + veor q8,q8,q9 + veor q3,q3,q10 + veor q8,q8,q10 + vst1.32 {q3},[r2]! + bne .Loop192 + + mov r12,#12 + add r2,r2,#0x20 + b .Ldone + +.align 4 +.L256: + vld1.8 {q8},[r0] + mov r1,#7 + mov r12,#14 + vst1.32 {q3},[r2]! + +.Loop256: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q8},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + vst1.32 {q3},[r2]! + beq .Ldone + + vdup.32 q10,d7[1] + vext.8 q9,q0,q8,#12 +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + + veor q8,q8,q10 + b .Loop256 + +.Ldone: + str r12,[r2] + mov r3,#0 + +.Lenc_key_abort: + mov r0,r3 @ return value + + bx lr +.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key + +.globl aes_hw_set_decrypt_key +.hidden aes_hw_set_decrypt_key +.type aes_hw_set_decrypt_key,%function +.align 5 +aes_hw_set_decrypt_key: + stmdb sp!,{r4,lr} + bl .Lenc_key + + cmp r0,#0 + bne .Ldec_key_abort + + sub r2,r2,#240 @ restore original r2 + mov r4,#-16 + add r0,r2,r12,lsl#4 @ end of key schedule + + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + +.Loop_imc: + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + cmp r0,r2 + bhi .Loop_imc + + vld1.32 {q0},[r2] +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + vst1.32 {q0},[r0] + + eor r0,r0,r0 @ return value +.Ldec_key_abort: + ldmia sp!,{r4,pc} +.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key +.globl aes_hw_encrypt +.hidden aes_hw_encrypt +.type aes_hw_encrypt,%function +.align 5 +aes_hw_encrypt: + AARCH64_VALID_CALL_TARGET + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_enc: +.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 +.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q0},[r2]! + subs r3,r3,#2 +.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 +.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q1},[r2]! + bgt .Loop_enc + +.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 +.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q0},[r2] +.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_hw_encrypt,.-aes_hw_encrypt +.globl aes_hw_decrypt +.hidden aes_hw_decrypt +.type aes_hw_decrypt,%function +.align 5 +aes_hw_decrypt: + AARCH64_VALID_CALL_TARGET + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_dec: +.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 +.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q0},[r2]! + subs r3,r3,#2 +.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 +.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q1},[r2]! + bgt .Loop_dec + +.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 +.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q0},[r2] +.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_hw_decrypt,.-aes_hw_decrypt +.globl aes_hw_cbc_encrypt +.hidden aes_hw_cbc_encrypt +.type aes_hw_cbc_encrypt,%function +.align 5 +aes_hw_cbc_encrypt: + mov ip,sp + stmdb sp!,{r4,r5,r6,r7,r8,lr} + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so + ldmia ip,{r4,r5} @ load remaining args + subs r2,r2,#16 + mov r8,#16 + blo .Lcbc_abort + moveq r8,#0 + + cmp r5,#0 @ en- or decrypting? + ldr r5,[r3,#240] + and r2,r2,#-16 + vld1.8 {q6},[r4] + vld1.8 {q0},[r0],r8 + + vld1.32 {q8,q9},[r3] @ load key schedule... + sub r5,r5,#6 + add r7,r3,r5,lsl#4 @ pointer to last 7 round keys + sub r5,r5,#2 + vld1.32 {q10,q11},[r7]! + vld1.32 {q12,q13},[r7]! + vld1.32 {q14,q15},[r7]! + vld1.32 {q7},[r7] + + add r7,r3,#32 + mov r6,r5 + beq .Lcbc_dec + + cmp r5,#2 + veor q0,q0,q6 + veor q5,q8,q7 + beq .Lcbc_enc128 + + vld1.32 {q2,q3},[r7] + add r7,r3,#16 + add r6,r3,#16*4 + add r12,r3,#16*5 +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + add r14,r3,#16*6 + add r3,r3,#16*7 + b .Lenter_cbc_enc + +.align 4 +.Loop_cbc_enc: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc: +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q8},[r6] + cmp r5,#4 +.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r12] + beq .Lcbc_enc192 + +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q8},[r14] +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r3] + nop + +.Lcbc_enc192: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 +.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 +.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 +.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r7] @ re-pre-load rndkey[1] +.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + veor q6,q0,q7 + bhs .Loop_cbc_enc + + vst1.8 {q6},[r1]! + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + vld1.32 {q2,q3},[r7] +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc128: +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 +.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 +.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 +.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 +.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + veor q6,q0,q7 + bhs .Loop_cbc_enc128 + + vst1.8 {q6},[r1]! + b .Lcbc_done +.align 5 +.Lcbc_dec: + vld1.8 {q10},[r0]! + subs r2,r2,#32 @ bias + add r6,r5,#2 + vorr q3,q0,q0 + vorr q1,q0,q0 + vorr q11,q10,q10 + blo .Lcbc_dec_tail + + vorr q1,q10,q10 + vld1.8 {q10},[r0]! + vorr q2,q0,q0 + vorr q3,q1,q1 + vorr q11,q10,q10 + +.Loop3x_cbc_dec: +.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! + subs r6,r6,#2 +.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q9},[r7]! + bgt .Loop3x_cbc_dec + +.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q4,q6,q7 + subs r2,r2,#0x30 + veor q5,q2,q7 + movlo r6,r2 @ r6, r6, is zero at this point +.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q9,q3,q7 + add r0,r0,r6 @ r0 is adjusted in such way that + @ at exit from the loop q1-q10 + @ are loaded with last "words" + vorr q6,q11,q11 + mov r7,r3 +.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.8 {q2},[r0]! +.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.8 {q3},[r0]! +.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.8 {q11},[r0]! +.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 +.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 +.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + add r6,r5,#2 + veor q4,q4,q0 + veor q5,q5,q1 + veor q10,q10,q9 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vst1.8 {q4},[r1]! + vorr q0,q2,q2 + vst1.8 {q5},[r1]! + vorr q1,q3,q3 + vst1.8 {q10},[r1]! + vorr q10,q11,q11 + bhs .Loop3x_cbc_dec + + cmn r2,#0x30 + beq .Lcbc_done + nop + +.Lcbc_dec_tail: +.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! + subs r6,r6,#2 +.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q9},[r7]! + bgt .Lcbc_dec_tail + +.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 +.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 +.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + cmn r2,#0x20 +.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q5,q6,q7 +.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q9,q3,q7 +.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 +.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 + beq .Lcbc_dec_one + veor q5,q5,q1 + veor q9,q9,q10 + vorr q6,q11,q11 + vst1.8 {q5},[r1]! + vst1.8 {q9},[r1]! + b .Lcbc_done + +.Lcbc_dec_one: + veor q5,q5,q10 + vorr q6,q11,q11 + vst1.8 {q5},[r1]! + +.Lcbc_done: + vst1.8 {q6},[r4] +.Lcbc_abort: + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!,{r4,r5,r6,r7,r8,pc} +.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt +.globl aes_hw_ctr32_encrypt_blocks +.hidden aes_hw_ctr32_encrypt_blocks +.type aes_hw_ctr32_encrypt_blocks,%function +.align 5 +aes_hw_ctr32_encrypt_blocks: + mov ip,sp + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr} + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so + ldr r4, [ip] @ load remaining arg + ldr r5,[r3,#240] + + ldr r8, [r4, #12] + vld1.32 {q0},[r4] + + vld1.32 {q8,q9},[r3] @ load key schedule... + sub r5,r5,#4 + mov r12,#16 + cmp r2,#2 + add r7,r3,r5,lsl#4 @ pointer to last 5 round keys + sub r5,r5,#2 + vld1.32 {q12,q13},[r7]! + vld1.32 {q14,q15},[r7]! + vld1.32 {q7},[r7] + add r7,r3,#32 + mov r6,r5 + movlo r12,#0 + + @ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are + @ affected by silicon errata #1742098 [0] and #1655431 [1], + @ respectively, where the second instruction of an aese/aesmc + @ instruction pair may execute twice if an interrupt is taken right + @ after the first instruction consumes an input register of which a + @ single 32-bit lane has been updated the last time it was modified. + @ + @ This function uses a counter in one 32-bit lane. The + @ could write to q1 and q10 directly, but that trips this bugs. + @ We write to q6 and copy to the final register as a workaround. + @ + @ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice + @ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice +#ifndef __ARMEB__ + rev r8, r8 +#endif + add r10, r8, #1 + vorr q6,q0,q0 + rev r10, r10 + vmov.32 d13[1],r10 + add r8, r8, #2 + vorr q1,q6,q6 + bls .Lctr32_tail + rev r12, r8 + vmov.32 d13[1],r12 + sub r2,r2,#3 @ bias + vorr q10,q6,q6 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 +.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 +.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.32 {q8},[r7]! + subs r6,r6,#2 +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 +.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 +.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.32 {q9},[r7]! + bgt .Loop3x_ctr32 + +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 +.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 +.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 + vld1.8 {q2},[r0]! + add r9,r8,#1 +.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 +.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.8 {q3},[r0]! + rev r9,r9 +.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 +.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 +.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 +.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + vld1.8 {q11},[r0]! + mov r7,r3 +.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 +.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 +.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 +.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 +.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 +.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + veor q2,q2,q7 + add r10,r8,#2 +.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 +.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + veor q3,q3,q7 + add r8,r8,#3 +.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 +.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 +.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 +.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + @ Note the logic to update q0, q1, and q1 is written to work + @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in + @ 32-bit mode. See the comment above. + veor q11,q11,q7 + vmov.32 d13[1], r9 +.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 +.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + vorr q0,q6,q6 + rev r10,r10 +.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 +.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + vmov.32 d13[1], r10 + rev r12,r8 +.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 +.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + vorr q1,q6,q6 + vmov.32 d13[1], r12 +.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 +.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + vorr q10,q6,q6 + subs r2,r2,#3 +.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 +.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 +.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 + + veor q2,q2,q4 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + vst1.8 {q2},[r1]! + veor q3,q3,q5 + mov r6,r5 + vst1.8 {q3},[r1]! + veor q11,q11,q9 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vst1.8 {q11},[r1]! + bhs .Loop3x_ctr32 + + adds r2,r2,#3 + beq .Lctr32_done + cmp r2,#1 + mov r12,#16 + moveq r12,#0 + +.Lctr32_tail: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.32 {q8},[r7]! + subs r6,r6,#2 +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.32 {q9},[r7]! + bgt .Lctr32_tail + +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q2},[r0],r12 +.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q3},[r0] +.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + veor q2,q2,q7 +.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + veor q3,q3,q7 +.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 +.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 + + cmp r2,#1 + veor q2,q2,q0 + veor q3,q3,q1 + vst1.8 {q2},[r1]! + beq .Lctr32_done + vst1.8 {q3},[r1] + +.Lctr32_done: + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc} +.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-apple.S new file mode 100644 index 0000000000..144c4af34b --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-apple.S @@ -0,0 +1,791 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +#if __ARM_MAX_ARCH__>=7 +.text + +.section __TEXT,__const +.align 5 +Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.text + +.globl _aes_hw_set_encrypt_key +.private_extern _aes_hw_set_encrypt_key + +.align 5 +_aes_hw_set_encrypt_key: +Lenc_key: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + mov x3,#-1 + cmp x0,#0 + b.eq Lenc_key_abort + cmp x2,#0 + b.eq Lenc_key_abort + mov x3,#-2 + cmp w1,#128 + b.lt Lenc_key_abort + cmp w1,#256 + b.gt Lenc_key_abort + tst w1,#0x3f + b.ne Lenc_key_abort + + adrp x3,Lrcon@PAGE + add x3,x3,Lrcon@PAGEOFF + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt Loop128 + b.eq L192 + b L256 + +.align 4 +Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b Ldone + +.align 4 +L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.8b},[x2],#8 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b Ldone + +.align 4 +L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b Loop256 + +Ldone: + str w12,[x2] + mov x3,#0 + +Lenc_key_abort: + mov x0,x3 // return value + ldr x29,[sp],#16 + ret + + +.globl _aes_hw_set_decrypt_key +.private_extern _aes_hw_set_decrypt_key + +.align 5 +_aes_hw_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + bl Lenc_key + + cmp x0,#0 + b.ne Ldec_key_abort + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value +Ldec_key_abort: + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl _aes_hw_encrypt +.private_extern _aes_hw_encrypt + +.align 5 +_aes_hw_encrypt: + AARCH64_VALID_CALL_TARGET + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +Loop_enc: + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aese v2.16b,v1.16b + aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt Loop_enc + + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret + +.globl _aes_hw_decrypt +.private_extern _aes_hw_decrypt + +.align 5 +_aes_hw_decrypt: + AARCH64_VALID_CALL_TARGET + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +Loop_dec: + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aesd v2.16b,v1.16b + aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt Loop_dec + + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret + +.globl _aes_hw_cbc_encrypt +.private_extern _aes_hw_cbc_encrypt + +.align 5 +_aes_hw_cbc_encrypt: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq Lcbc_enc128 + + ld1 {v2.4s,v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b Lenter_cbc_enc + +.align 4 +Loop_cbc_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +Lenter_cbc_enc: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq Lcbc_enc192 + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x14] + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3] + nop + +Lcbc_enc192: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs Loop_cbc_enc + + st1 {v6.16b},[x1],#16 + b Lcbc_done + +.align 5 +Lcbc_enc128: + ld1 {v2.4s,v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b Lenter_cbc_enc128 +Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b Lcbc_done +.align 5 +Lcbc_dec: + ld1 {v18.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v19.16b,v18.16b,v18.16b + b.lo Lcbc_dec_tail + + orr v1.16b,v18.16b,v18.16b + ld1 {v18.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + orr v19.16b,v18.16b,v18.16b + +Loop3x_cbc_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt Loop3x_cbc_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v18.16b + // are loaded with last "words" + orr v6.16b,v19.16b,v19.16b + mov x7,x3 + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b + aesimc v18.16b,v18.16b + ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b + aesimc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b + aesimc v18.16b,v18.16b + ld1 {v19.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v18.16b,v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + orr v0.16b,v2.16b,v2.16b + st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v18.16b},[x1],#16 + orr v18.16b,v19.16b,v19.16b + b.hs Loop3x_cbc_dec + + cmn x2,#0x30 + b.eq Lcbc_done + nop + +Lcbc_dec_tail: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt Lcbc_dec_tail + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b + aesimc v18.16b,v18.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b + aesimc v18.16b,v18.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + b.eq Lcbc_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b Lcbc_done + +Lcbc_dec_one: + eor v5.16b,v5.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + +Lcbc_done: + st1 {v6.16b},[x4] +Lcbc_abort: + ldr x29,[sp],#16 + ret + +.globl _aes_hw_ctr32_encrypt_blocks +.private_extern _aes_hw_ctr32_encrypt_blocks + +.align 5 +_aes_hw_ctr32_encrypt_blocks: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 + csel x12,xzr,x12,lo + + // ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are + // affected by silicon errata #1742098 [0] and #1655431 [1], + // respectively, where the second instruction of an aese/aesmc + // instruction pair may execute twice if an interrupt is taken right + // after the first instruction consumes an input register of which a + // single 32-bit lane has been updated the last time it was modified. + // + // This function uses a counter in one 32-bit lane. The vmov lines + // could write to v1.16b and v18.16b directly, but that trips this bugs. + // We write to v6.16b and copy to the final register as a workaround. + // + // [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice + // [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice +#ifndef __AARCH64EB__ + rev w8, w8 +#endif + add w10, w8, #1 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v6.s[3],w10 + add w8, w8, #2 + orr v1.16b,v6.16b,v6.16b + b.ls Lctr32_tail + rev w12, w8 + mov v6.s[3],w12 + sub x2,x2,#3 // bias + orr v18.16b,v6.16b,v6.16b + b Loop3x_ctr32 + +.align 4 +Loop3x_ctr32: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt Loop3x_ctr32 + + aese v0.16b,v16.16b + aesmc v4.16b,v0.16b + aese v1.16b,v16.16b + aesmc v5.16b,v1.16b + ld1 {v2.16b},[x0],#16 + add w9,w8,#1 + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + rev w9,w9 + aese v4.16b,v17.16b + aesmc v4.16b,v4.16b + aese v5.16b,v17.16b + aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b + aesmc v17.16b,v18.16b + aese v4.16b,v20.16b + aesmc v4.16b,v4.16b + aese v5.16b,v20.16b + aesmc v5.16b,v5.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aese v17.16b,v20.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aesmc v4.16b,v4.16b + aese v5.16b,v21.16b + aesmc v5.16b,v5.16b + // Note the logic to update v0.16b, v1.16b, and v1.16b is written to work + // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in + // 32-bit mode. See the comment above. + eor v19.16b,v19.16b,v7.16b + mov v6.s[3], w9 + aese v17.16b,v21.16b + aesmc v17.16b,v17.16b + orr v0.16b,v6.16b,v6.16b + rev w10,w10 + aese v4.16b,v22.16b + aesmc v4.16b,v4.16b + mov v6.s[3], w10 + rev w12,w8 + aese v5.16b,v22.16b + aesmc v5.16b,v5.16b + orr v1.16b,v6.16b,v6.16b + mov v6.s[3], w12 + aese v17.16b,v22.16b + aesmc v17.16b,v17.16b + orr v18.16b,v6.16b,v6.16b + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 + eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v19.16b},[x1],#16 + b.hs Loop3x_ctr32 + + adds x2,x2,#3 + b.eq Lctr32_done + cmp x2,#1 + mov x12,#16 + csel x12,xzr,x12,eq + +Lctr32_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 + b.gt Lctr32_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq Lctr32_done + st1 {v3.16b},[x1] + +Lctr32_done: + ldr x29,[sp],#16 + ret + +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-linux.S new file mode 100644 index 0000000000..7d4bcb4ca1 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-linux.S @@ -0,0 +1,791 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv8-a+crypto +.section .rodata +.align 5 +.Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.text + +.globl aes_hw_set_encrypt_key +.hidden aes_hw_set_encrypt_key +.type aes_hw_set_encrypt_key,%function +.align 5 +aes_hw_set_encrypt_key: +.Lenc_key: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + mov x3,#-1 + cmp x0,#0 + b.eq .Lenc_key_abort + cmp x2,#0 + b.eq .Lenc_key_abort + mov x3,#-2 + cmp w1,#128 + b.lt .Lenc_key_abort + cmp w1,#256 + b.gt .Lenc_key_abort + tst w1,#0x3f + b.ne .Lenc_key_abort + + adrp x3,.Lrcon + add x3,x3,:lo12:.Lrcon + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne .Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b .Ldone + +.align 4 +.L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +.Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.8b},[x2],#8 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne .Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b .Ldone + +.align 4 +.L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +.Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq .Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b .Loop256 + +.Ldone: + str w12,[x2] + mov x3,#0 + +.Lenc_key_abort: + mov x0,x3 // return value + ldr x29,[sp],#16 + ret +.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key + +.globl aes_hw_set_decrypt_key +.hidden aes_hw_set_decrypt_key +.type aes_hw_set_decrypt_key,%function +.align 5 +aes_hw_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + bl .Lenc_key + + cmp x0,#0 + b.ne .Ldec_key_abort + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +.Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi .Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value +.Ldec_key_abort: + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key +.globl aes_hw_encrypt +.hidden aes_hw_encrypt +.type aes_hw_encrypt,%function +.align 5 +aes_hw_encrypt: + AARCH64_VALID_CALL_TARGET + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_enc: + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aese v2.16b,v1.16b + aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt .Loop_enc + + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_hw_encrypt,.-aes_hw_encrypt +.globl aes_hw_decrypt +.hidden aes_hw_decrypt +.type aes_hw_decrypt,%function +.align 5 +aes_hw_decrypt: + AARCH64_VALID_CALL_TARGET + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_dec: + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aesd v2.16b,v1.16b + aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt .Loop_dec + + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_hw_decrypt,.-aes_hw_decrypt +.globl aes_hw_cbc_encrypt +.hidden aes_hw_cbc_encrypt +.type aes_hw_cbc_encrypt,%function +.align 5 +aes_hw_cbc_encrypt: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq .Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq .Lcbc_enc128 + + ld1 {v2.4s,v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b .Lenter_cbc_enc + +.align 4 +.Loop_cbc_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq .Lcbc_enc192 + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x14] + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3] + nop + +.Lcbc_enc192: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc + + st1 {v6.16b},[x1],#16 + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + ld1 {v2.4s,v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b .Lcbc_done +.align 5 +.Lcbc_dec: + ld1 {v18.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v19.16b,v18.16b,v18.16b + b.lo .Lcbc_dec_tail + + orr v1.16b,v18.16b,v18.16b + ld1 {v18.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + orr v19.16b,v18.16b,v18.16b + +.Loop3x_cbc_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_cbc_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v18.16b + // are loaded with last "words" + orr v6.16b,v19.16b,v19.16b + mov x7,x3 + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b + aesimc v18.16b,v18.16b + ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b + aesimc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b + aesimc v18.16b,v18.16b + ld1 {v19.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v18.16b,v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + orr v0.16b,v2.16b,v2.16b + st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v18.16b},[x1],#16 + orr v18.16b,v19.16b,v19.16b + b.hs .Loop3x_cbc_dec + + cmn x2,#0x30 + b.eq .Lcbc_done + nop + +.Lcbc_dec_tail: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lcbc_dec_tail + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b + aesimc v18.16b,v18.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b + aesimc v18.16b,v18.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + b.eq .Lcbc_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b .Lcbc_done + +.Lcbc_dec_one: + eor v5.16b,v5.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + +.Lcbc_done: + st1 {v6.16b},[x4] +.Lcbc_abort: + ldr x29,[sp],#16 + ret +.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt +.globl aes_hw_ctr32_encrypt_blocks +.hidden aes_hw_ctr32_encrypt_blocks +.type aes_hw_ctr32_encrypt_blocks,%function +.align 5 +aes_hw_ctr32_encrypt_blocks: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 + csel x12,xzr,x12,lo + + // ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are + // affected by silicon errata #1742098 [0] and #1655431 [1], + // respectively, where the second instruction of an aese/aesmc + // instruction pair may execute twice if an interrupt is taken right + // after the first instruction consumes an input register of which a + // single 32-bit lane has been updated the last time it was modified. + // + // This function uses a counter in one 32-bit lane. The vmov lines + // could write to v1.16b and v18.16b directly, but that trips this bugs. + // We write to v6.16b and copy to the final register as a workaround. + // + // [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice + // [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice +#ifndef __AARCH64EB__ + rev w8, w8 +#endif + add w10, w8, #1 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v6.s[3],w10 + add w8, w8, #2 + orr v1.16b,v6.16b,v6.16b + b.ls .Lctr32_tail + rev w12, w8 + mov v6.s[3],w12 + sub x2,x2,#3 // bias + orr v18.16b,v6.16b,v6.16b + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_ctr32 + + aese v0.16b,v16.16b + aesmc v4.16b,v0.16b + aese v1.16b,v16.16b + aesmc v5.16b,v1.16b + ld1 {v2.16b},[x0],#16 + add w9,w8,#1 + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + rev w9,w9 + aese v4.16b,v17.16b + aesmc v4.16b,v4.16b + aese v5.16b,v17.16b + aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b + aesmc v17.16b,v18.16b + aese v4.16b,v20.16b + aesmc v4.16b,v4.16b + aese v5.16b,v20.16b + aesmc v5.16b,v5.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aese v17.16b,v20.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aesmc v4.16b,v4.16b + aese v5.16b,v21.16b + aesmc v5.16b,v5.16b + // Note the logic to update v0.16b, v1.16b, and v1.16b is written to work + // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in + // 32-bit mode. See the comment above. + eor v19.16b,v19.16b,v7.16b + mov v6.s[3], w9 + aese v17.16b,v21.16b + aesmc v17.16b,v17.16b + orr v0.16b,v6.16b,v6.16b + rev w10,w10 + aese v4.16b,v22.16b + aesmc v4.16b,v4.16b + mov v6.s[3], w10 + rev w12,w8 + aese v5.16b,v22.16b + aesmc v5.16b,v5.16b + orr v1.16b,v6.16b,v6.16b + mov v6.s[3], w12 + aese v17.16b,v22.16b + aesmc v17.16b,v17.16b + orr v18.16b,v6.16b,v6.16b + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 + eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v19.16b},[x1],#16 + b.hs .Loop3x_ctr32 + + adds x2,x2,#3 + b.eq .Lctr32_done + cmp x2,#1 + mov x12,#16 + csel x12,xzr,x12,eq + +.Lctr32_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lctr32_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq .Lctr32_done + st1 {v3.16b},[x1] + +.Lctr32_done: + ldr x29,[sp],#16 + ret +.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-win.S new file mode 100644 index 0000000000..a3ab33afc9 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesv8-armv8-win.S @@ -0,0 +1,803 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv8-a+crypto +.section .rodata +.align 5 +Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.text + +.globl aes_hw_set_encrypt_key + +.def aes_hw_set_encrypt_key + .type 32 +.endef +.align 5 +aes_hw_set_encrypt_key: +Lenc_key: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + mov x3,#-1 + cmp x0,#0 + b.eq Lenc_key_abort + cmp x2,#0 + b.eq Lenc_key_abort + mov x3,#-2 + cmp w1,#128 + b.lt Lenc_key_abort + cmp w1,#256 + b.gt Lenc_key_abort + tst w1,#0x3f + b.ne Lenc_key_abort + + adrp x3,Lrcon + add x3,x3,:lo12:Lrcon + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt Loop128 + b.eq L192 + b L256 + +.align 4 +Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b Ldone + +.align 4 +L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.8b},[x2],#8 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b Ldone + +.align 4 +L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b Loop256 + +Ldone: + str w12,[x2] + mov x3,#0 + +Lenc_key_abort: + mov x0,x3 // return value + ldr x29,[sp],#16 + ret + + +.globl aes_hw_set_decrypt_key + +.def aes_hw_set_decrypt_key + .type 32 +.endef +.align 5 +aes_hw_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + bl Lenc_key + + cmp x0,#0 + b.ne Ldec_key_abort + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value +Ldec_key_abort: + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl aes_hw_encrypt + +.def aes_hw_encrypt + .type 32 +.endef +.align 5 +aes_hw_encrypt: + AARCH64_VALID_CALL_TARGET + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +Loop_enc: + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aese v2.16b,v1.16b + aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt Loop_enc + + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret + +.globl aes_hw_decrypt + +.def aes_hw_decrypt + .type 32 +.endef +.align 5 +aes_hw_decrypt: + AARCH64_VALID_CALL_TARGET + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +Loop_dec: + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aesd v2.16b,v1.16b + aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt Loop_dec + + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret + +.globl aes_hw_cbc_encrypt + +.def aes_hw_cbc_encrypt + .type 32 +.endef +.align 5 +aes_hw_cbc_encrypt: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq Lcbc_enc128 + + ld1 {v2.4s,v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b Lenter_cbc_enc + +.align 4 +Loop_cbc_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +Lenter_cbc_enc: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq Lcbc_enc192 + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x14] + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3] + nop + +Lcbc_enc192: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs Loop_cbc_enc + + st1 {v6.16b},[x1],#16 + b Lcbc_done + +.align 5 +Lcbc_enc128: + ld1 {v2.4s,v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b Lenter_cbc_enc128 +Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b Lcbc_done +.align 5 +Lcbc_dec: + ld1 {v18.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v19.16b,v18.16b,v18.16b + b.lo Lcbc_dec_tail + + orr v1.16b,v18.16b,v18.16b + ld1 {v18.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + orr v19.16b,v18.16b,v18.16b + +Loop3x_cbc_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt Loop3x_cbc_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v18.16b + // are loaded with last "words" + orr v6.16b,v19.16b,v19.16b + mov x7,x3 + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b + aesimc v18.16b,v18.16b + ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b + aesimc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b + aesimc v18.16b,v18.16b + ld1 {v19.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v18.16b,v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + orr v0.16b,v2.16b,v2.16b + st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v18.16b},[x1],#16 + orr v18.16b,v19.16b,v19.16b + b.hs Loop3x_cbc_dec + + cmn x2,#0x30 + b.eq Lcbc_done + nop + +Lcbc_dec_tail: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt Lcbc_dec_tail + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b + aesimc v18.16b,v18.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b + aesimc v18.16b,v18.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + b.eq Lcbc_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b Lcbc_done + +Lcbc_dec_one: + eor v5.16b,v5.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + +Lcbc_done: + st1 {v6.16b},[x4] +Lcbc_abort: + ldr x29,[sp],#16 + ret + +.globl aes_hw_ctr32_encrypt_blocks + +.def aes_hw_ctr32_encrypt_blocks + .type 32 +.endef +.align 5 +aes_hw_ctr32_encrypt_blocks: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 + csel x12,xzr,x12,lo + + // ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are + // affected by silicon errata #1742098 [0] and #1655431 [1], + // respectively, where the second instruction of an aese/aesmc + // instruction pair may execute twice if an interrupt is taken right + // after the first instruction consumes an input register of which a + // single 32-bit lane has been updated the last time it was modified. + // + // This function uses a counter in one 32-bit lane. The vmov lines + // could write to v1.16b and v18.16b directly, but that trips this bugs. + // We write to v6.16b and copy to the final register as a workaround. + // + // [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice + // [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice +#ifndef __AARCH64EB__ + rev w8, w8 +#endif + add w10, w8, #1 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v6.s[3],w10 + add w8, w8, #2 + orr v1.16b,v6.16b,v6.16b + b.ls Lctr32_tail + rev w12, w8 + mov v6.s[3],w12 + sub x2,x2,#3 // bias + orr v18.16b,v6.16b,v6.16b + b Loop3x_ctr32 + +.align 4 +Loop3x_ctr32: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt Loop3x_ctr32 + + aese v0.16b,v16.16b + aesmc v4.16b,v0.16b + aese v1.16b,v16.16b + aesmc v5.16b,v1.16b + ld1 {v2.16b},[x0],#16 + add w9,w8,#1 + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + rev w9,w9 + aese v4.16b,v17.16b + aesmc v4.16b,v4.16b + aese v5.16b,v17.16b + aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b + aesmc v17.16b,v18.16b + aese v4.16b,v20.16b + aesmc v4.16b,v4.16b + aese v5.16b,v20.16b + aesmc v5.16b,v5.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aese v17.16b,v20.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aesmc v4.16b,v4.16b + aese v5.16b,v21.16b + aesmc v5.16b,v5.16b + // Note the logic to update v0.16b, v1.16b, and v1.16b is written to work + // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in + // 32-bit mode. See the comment above. + eor v19.16b,v19.16b,v7.16b + mov v6.s[3], w9 + aese v17.16b,v21.16b + aesmc v17.16b,v17.16b + orr v0.16b,v6.16b,v6.16b + rev w10,w10 + aese v4.16b,v22.16b + aesmc v4.16b,v4.16b + mov v6.s[3], w10 + rev w12,w8 + aese v5.16b,v22.16b + aesmc v5.16b,v5.16b + orr v1.16b,v6.16b,v6.16b + mov v6.s[3], w12 + aese v17.16b,v22.16b + aesmc v17.16b,v17.16b + orr v18.16b,v6.16b,v6.16b + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 + eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v19.16b},[x1],#16 + b.hs Loop3x_ctr32 + + adds x2,x2,#3 + b.eq Lctr32_done + cmp x2,#1 + mov x12,#16 + csel x12,xzr,x12,eq + +Lctr32_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 + b.gt Lctr32_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq Lctr32_done + st1 {v3.16b},[x1] + +Lctr32_done: + ldr x29,[sp],#16 + ret + +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-apple.S new file mode 100644 index 0000000000..13be797b48 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-apple.S @@ -0,0 +1,1555 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include +#if __ARM_MAX_ARCH__ >= 8 + + +.text +.globl _aes_gcm_enc_kernel +.private_extern _aes_gcm_enc_kernel + +.align 4 +_aes_gcm_enc_kernel: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp, #-128]! + mov x29, sp + stp x19, x20, [sp, #16] + mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + stp d8, d9, [sp, #64] + stp d10, d11, [sp, #80] + stp d12, d13, [sp, #96] + stp d14, d15, [sp, #112] + ldr w17, [x8, #240] + add x19, x8, x17, lsl #4 // borrow input_l1 for last key + ldp x13, x14, [x19] // load round N keys + ldr q31, [x19, #-16] // load round N-1 keys + add x4, x0, x1, lsr #3 // end_input_ptr + lsr x5, x1, #3 // byte_len + mov x15, x5 + ldp x10, x11, [x16] // ctr96_b64, ctr96_t32 + ld1 { v0.16b}, [x16] // special case vector load initial counter so we can start first AES block as quickly as possible + sub x5, x5, #1 // byte_len - 1 + ldr q18, [x8, #0] // load rk0 + and x5, x5, #0xffffffffffffffc0 // number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + ldr q25, [x8, #112] // load rk7 + add x5, x5, x0 + lsr x12, x11, #32 + fmov d2, x10 // CTR block 2 + orr w11, w11, w11 + rev w12, w12 // rev_ctr32 + fmov d1, x10 // CTR block 1 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 0 - round 0 + add w12, w12, #1 // increment rev_ctr32 + rev w9, w12 // CTR block 1 + fmov d3, x10 // CTR block 3 + orr x9, x11, x9, lsl #32 // CTR block 1 + add w12, w12, #1 // CTR block 1 + ldr q19, [x8, #16] // load rk1 + fmov v1.d[1], x9 // CTR block 1 + rev w9, w12 // CTR block 2 + add w12, w12, #1 // CTR block 2 + orr x9, x11, x9, lsl #32 // CTR block 2 + ldr q20, [x8, #32] // load rk2 + fmov v2.d[1], x9 // CTR block 2 + rev w9, w12 // CTR block 3 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 0 - round 1 + orr x9, x11, x9, lsl #32 // CTR block 3 + fmov v3.d[1], x9 // CTR block 3 + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 1 - round 0 + ldr q21, [x8, #48] // load rk3 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 0 - round 2 + ldr q24, [x8, #96] // load rk6 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 2 - round 0 + ldr q23, [x8, #80] // load rk5 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 1 - round 1 + ldr q14, [x6, #48] // load h3l | h3h + ext v14.16b, v14.16b, v14.16b, #8 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 3 - round 0 + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 2 - round 1 + ldr q22, [x8, #64] // load rk4 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 1 - round 2 + ldr q13, [x6, #32] // load h2l | h2h + ext v13.16b, v13.16b, v13.16b, #8 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 3 - round 1 + ldr q30, [x8, #192] // load rk12 + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 2 - round 2 + ldr q15, [x6, #80] // load h4l | h4h + ext v15.16b, v15.16b, v15.16b, #8 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 1 - round 3 + ldr q29, [x8, #176] // load rk11 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 3 - round 2 + ldr q26, [x8, #128] // load rk8 + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 2 - round 3 + add w12, w12, #1 // CTR block 3 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 0 - round 3 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 3 - round 3 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 2 - round 4 + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 0 - round 4 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 1 - round 4 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 3 - round 4 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 0 - round 5 + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 1 - round 5 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 3 - round 5 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 2 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 1 - round 6 + trn2 v17.2d, v14.2d, v15.2d // h4l | h3l + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 3 - round 6 + ldr q27, [x8, #144] // load rk9 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 0 - round 6 + ldr q12, [x6] // load h1l | h1h + ext v12.16b, v12.16b, v12.16b, #8 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 2 - round 6 + ldr q28, [x8, #160] // load rk10 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 1 - round 7 + trn1 v9.2d, v14.2d, v15.2d // h4h | h3h + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 0 - round 7 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 2 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 3 - round 7 + trn2 v16.2d, v12.2d, v13.2d // h2l | h1l + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 1 - round 8 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 2 - round 8 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 3 - round 8 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 0 - round 8 + b.lt Lenc_finish_first_blocks // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 1 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 2 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 3 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 0 - round 9 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 1 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 2 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 3 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 0 - round 10 + b.eq Lenc_finish_first_blocks // branch if AES-192 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 1 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 2 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 0 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 3 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 1 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 2 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 0 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 3 - round 12 + +Lenc_finish_first_blocks: + cmp x0, x5 // check if we have <= 4 blocks + eor v17.16b, v17.16b, v9.16b // h4k | h3k + aese v2.16b, v31.16b // AES block 2 - round N-1 + trn1 v8.2d, v12.2d, v13.2d // h2h | h1h + aese v1.16b, v31.16b // AES block 1 - round N-1 + aese v0.16b, v31.16b // AES block 0 - round N-1 + aese v3.16b, v31.16b // AES block 3 - round N-1 + eor v16.16b, v16.16b, v8.16b // h2k | h1k + b.ge Lenc_tail // handle tail + + ldp x19, x20, [x0, #16] // AES block 1 - load plaintext + rev w9, w12 // CTR block 4 + ldp x6, x7, [x0, #0] // AES block 0 - load plaintext + ldp x23, x24, [x0, #48] // AES block 3 - load plaintext + ldp x21, x22, [x0, #32] // AES block 2 - load plaintext + add x0, x0, #64 // AES input_ptr update + eor x19, x19, x13 // AES block 1 - round N low + eor x20, x20, x14 // AES block 1 - round N high + fmov d5, x19 // AES block 1 - mov low + eor x6, x6, x13 // AES block 0 - round N low + eor x7, x7, x14 // AES block 0 - round N high + eor x24, x24, x14 // AES block 3 - round N high + fmov d4, x6 // AES block 0 - mov low + cmp x0, x5 // check if we have <= 8 blocks + fmov v4.d[1], x7 // AES block 0 - mov high + eor x23, x23, x13 // AES block 3 - round N low + eor x21, x21, x13 // AES block 2 - round N low + fmov v5.d[1], x20 // AES block 1 - mov high + fmov d6, x21 // AES block 2 - mov low + add w12, w12, #1 // CTR block 4 + orr x9, x11, x9, lsl #32 // CTR block 4 + fmov d7, x23 // AES block 3 - mov low + eor x22, x22, x14 // AES block 2 - round N high + fmov v6.d[1], x22 // AES block 2 - mov high + eor v4.16b, v4.16b, v0.16b // AES block 0 - result + fmov d0, x10 // CTR block 4 + fmov v0.d[1], x9 // CTR block 4 + rev w9, w12 // CTR block 5 + add w12, w12, #1 // CTR block 5 + eor v5.16b, v5.16b, v1.16b // AES block 1 - result + fmov d1, x10 // CTR block 5 + orr x9, x11, x9, lsl #32 // CTR block 5 + fmov v1.d[1], x9 // CTR block 5 + rev w9, w12 // CTR block 6 + st1 { v4.16b}, [x2], #16 // AES block 0 - store result + fmov v7.d[1], x24 // AES block 3 - mov high + orr x9, x11, x9, lsl #32 // CTR block 6 + eor v6.16b, v6.16b, v2.16b // AES block 2 - result + st1 { v5.16b}, [x2], #16 // AES block 1 - store result + add w12, w12, #1 // CTR block 6 + fmov d2, x10 // CTR block 6 + fmov v2.d[1], x9 // CTR block 6 + st1 { v6.16b}, [x2], #16 // AES block 2 - store result + rev w9, w12 // CTR block 7 + orr x9, x11, x9, lsl #32 // CTR block 7 + eor v7.16b, v7.16b, v3.16b // AES block 3 - result + st1 { v7.16b}, [x2], #16 // AES block 3 - store result + b.ge Lenc_prepretail // do prepretail + +Lenc_main_loop: // main loop start + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free) + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d3, x10 // CTR block 4k+3 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + fmov v3.d[1], x9 // CTR block 4k+3 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + ldp x23, x24, [x0, #48] // AES block 4k+7 - load plaintext + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + ldp x21, x22, [x0, #32] // AES block 4k+6 - load plaintext + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + eor v4.16b, v4.16b, v11.16b // PRE 1 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + eor x23, x23, x13 // AES block 4k+7 - round N low + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + mov d10, v17.d[1] // GHASH block 4k - mid + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + eor x22, x22, x14 // AES block 4k+6 - round N high + mov d8, v4.d[1] // GHASH block 4k - mid + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free) + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 free) + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free) + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + ldp x19, x20, [x0, #16] // AES block 4k+5 - load plaintext + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + mov d4, v7.d[1] // GHASH block 4k+3 - mid + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor x19, x19, x13 // AES block 4k+5 - round N low + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + eor x21, x21, x13 // AES block 4k+6 - round N low + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + movi v8.8b, #0xc2 + pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + cmp x17, #12 // setup flags for AES-128/192/256 check + fmov d5, x19 // AES block 4k+5 - mov low + ldp x6, x7, [x0, #0] // AES block 4k+4 - load plaintext + b.lt Lenc_main_loop_continue // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + b.eq Lenc_main_loop_continue // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +Lenc_main_loop_continue: + shl d8, d8, #56 // mod_constant + eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid + add w12, w12, #1 // CTR block 4k+3 + eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + add x0, x0, #64 // AES input_ptr update + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + rev w9, w12 // CTR block 4k+8 + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor x6, x6, x13 // AES block 4k+4 - round N low + eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up + eor x7, x7, x14 // AES block 4k+4 - round N high + fmov d4, x6 // AES block 4k+4 - mov low + orr x9, x11, x9, lsl #32 // CTR block 4k+8 + eor v7.16b, v9.16b, v7.16b // MODULO - fold into mid + eor x20, x20, x14 // AES block 4k+5 - round N high + eor x24, x24, x14 // AES block 4k+7 - round N high + add w12, w12, #1 // CTR block 4k+8 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + fmov v4.d[1], x7 // AES block 4k+4 - mov high + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + fmov d7, x23 // AES block 4k+7 - mov low + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + fmov v5.d[1], x20 // AES block 4k+5 - mov high + fmov d6, x21 // AES block 4k+6 - mov low + cmp x0, x5 // LOOP CONTROL + fmov v6.d[1], x22 // AES block 4k+6 - mov high + pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor v4.16b, v4.16b, v0.16b // AES block 4k+4 - result + fmov d0, x10 // CTR block 4k+8 + fmov v0.d[1], x9 // CTR block 4k+8 + rev w9, w12 // CTR block 4k+9 + add w12, w12, #1 // CTR block 4k+9 + eor v5.16b, v5.16b, v1.16b // AES block 4k+5 - result + fmov d1, x10 // CTR block 4k+9 + orr x9, x11, x9, lsl #32 // CTR block 4k+9 + fmov v1.d[1], x9 // CTR block 4k+9 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + rev w9, w12 // CTR block 4k+10 + st1 { v4.16b}, [x2], #16 // AES block 4k+4 - store result + orr x9, x11, x9, lsl #32 // CTR block 4k+10 + eor v11.16b, v11.16b, v9.16b // MODULO - fold into low + fmov v7.d[1], x24 // AES block 4k+7 - mov high + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + st1 { v5.16b}, [x2], #16 // AES block 4k+5 - store result + add w12, w12, #1 // CTR block 4k+10 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + eor v6.16b, v6.16b, v2.16b // AES block 4k+6 - result + fmov d2, x10 // CTR block 4k+10 + st1 { v6.16b}, [x2], #16 // AES block 4k+6 - store result + fmov v2.d[1], x9 // CTR block 4k+10 + rev w9, w12 // CTR block 4k+11 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + orr x9, x11, x9, lsl #32 // CTR block 4k+11 + eor v7.16b, v7.16b, v3.16b // AES block 4k+7 - result + st1 { v7.16b}, [x2], #16 // AES block 4k+7 - store result + b.lt Lenc_main_loop + +Lenc_prepretail: // PREPRETAIL + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free) + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + fmov d3, x10 // CTR block 4k+3 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free) + fmov v3.d[1], x9 // CTR block 4k+3 + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free) + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + mov d8, v4.d[1] // GHASH block 4k - mid + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 free) + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + add w12, w12, #1 // CTR block 4k+3 + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + mov d4, v7.d[1] // GHASH block 4k+3 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + movi v8.8b, #0xc2 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + shl d8, d8, #56 // mod_constant + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid + pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v10.16b, v10.16b, v9.16b // karatsuba tidy up + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + pmull v4.1q, v9.1d, v8.1d + ext v9.16b, v9.16b, v9.16b, #8 + eor v10.16b, v10.16b, v11.16b + b.lt Lenc_finish_prepretail // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + b.eq Lenc_finish_prepretail // branch if AES-192 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + +Lenc_finish_prepretail: + eor v10.16b, v10.16b, v4.16b + eor v10.16b, v10.16b, v9.16b + pmull v4.1q, v10.1d, v8.1d + ext v10.16b, v10.16b, v10.16b, #8 + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + eor v11.16b, v11.16b, v4.16b + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + eor v11.16b, v11.16b, v10.16b + +Lenc_tail: // TAIL + ext v8.16b, v11.16b, v11.16b, #8 // prepare final partial tag + sub x5, x4, x0 // main_end_input_ptr is number of bytes left to process + ldp x6, x7, [x0], #16 // AES block 4k+4 - load plaintext + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + cmp x5, #48 + fmov d4, x6 // AES block 4k+4 - mov low + fmov v4.d[1], x7 // AES block 4k+4 - mov high + eor v5.16b, v4.16b, v0.16b // AES block 4k+4 - result + b.gt Lenc_blocks_more_than_3 + cmp x5, #32 + mov v3.16b, v2.16b + movi v11.8b, #0 + movi v9.8b, #0 + sub w12, w12, #1 + mov v2.16b, v1.16b + movi v10.8b, #0 + b.gt Lenc_blocks_more_than_2 + mov v3.16b, v1.16b + sub w12, w12, #1 + cmp x5, #16 + b.gt Lenc_blocks_more_than_1 + sub w12, w12, #1 + b Lenc_blocks_less_than_1 +Lenc_blocks_more_than_3: // blocks left > 3 + st1 { v5.16b}, [x2], #16 // AES final-3 block - store result + ldp x6, x7, [x0], #16 // AES final-2 block - load input low & high + rev64 v4.16b, v5.16b // GHASH final-3 block + eor x6, x6, x13 // AES final-2 block - round N low + eor v4.16b, v4.16b, v8.16b // feed in partial tag + eor x7, x7, x14 // AES final-2 block - round N high + mov d22, v4.d[1] // GHASH final-3 block - mid + fmov d5, x6 // AES final-2 block - mov low + fmov v5.d[1], x7 // AES final-2 block - mov high + eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid + movi v8.8b, #0 // suppress further partial tag feed in + mov d10, v17.d[1] // GHASH final-3 block - mid + pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low + pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high + pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid + eor v5.16b, v5.16b, v1.16b // AES final-2 block - result +Lenc_blocks_more_than_2: // blocks left > 2 + st1 { v5.16b}, [x2], #16 // AES final-2 block - store result + ldp x6, x7, [x0], #16 // AES final-1 block - load input low & high + rev64 v4.16b, v5.16b // GHASH final-2 block + eor x6, x6, x13 // AES final-1 block - round N low + eor v4.16b, v4.16b, v8.16b // feed in partial tag + fmov d5, x6 // AES final-1 block - mov low + eor x7, x7, x14 // AES final-1 block - round N high + fmov v5.d[1], x7 // AES final-1 block - mov high + movi v8.8b, #0 // suppress further partial tag feed in + pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high + mov d22, v4.d[1] // GHASH final-2 block - mid + pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low + eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid + eor v5.16b, v5.16b, v2.16b // AES final-1 block - result + eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high + pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low + eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid +Lenc_blocks_more_than_1: // blocks left > 1 + st1 { v5.16b}, [x2], #16 // AES final-1 block - store result + rev64 v4.16b, v5.16b // GHASH final-1 block + ldp x6, x7, [x0], #16 // AES final block - load input low & high + eor v4.16b, v4.16b, v8.16b // feed in partial tag + movi v8.8b, #0 // suppress further partial tag feed in + eor x6, x6, x13 // AES final block - round N low + mov d22, v4.d[1] // GHASH final-1 block - mid + pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high + eor x7, x7, x14 // AES final block - round N high + eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high + ins v22.d[1], v22.d[0] // GHASH final-1 block - mid + fmov d5, x6 // AES final block - mov low + fmov v5.d[1], x7 // AES final block - mov high + pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid + pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low + eor v5.16b, v5.16b, v3.16b // AES final block - result + eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low +Lenc_blocks_less_than_1: // blocks left <= 1 + and x1, x1, #127 // bit_length %= 128 + mvn x13, xzr // rkN_l = 0xffffffffffffffff + sub x1, x1, #128 // bit_length -= 128 + neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128]) + ld1 { v18.16b}, [x2] // load existing bytes where the possibly partial last block is to be stored + mvn x14, xzr // rkN_h = 0xffffffffffffffff + and x1, x1, #127 // bit_length %= 128 + lsr x14, x14, x1 // rkN_h is mask for top 64b of last block + cmp x1, #64 + csel x6, x13, x14, lt + csel x7, x14, xzr, lt + fmov d0, x6 // ctr0b is mask for last block + fmov v0.d[1], x7 + and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits + rev64 v4.16b, v5.16b // GHASH final block + eor v4.16b, v4.16b, v8.16b // feed in partial tag + bif v5.16b, v18.16b, v0.16b // insert existing bytes in top end of result before storing + pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high + mov d8, v4.d[1] // GHASH final block - mid + rev w9, w12 + pmull v21.1q, v4.1d, v12.1d // GHASH final block - low + eor v9.16b, v9.16b, v20.16b // GHASH final block - high + eor v8.8b, v8.8b, v4.8b // GHASH final block - mid + pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final block - low + eor v10.16b, v10.16b, v8.16b // GHASH final block - mid + movi v8.8b, #0xc2 + eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + shl d8, d8, #56 // mod_constant + eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + str w9, [x16, #12] // store the updated counter + st1 { v5.16b}, [x2] // store all 16B + eor v11.16b, v11.16b, v9.16b // MODULO - fold into low + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp d8, d9, [sp, #64] + ldp d10, d11, [sp, #80] + ldp d12, d13, [sp, #96] + ldp d14, d15, [sp, #112] + ldp x29, x30, [sp], #128 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl _aes_gcm_dec_kernel +.private_extern _aes_gcm_dec_kernel + +.align 4 +_aes_gcm_dec_kernel: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp, #-128]! + mov x29, sp + stp x19, x20, [sp, #16] + mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + stp d8, d9, [sp, #64] + stp d10, d11, [sp, #80] + stp d12, d13, [sp, #96] + stp d14, d15, [sp, #112] + ldr w17, [x8, #240] + add x19, x8, x17, lsl #4 // borrow input_l1 for last key + ldp x13, x14, [x19] // load round N keys + ldr q31, [x19, #-16] // load round N-1 keys + lsr x5, x1, #3 // byte_len + mov x15, x5 + ldp x10, x11, [x16] // ctr96_b64, ctr96_t32 + ldr q26, [x8, #128] // load rk8 + sub x5, x5, #1 // byte_len - 1 + ldr q25, [x8, #112] // load rk7 + and x5, x5, #0xffffffffffffffc0 // number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + add x4, x0, x1, lsr #3 // end_input_ptr + ldr q24, [x8, #96] // load rk6 + lsr x12, x11, #32 + ldr q23, [x8, #80] // load rk5 + orr w11, w11, w11 + ldr q21, [x8, #48] // load rk3 + add x5, x5, x0 + rev w12, w12 // rev_ctr32 + add w12, w12, #1 // increment rev_ctr32 + fmov d3, x10 // CTR block 3 + rev w9, w12 // CTR block 1 + add w12, w12, #1 // CTR block 1 + fmov d1, x10 // CTR block 1 + orr x9, x11, x9, lsl #32 // CTR block 1 + ld1 { v0.16b}, [x16] // special case vector load initial counter so we can start first AES block as quickly as possible + fmov v1.d[1], x9 // CTR block 1 + rev w9, w12 // CTR block 2 + add w12, w12, #1 // CTR block 2 + fmov d2, x10 // CTR block 2 + orr x9, x11, x9, lsl #32 // CTR block 2 + fmov v2.d[1], x9 // CTR block 2 + rev w9, w12 // CTR block 3 + orr x9, x11, x9, lsl #32 // CTR block 3 + ldr q18, [x8, #0] // load rk0 + fmov v3.d[1], x9 // CTR block 3 + add w12, w12, #1 // CTR block 3 + ldr q22, [x8, #64] // load rk4 + ldr q19, [x8, #16] // load rk1 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 0 - round 0 + ldr q14, [x6, #48] // load h3l | h3h + ext v14.16b, v14.16b, v14.16b, #8 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 3 - round 0 + ldr q15, [x6, #80] // load h4l | h4h + ext v15.16b, v15.16b, v15.16b, #8 + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 1 - round 0 + ldr q13, [x6, #32] // load h2l | h2h + ext v13.16b, v13.16b, v13.16b, #8 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 2 - round 0 + ldr q20, [x8, #32] // load rk2 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 0 - round 1 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 1 - round 1 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 2 - round 1 + ldr q27, [x8, #144] // load rk9 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 3 - round 1 + ldr q30, [x8, #192] // load rk12 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 0 - round 2 + ldr q12, [x6] // load h1l | h1h + ext v12.16b, v12.16b, v12.16b, #8 + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 2 - round 2 + ldr q28, [x8, #160] // load rk10 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 3 - round 2 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 0 - round 3 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 1 - round 2 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 3 - round 3 + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 0 - round 4 + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 2 - round 3 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 1 - round 3 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 3 - round 4 + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 2 - round 4 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 1 - round 4 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 3 - round 5 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 0 - round 5 + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 1 - round 5 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 2 - round 5 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 0 - round 6 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 3 - round 6 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 1 - round 6 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 2 - round 6 + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 0 - round 7 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 1 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 3 - round 7 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 0 - round 8 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 2 - round 7 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 3 - round 8 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 1 - round 8 + ldr q29, [x8, #176] // load rk11 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 2 - round 8 + b.lt Ldec_finish_first_blocks // branch if AES-128 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 0 - round 9 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 1 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 3 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 2 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 0 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 1 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 3 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 2 - round 10 + b.eq Ldec_finish_first_blocks // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 0 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 3 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 1 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 2 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 1 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 0 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 2 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 3 - round 12 + +Ldec_finish_first_blocks: + cmp x0, x5 // check if we have <= 4 blocks + trn1 v9.2d, v14.2d, v15.2d // h4h | h3h + trn2 v17.2d, v14.2d, v15.2d // h4l | h3l + trn1 v8.2d, v12.2d, v13.2d // h2h | h1h + trn2 v16.2d, v12.2d, v13.2d // h2l | h1l + eor v17.16b, v17.16b, v9.16b // h4k | h3k + aese v1.16b, v31.16b // AES block 1 - round N-1 + aese v2.16b, v31.16b // AES block 2 - round N-1 + eor v16.16b, v16.16b, v8.16b // h2k | h1k + aese v3.16b, v31.16b // AES block 3 - round N-1 + aese v0.16b, v31.16b // AES block 0 - round N-1 + b.ge Ldec_tail // handle tail + + ldr q4, [x0, #0] // AES block 0 - load ciphertext + ldr q5, [x0, #16] // AES block 1 - load ciphertext + rev w9, w12 // CTR block 4 + eor v0.16b, v4.16b, v0.16b // AES block 0 - result + eor v1.16b, v5.16b, v1.16b // AES block 1 - result + rev64 v5.16b, v5.16b // GHASH block 1 + ldr q7, [x0, #48] // AES block 3 - load ciphertext + mov x7, v0.d[1] // AES block 0 - mov high + mov x6, v0.d[0] // AES block 0 - mov low + rev64 v4.16b, v4.16b // GHASH block 0 + add w12, w12, #1 // CTR block 4 + fmov d0, x10 // CTR block 4 + orr x9, x11, x9, lsl #32 // CTR block 4 + fmov v0.d[1], x9 // CTR block 4 + rev w9, w12 // CTR block 5 + add w12, w12, #1 // CTR block 5 + mov x19, v1.d[0] // AES block 1 - mov low + orr x9, x11, x9, lsl #32 // CTR block 5 + mov x20, v1.d[1] // AES block 1 - mov high + eor x7, x7, x14 // AES block 0 - round N high + eor x6, x6, x13 // AES block 0 - round N low + stp x6, x7, [x2], #16 // AES block 0 - store result + fmov d1, x10 // CTR block 5 + ldr q6, [x0, #32] // AES block 2 - load ciphertext + add x0, x0, #64 // AES input_ptr update + fmov v1.d[1], x9 // CTR block 5 + rev w9, w12 // CTR block 6 + add w12, w12, #1 // CTR block 6 + eor x19, x19, x13 // AES block 1 - round N low + orr x9, x11, x9, lsl #32 // CTR block 6 + eor x20, x20, x14 // AES block 1 - round N high + stp x19, x20, [x2], #16 // AES block 1 - store result + eor v2.16b, v6.16b, v2.16b // AES block 2 - result + cmp x0, x5 // check if we have <= 8 blocks + b.ge Ldec_prepretail // do prepretail + +Ldec_main_loop: // main loop start + mov x21, v2.d[0] // AES block 4k+2 - mov low + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + mov x22, v2.d[1] // AES block 4k+2 - mov high + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d2, x10 // CTR block 4k+6 + fmov v2.d[1], x9 // CTR block 4k+6 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev w9, w12 // CTR block 4k+7 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + mov x24, v3.d[1] // AES block 4k+3 - mov high + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + mov x23, v3.d[0] // AES block 4k+3 - mov low + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + mov d8, v4.d[1] // GHASH block 4k - mid + fmov d3, x10 // CTR block 4k+7 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + orr x9, x11, x9, lsl #32 // CTR block 4k+7 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + fmov v3.d[1], x9 // CTR block 4k+7 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + eor x22, x22, x14 // AES block 4k+2 - round N high + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + rev64 v6.16b, v6.16b // GHASH block 4k+2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + eor x21, x21, x13 // AES block 4k+2 - round N low + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + stp x21, x22, [x2], #16 // AES block 4k+2 - store result + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + rev64 v7.16b, v7.16b // GHASH block 4k+3 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + eor x23, x23, x13 // AES block 4k+3 - round N low + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + eor x24, x24, x14 // AES block 4k+3 - round N high + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + add w12, w12, #1 // CTR block 4k+7 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + rev w9, w12 // CTR block 4k+8 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + add w12, w12, #1 // CTR block 4k+8 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + mov d6, v7.d[1] // GHASH block 4k+3 - mid + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + orr x9, x11, x9, lsl #32 // CTR block 4k+8 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + cmp x17, #12 // setup flags for AES-128/192/256 check + eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid + movi v8.8b, #0xc2 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + shl d8, d8, #56 // mod_constant + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + b.lt Ldec_main_loop_continue // branch if AES-128 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + b.eq Ldec_main_loop_continue // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +Ldec_main_loop_continue: + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + ldr q4, [x0, #0] // AES block 4k+4 - load ciphertext + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + ldr q5, [x0, #16] // AES block 4k+5 - load ciphertext + eor v0.16b, v4.16b, v0.16b // AES block 4k+4 - result + stp x23, x24, [x2], #16 // AES block 4k+3 - store result + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + ldr q7, [x0, #48] // AES block 4k+7 - load ciphertext + ldr q6, [x0, #32] // AES block 4k+6 - load ciphertext + mov x7, v0.d[1] // AES block 4k+4 - mov high + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + add x0, x0, #64 // AES input_ptr update + mov x6, v0.d[0] // AES block 4k+4 - mov low + fmov d0, x10 // CTR block 4k+8 + fmov v0.d[1], x9 // CTR block 4k+8 + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor v1.16b, v5.16b, v1.16b // AES block 4k+5 - result + rev w9, w12 // CTR block 4k+9 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + orr x9, x11, x9, lsl #32 // CTR block 4k+9 + cmp x0, x5 // LOOP CONTROL + add w12, w12, #1 // CTR block 4k+9 + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + mov x20, v1.d[1] // AES block 4k+5 - mov high + eor v2.16b, v6.16b, v2.16b // AES block 4k+6 - result + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + mov x19, v1.d[0] // AES block 4k+5 - mov low + fmov d1, x10 // CTR block 4k+9 + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + fmov v1.d[1], x9 // CTR block 4k+9 + rev w9, w12 // CTR block 4k+10 + add w12, w12, #1 // CTR block 4k+10 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + orr x9, x11, x9, lsl #32 // CTR block 4k+10 + rev64 v5.16b, v5.16b // GHASH block 4k+5 + eor x20, x20, x14 // AES block 4k+5 - round N high + stp x6, x7, [x2], #16 // AES block 4k+4 - store result + eor x19, x19, x13 // AES block 4k+5 - round N low + stp x19, x20, [x2], #16 // AES block 4k+5 - store result + rev64 v4.16b, v4.16b // GHASH block 4k+4 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + b.lt Ldec_main_loop + +Ldec_prepretail: // PREPRETAIL + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + mov x21, v2.d[0] // AES block 4k+2 - mov low + eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + mov x22, v2.d[1] // AES block 4k+2 - mov high + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d2, x10 // CTR block 4k+6 + fmov v2.d[1], x9 // CTR block 4k+6 + rev w9, w12 // CTR block 4k+7 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev64 v6.16b, v6.16b // GHASH block 4k+2 + orr x9, x11, x9, lsl #32 // CTR block 4k+7 + mov x23, v3.d[0] // AES block 4k+3 - mov low + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + mov x24, v3.d[1] // AES block 4k+3 - mov high + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + mov d8, v4.d[1] // GHASH block 4k - mid + fmov d3, x10 // CTR block 4k+7 + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + fmov v3.d[1], x9 // CTR block 4k+7 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + rev64 v7.16b, v7.16b // GHASH block 4k+3 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + mov d6, v7.d[1] // GHASH block 4k+3 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + movi v8.8b, #0xc2 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low + pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + cmp x17, #12 // setup flags for AES-128/192/256 check + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + shl d8, d8, #56 // mod_constant + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + b.lt Ldec_finish_prepretail // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + b.eq Ldec_finish_prepretail // branch if AES-192 + + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +Ldec_finish_prepretail: + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor x22, x22, x14 // AES block 4k+2 - round N high + eor x23, x23, x13 // AES block 4k+3 - round N low + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + add w12, w12, #1 // CTR block 4k+7 + eor x21, x21, x13 // AES block 4k+2 - round N low + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor x24, x24, x14 // AES block 4k+3 - round N high + stp x21, x22, [x2], #16 // AES block 4k+2 - store result + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + stp x23, x24, [x2], #16 // AES block 4k+3 - store result + + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + +Ldec_tail: // TAIL + sub x5, x4, x0 // main_end_input_ptr is number of bytes left to process + ld1 { v5.16b}, [x0], #16 // AES block 4k+4 - load ciphertext + eor v0.16b, v5.16b, v0.16b // AES block 4k+4 - result + mov x6, v0.d[0] // AES block 4k+4 - mov low + mov x7, v0.d[1] // AES block 4k+4 - mov high + ext v8.16b, v11.16b, v11.16b, #8 // prepare final partial tag + cmp x5, #48 + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + b.gt Ldec_blocks_more_than_3 + sub w12, w12, #1 + mov v3.16b, v2.16b + movi v10.8b, #0 + movi v11.8b, #0 + cmp x5, #32 + movi v9.8b, #0 + mov v2.16b, v1.16b + b.gt Ldec_blocks_more_than_2 + sub w12, w12, #1 + mov v3.16b, v1.16b + cmp x5, #16 + b.gt Ldec_blocks_more_than_1 + sub w12, w12, #1 + b Ldec_blocks_less_than_1 +Ldec_blocks_more_than_3: // blocks left > 3 + rev64 v4.16b, v5.16b // GHASH final-3 block + ld1 { v5.16b}, [x0], #16 // AES final-2 block - load ciphertext + stp x6, x7, [x2], #16 // AES final-3 block - store result + mov d10, v17.d[1] // GHASH final-3 block - mid + eor v4.16b, v4.16b, v8.16b // feed in partial tag + eor v0.16b, v5.16b, v1.16b // AES final-2 block - result + mov d22, v4.d[1] // GHASH final-3 block - mid + mov x6, v0.d[0] // AES final-2 block - mov low + mov x7, v0.d[1] // AES final-2 block - mov high + eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid + movi v8.8b, #0 // suppress further partial tag feed in + pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high + pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid + eor x6, x6, x13 // AES final-2 block - round N low + pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low + eor x7, x7, x14 // AES final-2 block - round N high +Ldec_blocks_more_than_2: // blocks left > 2 + rev64 v4.16b, v5.16b // GHASH final-2 block + ld1 { v5.16b}, [x0], #16 // AES final-1 block - load ciphertext + eor v4.16b, v4.16b, v8.16b // feed in partial tag + stp x6, x7, [x2], #16 // AES final-2 block - store result + eor v0.16b, v5.16b, v2.16b // AES final-1 block - result + mov d22, v4.d[1] // GHASH final-2 block - mid + pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low + pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high + eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid + mov x6, v0.d[0] // AES final-1 block - mov low + mov x7, v0.d[1] // AES final-1 block - mov high + eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low + movi v8.8b, #0 // suppress further partial tag feed in + pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high + eor x6, x6, x13 // AES final-1 block - round N low + eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid + eor x7, x7, x14 // AES final-1 block - round N high +Ldec_blocks_more_than_1: // blocks left > 1 + stp x6, x7, [x2], #16 // AES final-1 block - store result + rev64 v4.16b, v5.16b // GHASH final-1 block + ld1 { v5.16b}, [x0], #16 // AES final block - load ciphertext + eor v4.16b, v4.16b, v8.16b // feed in partial tag + movi v8.8b, #0 // suppress further partial tag feed in + mov d22, v4.d[1] // GHASH final-1 block - mid + eor v0.16b, v5.16b, v3.16b // AES final block - result + pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high + eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid + pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low + mov x6, v0.d[0] // AES final block - mov low + ins v22.d[1], v22.d[0] // GHASH final-1 block - mid + mov x7, v0.d[1] // AES final block - mov high + pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid + eor x6, x6, x13 // AES final block - round N low + eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low + eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high + eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid + eor x7, x7, x14 // AES final block - round N high +Ldec_blocks_less_than_1: // blocks left <= 1 + and x1, x1, #127 // bit_length %= 128 + mvn x14, xzr // rkN_h = 0xffffffffffffffff + sub x1, x1, #128 // bit_length -= 128 + mvn x13, xzr // rkN_l = 0xffffffffffffffff + ldp x4, x5, [x2] // load existing bytes we need to not overwrite + neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128]) + and x1, x1, #127 // bit_length %= 128 + lsr x14, x14, x1 // rkN_h is mask for top 64b of last block + cmp x1, #64 + csel x9, x13, x14, lt + csel x10, x14, xzr, lt + fmov d0, x9 // ctr0b is mask for last block + and x6, x6, x9 + mov v0.d[1], x10 + bic x4, x4, x9 // mask out low existing bytes + rev w9, w12 + bic x5, x5, x10 // mask out high existing bytes + orr x6, x6, x4 + and x7, x7, x10 + orr x7, x7, x5 + and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits + rev64 v4.16b, v5.16b // GHASH final block + eor v4.16b, v4.16b, v8.16b // feed in partial tag + pmull v21.1q, v4.1d, v12.1d // GHASH final block - low + mov d8, v4.d[1] // GHASH final block - mid + eor v8.8b, v8.8b, v4.8b // GHASH final block - mid + pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high + pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final block - high + eor v11.16b, v11.16b, v21.16b // GHASH final block - low + eor v10.16b, v10.16b, v8.16b // GHASH final block - mid + movi v8.8b, #0xc2 + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + shl d8, d8, #56 // mod_constant + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + stp x6, x7, [x2] + str w9, [x16, #12] // store the updated counter + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp d8, d9, [sp, #64] + ldp d10, d11, [sp, #80] + ldp d12, d13, [sp, #96] + ldp d14, d15, [sp, #112] + ldp x29, x30, [sp], #128 + AARCH64_VALIDATE_LINK_REGISTER + ret + +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-linux.S new file mode 100644 index 0000000000..4283f93ca6 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-linux.S @@ -0,0 +1,1555 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include +#if __ARM_MAX_ARCH__ >= 8 + +.arch armv8-a+crypto +.text +.globl aes_gcm_enc_kernel +.hidden aes_gcm_enc_kernel +.type aes_gcm_enc_kernel,%function +.align 4 +aes_gcm_enc_kernel: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp, #-128]! + mov x29, sp + stp x19, x20, [sp, #16] + mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + stp d8, d9, [sp, #64] + stp d10, d11, [sp, #80] + stp d12, d13, [sp, #96] + stp d14, d15, [sp, #112] + ldr w17, [x8, #240] + add x19, x8, x17, lsl #4 // borrow input_l1 for last key + ldp x13, x14, [x19] // load round N keys + ldr q31, [x19, #-16] // load round N-1 keys + add x4, x0, x1, lsr #3 // end_input_ptr + lsr x5, x1, #3 // byte_len + mov x15, x5 + ldp x10, x11, [x16] // ctr96_b64, ctr96_t32 + ld1 { v0.16b}, [x16] // special case vector load initial counter so we can start first AES block as quickly as possible + sub x5, x5, #1 // byte_len - 1 + ldr q18, [x8, #0] // load rk0 + and x5, x5, #0xffffffffffffffc0 // number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + ldr q25, [x8, #112] // load rk7 + add x5, x5, x0 + lsr x12, x11, #32 + fmov d2, x10 // CTR block 2 + orr w11, w11, w11 + rev w12, w12 // rev_ctr32 + fmov d1, x10 // CTR block 1 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 0 - round 0 + add w12, w12, #1 // increment rev_ctr32 + rev w9, w12 // CTR block 1 + fmov d3, x10 // CTR block 3 + orr x9, x11, x9, lsl #32 // CTR block 1 + add w12, w12, #1 // CTR block 1 + ldr q19, [x8, #16] // load rk1 + fmov v1.d[1], x9 // CTR block 1 + rev w9, w12 // CTR block 2 + add w12, w12, #1 // CTR block 2 + orr x9, x11, x9, lsl #32 // CTR block 2 + ldr q20, [x8, #32] // load rk2 + fmov v2.d[1], x9 // CTR block 2 + rev w9, w12 // CTR block 3 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 0 - round 1 + orr x9, x11, x9, lsl #32 // CTR block 3 + fmov v3.d[1], x9 // CTR block 3 + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 1 - round 0 + ldr q21, [x8, #48] // load rk3 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 0 - round 2 + ldr q24, [x8, #96] // load rk6 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 2 - round 0 + ldr q23, [x8, #80] // load rk5 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 1 - round 1 + ldr q14, [x6, #48] // load h3l | h3h + ext v14.16b, v14.16b, v14.16b, #8 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 3 - round 0 + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 2 - round 1 + ldr q22, [x8, #64] // load rk4 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 1 - round 2 + ldr q13, [x6, #32] // load h2l | h2h + ext v13.16b, v13.16b, v13.16b, #8 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 3 - round 1 + ldr q30, [x8, #192] // load rk12 + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 2 - round 2 + ldr q15, [x6, #80] // load h4l | h4h + ext v15.16b, v15.16b, v15.16b, #8 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 1 - round 3 + ldr q29, [x8, #176] // load rk11 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 3 - round 2 + ldr q26, [x8, #128] // load rk8 + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 2 - round 3 + add w12, w12, #1 // CTR block 3 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 0 - round 3 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 3 - round 3 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 2 - round 4 + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 0 - round 4 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 1 - round 4 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 3 - round 4 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 0 - round 5 + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 1 - round 5 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 3 - round 5 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 2 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 1 - round 6 + trn2 v17.2d, v14.2d, v15.2d // h4l | h3l + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 3 - round 6 + ldr q27, [x8, #144] // load rk9 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 0 - round 6 + ldr q12, [x6] // load h1l | h1h + ext v12.16b, v12.16b, v12.16b, #8 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 2 - round 6 + ldr q28, [x8, #160] // load rk10 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 1 - round 7 + trn1 v9.2d, v14.2d, v15.2d // h4h | h3h + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 0 - round 7 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 2 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 3 - round 7 + trn2 v16.2d, v12.2d, v13.2d // h2l | h1l + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 1 - round 8 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 2 - round 8 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 3 - round 8 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 0 - round 8 + b.lt .Lenc_finish_first_blocks // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 1 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 2 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 3 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 0 - round 9 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 1 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 2 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 3 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 0 - round 10 + b.eq .Lenc_finish_first_blocks // branch if AES-192 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 1 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 2 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 0 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 3 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 1 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 2 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 0 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 3 - round 12 + +.Lenc_finish_first_blocks: + cmp x0, x5 // check if we have <= 4 blocks + eor v17.16b, v17.16b, v9.16b // h4k | h3k + aese v2.16b, v31.16b // AES block 2 - round N-1 + trn1 v8.2d, v12.2d, v13.2d // h2h | h1h + aese v1.16b, v31.16b // AES block 1 - round N-1 + aese v0.16b, v31.16b // AES block 0 - round N-1 + aese v3.16b, v31.16b // AES block 3 - round N-1 + eor v16.16b, v16.16b, v8.16b // h2k | h1k + b.ge .Lenc_tail // handle tail + + ldp x19, x20, [x0, #16] // AES block 1 - load plaintext + rev w9, w12 // CTR block 4 + ldp x6, x7, [x0, #0] // AES block 0 - load plaintext + ldp x23, x24, [x0, #48] // AES block 3 - load plaintext + ldp x21, x22, [x0, #32] // AES block 2 - load plaintext + add x0, x0, #64 // AES input_ptr update + eor x19, x19, x13 // AES block 1 - round N low + eor x20, x20, x14 // AES block 1 - round N high + fmov d5, x19 // AES block 1 - mov low + eor x6, x6, x13 // AES block 0 - round N low + eor x7, x7, x14 // AES block 0 - round N high + eor x24, x24, x14 // AES block 3 - round N high + fmov d4, x6 // AES block 0 - mov low + cmp x0, x5 // check if we have <= 8 blocks + fmov v4.d[1], x7 // AES block 0 - mov high + eor x23, x23, x13 // AES block 3 - round N low + eor x21, x21, x13 // AES block 2 - round N low + fmov v5.d[1], x20 // AES block 1 - mov high + fmov d6, x21 // AES block 2 - mov low + add w12, w12, #1 // CTR block 4 + orr x9, x11, x9, lsl #32 // CTR block 4 + fmov d7, x23 // AES block 3 - mov low + eor x22, x22, x14 // AES block 2 - round N high + fmov v6.d[1], x22 // AES block 2 - mov high + eor v4.16b, v4.16b, v0.16b // AES block 0 - result + fmov d0, x10 // CTR block 4 + fmov v0.d[1], x9 // CTR block 4 + rev w9, w12 // CTR block 5 + add w12, w12, #1 // CTR block 5 + eor v5.16b, v5.16b, v1.16b // AES block 1 - result + fmov d1, x10 // CTR block 5 + orr x9, x11, x9, lsl #32 // CTR block 5 + fmov v1.d[1], x9 // CTR block 5 + rev w9, w12 // CTR block 6 + st1 { v4.16b}, [x2], #16 // AES block 0 - store result + fmov v7.d[1], x24 // AES block 3 - mov high + orr x9, x11, x9, lsl #32 // CTR block 6 + eor v6.16b, v6.16b, v2.16b // AES block 2 - result + st1 { v5.16b}, [x2], #16 // AES block 1 - store result + add w12, w12, #1 // CTR block 6 + fmov d2, x10 // CTR block 6 + fmov v2.d[1], x9 // CTR block 6 + st1 { v6.16b}, [x2], #16 // AES block 2 - store result + rev w9, w12 // CTR block 7 + orr x9, x11, x9, lsl #32 // CTR block 7 + eor v7.16b, v7.16b, v3.16b // AES block 3 - result + st1 { v7.16b}, [x2], #16 // AES block 3 - store result + b.ge .Lenc_prepretail // do prepretail + +.Lenc_main_loop: // main loop start + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free) + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d3, x10 // CTR block 4k+3 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + fmov v3.d[1], x9 // CTR block 4k+3 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + ldp x23, x24, [x0, #48] // AES block 4k+7 - load plaintext + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + ldp x21, x22, [x0, #32] // AES block 4k+6 - load plaintext + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + eor v4.16b, v4.16b, v11.16b // PRE 1 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + eor x23, x23, x13 // AES block 4k+7 - round N low + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + mov d10, v17.d[1] // GHASH block 4k - mid + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + eor x22, x22, x14 // AES block 4k+6 - round N high + mov d8, v4.d[1] // GHASH block 4k - mid + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free) + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 free) + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free) + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + ldp x19, x20, [x0, #16] // AES block 4k+5 - load plaintext + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + mov d4, v7.d[1] // GHASH block 4k+3 - mid + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor x19, x19, x13 // AES block 4k+5 - round N low + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + eor x21, x21, x13 // AES block 4k+6 - round N low + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + movi v8.8b, #0xc2 + pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + cmp x17, #12 // setup flags for AES-128/192/256 check + fmov d5, x19 // AES block 4k+5 - mov low + ldp x6, x7, [x0, #0] // AES block 4k+4 - load plaintext + b.lt .Lenc_main_loop_continue // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + b.eq .Lenc_main_loop_continue // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +.Lenc_main_loop_continue: + shl d8, d8, #56 // mod_constant + eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid + add w12, w12, #1 // CTR block 4k+3 + eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + add x0, x0, #64 // AES input_ptr update + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + rev w9, w12 // CTR block 4k+8 + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor x6, x6, x13 // AES block 4k+4 - round N low + eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up + eor x7, x7, x14 // AES block 4k+4 - round N high + fmov d4, x6 // AES block 4k+4 - mov low + orr x9, x11, x9, lsl #32 // CTR block 4k+8 + eor v7.16b, v9.16b, v7.16b // MODULO - fold into mid + eor x20, x20, x14 // AES block 4k+5 - round N high + eor x24, x24, x14 // AES block 4k+7 - round N high + add w12, w12, #1 // CTR block 4k+8 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + fmov v4.d[1], x7 // AES block 4k+4 - mov high + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + fmov d7, x23 // AES block 4k+7 - mov low + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + fmov v5.d[1], x20 // AES block 4k+5 - mov high + fmov d6, x21 // AES block 4k+6 - mov low + cmp x0, x5 // .LOOP CONTROL + fmov v6.d[1], x22 // AES block 4k+6 - mov high + pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor v4.16b, v4.16b, v0.16b // AES block 4k+4 - result + fmov d0, x10 // CTR block 4k+8 + fmov v0.d[1], x9 // CTR block 4k+8 + rev w9, w12 // CTR block 4k+9 + add w12, w12, #1 // CTR block 4k+9 + eor v5.16b, v5.16b, v1.16b // AES block 4k+5 - result + fmov d1, x10 // CTR block 4k+9 + orr x9, x11, x9, lsl #32 // CTR block 4k+9 + fmov v1.d[1], x9 // CTR block 4k+9 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + rev w9, w12 // CTR block 4k+10 + st1 { v4.16b}, [x2], #16 // AES block 4k+4 - store result + orr x9, x11, x9, lsl #32 // CTR block 4k+10 + eor v11.16b, v11.16b, v9.16b // MODULO - fold into low + fmov v7.d[1], x24 // AES block 4k+7 - mov high + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + st1 { v5.16b}, [x2], #16 // AES block 4k+5 - store result + add w12, w12, #1 // CTR block 4k+10 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + eor v6.16b, v6.16b, v2.16b // AES block 4k+6 - result + fmov d2, x10 // CTR block 4k+10 + st1 { v6.16b}, [x2], #16 // AES block 4k+6 - store result + fmov v2.d[1], x9 // CTR block 4k+10 + rev w9, w12 // CTR block 4k+11 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + orr x9, x11, x9, lsl #32 // CTR block 4k+11 + eor v7.16b, v7.16b, v3.16b // AES block 4k+7 - result + st1 { v7.16b}, [x2], #16 // AES block 4k+7 - store result + b.lt .Lenc_main_loop + +.Lenc_prepretail: // PREPRETAIL + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free) + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + fmov d3, x10 // CTR block 4k+3 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free) + fmov v3.d[1], x9 // CTR block 4k+3 + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free) + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + mov d8, v4.d[1] // GHASH block 4k - mid + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 free) + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + add w12, w12, #1 // CTR block 4k+3 + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + mov d4, v7.d[1] // GHASH block 4k+3 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + movi v8.8b, #0xc2 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + shl d8, d8, #56 // mod_constant + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid + pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v10.16b, v10.16b, v9.16b // karatsuba tidy up + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + pmull v4.1q, v9.1d, v8.1d + ext v9.16b, v9.16b, v9.16b, #8 + eor v10.16b, v10.16b, v11.16b + b.lt .Lenc_finish_prepretail // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + b.eq .Lenc_finish_prepretail // branch if AES-192 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + +.Lenc_finish_prepretail: + eor v10.16b, v10.16b, v4.16b + eor v10.16b, v10.16b, v9.16b + pmull v4.1q, v10.1d, v8.1d + ext v10.16b, v10.16b, v10.16b, #8 + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + eor v11.16b, v11.16b, v4.16b + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + eor v11.16b, v11.16b, v10.16b + +.Lenc_tail: // TAIL + ext v8.16b, v11.16b, v11.16b, #8 // prepare final partial tag + sub x5, x4, x0 // main_end_input_ptr is number of bytes left to process + ldp x6, x7, [x0], #16 // AES block 4k+4 - load plaintext + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + cmp x5, #48 + fmov d4, x6 // AES block 4k+4 - mov low + fmov v4.d[1], x7 // AES block 4k+4 - mov high + eor v5.16b, v4.16b, v0.16b // AES block 4k+4 - result + b.gt .Lenc_blocks_more_than_3 + cmp x5, #32 + mov v3.16b, v2.16b + movi v11.8b, #0 + movi v9.8b, #0 + sub w12, w12, #1 + mov v2.16b, v1.16b + movi v10.8b, #0 + b.gt .Lenc_blocks_more_than_2 + mov v3.16b, v1.16b + sub w12, w12, #1 + cmp x5, #16 + b.gt .Lenc_blocks_more_than_1 + sub w12, w12, #1 + b .Lenc_blocks_less_than_1 +.Lenc_blocks_more_than_3: // blocks left > 3 + st1 { v5.16b}, [x2], #16 // AES final-3 block - store result + ldp x6, x7, [x0], #16 // AES final-2 block - load input low & high + rev64 v4.16b, v5.16b // GHASH final-3 block + eor x6, x6, x13 // AES final-2 block - round N low + eor v4.16b, v4.16b, v8.16b // feed in partial tag + eor x7, x7, x14 // AES final-2 block - round N high + mov d22, v4.d[1] // GHASH final-3 block - mid + fmov d5, x6 // AES final-2 block - mov low + fmov v5.d[1], x7 // AES final-2 block - mov high + eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid + movi v8.8b, #0 // suppress further partial tag feed in + mov d10, v17.d[1] // GHASH final-3 block - mid + pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low + pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high + pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid + eor v5.16b, v5.16b, v1.16b // AES final-2 block - result +.Lenc_blocks_more_than_2: // blocks left > 2 + st1 { v5.16b}, [x2], #16 // AES final-2 block - store result + ldp x6, x7, [x0], #16 // AES final-1 block - load input low & high + rev64 v4.16b, v5.16b // GHASH final-2 block + eor x6, x6, x13 // AES final-1 block - round N low + eor v4.16b, v4.16b, v8.16b // feed in partial tag + fmov d5, x6 // AES final-1 block - mov low + eor x7, x7, x14 // AES final-1 block - round N high + fmov v5.d[1], x7 // AES final-1 block - mov high + movi v8.8b, #0 // suppress further partial tag feed in + pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high + mov d22, v4.d[1] // GHASH final-2 block - mid + pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low + eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid + eor v5.16b, v5.16b, v2.16b // AES final-1 block - result + eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high + pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low + eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid +.Lenc_blocks_more_than_1: // blocks left > 1 + st1 { v5.16b}, [x2], #16 // AES final-1 block - store result + rev64 v4.16b, v5.16b // GHASH final-1 block + ldp x6, x7, [x0], #16 // AES final block - load input low & high + eor v4.16b, v4.16b, v8.16b // feed in partial tag + movi v8.8b, #0 // suppress further partial tag feed in + eor x6, x6, x13 // AES final block - round N low + mov d22, v4.d[1] // GHASH final-1 block - mid + pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high + eor x7, x7, x14 // AES final block - round N high + eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high + ins v22.d[1], v22.d[0] // GHASH final-1 block - mid + fmov d5, x6 // AES final block - mov low + fmov v5.d[1], x7 // AES final block - mov high + pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid + pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low + eor v5.16b, v5.16b, v3.16b // AES final block - result + eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low +.Lenc_blocks_less_than_1: // blocks left <= 1 + and x1, x1, #127 // bit_length %= 128 + mvn x13, xzr // rkN_l = 0xffffffffffffffff + sub x1, x1, #128 // bit_length -= 128 + neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128]) + ld1 { v18.16b}, [x2] // load existing bytes where the possibly partial last block is to be stored + mvn x14, xzr // rkN_h = 0xffffffffffffffff + and x1, x1, #127 // bit_length %= 128 + lsr x14, x14, x1 // rkN_h is mask for top 64b of last block + cmp x1, #64 + csel x6, x13, x14, lt + csel x7, x14, xzr, lt + fmov d0, x6 // ctr0b is mask for last block + fmov v0.d[1], x7 + and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits + rev64 v4.16b, v5.16b // GHASH final block + eor v4.16b, v4.16b, v8.16b // feed in partial tag + bif v5.16b, v18.16b, v0.16b // insert existing bytes in top end of result before storing + pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high + mov d8, v4.d[1] // GHASH final block - mid + rev w9, w12 + pmull v21.1q, v4.1d, v12.1d // GHASH final block - low + eor v9.16b, v9.16b, v20.16b // GHASH final block - high + eor v8.8b, v8.8b, v4.8b // GHASH final block - mid + pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final block - low + eor v10.16b, v10.16b, v8.16b // GHASH final block - mid + movi v8.8b, #0xc2 + eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + shl d8, d8, #56 // mod_constant + eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + str w9, [x16, #12] // store the updated counter + st1 { v5.16b}, [x2] // store all 16B + eor v11.16b, v11.16b, v9.16b // MODULO - fold into low + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp d8, d9, [sp, #64] + ldp d10, d11, [sp, #80] + ldp d12, d13, [sp, #96] + ldp d14, d15, [sp, #112] + ldp x29, x30, [sp], #128 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size aes_gcm_enc_kernel,.-aes_gcm_enc_kernel +.globl aes_gcm_dec_kernel +.hidden aes_gcm_dec_kernel +.type aes_gcm_dec_kernel,%function +.align 4 +aes_gcm_dec_kernel: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp, #-128]! + mov x29, sp + stp x19, x20, [sp, #16] + mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + stp d8, d9, [sp, #64] + stp d10, d11, [sp, #80] + stp d12, d13, [sp, #96] + stp d14, d15, [sp, #112] + ldr w17, [x8, #240] + add x19, x8, x17, lsl #4 // borrow input_l1 for last key + ldp x13, x14, [x19] // load round N keys + ldr q31, [x19, #-16] // load round N-1 keys + lsr x5, x1, #3 // byte_len + mov x15, x5 + ldp x10, x11, [x16] // ctr96_b64, ctr96_t32 + ldr q26, [x8, #128] // load rk8 + sub x5, x5, #1 // byte_len - 1 + ldr q25, [x8, #112] // load rk7 + and x5, x5, #0xffffffffffffffc0 // number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + add x4, x0, x1, lsr #3 // end_input_ptr + ldr q24, [x8, #96] // load rk6 + lsr x12, x11, #32 + ldr q23, [x8, #80] // load rk5 + orr w11, w11, w11 + ldr q21, [x8, #48] // load rk3 + add x5, x5, x0 + rev w12, w12 // rev_ctr32 + add w12, w12, #1 // increment rev_ctr32 + fmov d3, x10 // CTR block 3 + rev w9, w12 // CTR block 1 + add w12, w12, #1 // CTR block 1 + fmov d1, x10 // CTR block 1 + orr x9, x11, x9, lsl #32 // CTR block 1 + ld1 { v0.16b}, [x16] // special case vector load initial counter so we can start first AES block as quickly as possible + fmov v1.d[1], x9 // CTR block 1 + rev w9, w12 // CTR block 2 + add w12, w12, #1 // CTR block 2 + fmov d2, x10 // CTR block 2 + orr x9, x11, x9, lsl #32 // CTR block 2 + fmov v2.d[1], x9 // CTR block 2 + rev w9, w12 // CTR block 3 + orr x9, x11, x9, lsl #32 // CTR block 3 + ldr q18, [x8, #0] // load rk0 + fmov v3.d[1], x9 // CTR block 3 + add w12, w12, #1 // CTR block 3 + ldr q22, [x8, #64] // load rk4 + ldr q19, [x8, #16] // load rk1 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 0 - round 0 + ldr q14, [x6, #48] // load h3l | h3h + ext v14.16b, v14.16b, v14.16b, #8 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 3 - round 0 + ldr q15, [x6, #80] // load h4l | h4h + ext v15.16b, v15.16b, v15.16b, #8 + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 1 - round 0 + ldr q13, [x6, #32] // load h2l | h2h + ext v13.16b, v13.16b, v13.16b, #8 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 2 - round 0 + ldr q20, [x8, #32] // load rk2 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 0 - round 1 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 1 - round 1 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 2 - round 1 + ldr q27, [x8, #144] // load rk9 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 3 - round 1 + ldr q30, [x8, #192] // load rk12 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 0 - round 2 + ldr q12, [x6] // load h1l | h1h + ext v12.16b, v12.16b, v12.16b, #8 + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 2 - round 2 + ldr q28, [x8, #160] // load rk10 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 3 - round 2 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 0 - round 3 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 1 - round 2 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 3 - round 3 + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 0 - round 4 + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 2 - round 3 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 1 - round 3 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 3 - round 4 + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 2 - round 4 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 1 - round 4 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 3 - round 5 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 0 - round 5 + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 1 - round 5 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 2 - round 5 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 0 - round 6 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 3 - round 6 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 1 - round 6 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 2 - round 6 + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 0 - round 7 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 1 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 3 - round 7 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 0 - round 8 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 2 - round 7 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 3 - round 8 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 1 - round 8 + ldr q29, [x8, #176] // load rk11 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 2 - round 8 + b.lt .Ldec_finish_first_blocks // branch if AES-128 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 0 - round 9 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 1 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 3 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 2 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 0 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 1 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 3 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 2 - round 10 + b.eq .Ldec_finish_first_blocks // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 0 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 3 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 1 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 2 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 1 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 0 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 2 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 3 - round 12 + +.Ldec_finish_first_blocks: + cmp x0, x5 // check if we have <= 4 blocks + trn1 v9.2d, v14.2d, v15.2d // h4h | h3h + trn2 v17.2d, v14.2d, v15.2d // h4l | h3l + trn1 v8.2d, v12.2d, v13.2d // h2h | h1h + trn2 v16.2d, v12.2d, v13.2d // h2l | h1l + eor v17.16b, v17.16b, v9.16b // h4k | h3k + aese v1.16b, v31.16b // AES block 1 - round N-1 + aese v2.16b, v31.16b // AES block 2 - round N-1 + eor v16.16b, v16.16b, v8.16b // h2k | h1k + aese v3.16b, v31.16b // AES block 3 - round N-1 + aese v0.16b, v31.16b // AES block 0 - round N-1 + b.ge .Ldec_tail // handle tail + + ldr q4, [x0, #0] // AES block 0 - load ciphertext + ldr q5, [x0, #16] // AES block 1 - load ciphertext + rev w9, w12 // CTR block 4 + eor v0.16b, v4.16b, v0.16b // AES block 0 - result + eor v1.16b, v5.16b, v1.16b // AES block 1 - result + rev64 v5.16b, v5.16b // GHASH block 1 + ldr q7, [x0, #48] // AES block 3 - load ciphertext + mov x7, v0.d[1] // AES block 0 - mov high + mov x6, v0.d[0] // AES block 0 - mov low + rev64 v4.16b, v4.16b // GHASH block 0 + add w12, w12, #1 // CTR block 4 + fmov d0, x10 // CTR block 4 + orr x9, x11, x9, lsl #32 // CTR block 4 + fmov v0.d[1], x9 // CTR block 4 + rev w9, w12 // CTR block 5 + add w12, w12, #1 // CTR block 5 + mov x19, v1.d[0] // AES block 1 - mov low + orr x9, x11, x9, lsl #32 // CTR block 5 + mov x20, v1.d[1] // AES block 1 - mov high + eor x7, x7, x14 // AES block 0 - round N high + eor x6, x6, x13 // AES block 0 - round N low + stp x6, x7, [x2], #16 // AES block 0 - store result + fmov d1, x10 // CTR block 5 + ldr q6, [x0, #32] // AES block 2 - load ciphertext + add x0, x0, #64 // AES input_ptr update + fmov v1.d[1], x9 // CTR block 5 + rev w9, w12 // CTR block 6 + add w12, w12, #1 // CTR block 6 + eor x19, x19, x13 // AES block 1 - round N low + orr x9, x11, x9, lsl #32 // CTR block 6 + eor x20, x20, x14 // AES block 1 - round N high + stp x19, x20, [x2], #16 // AES block 1 - store result + eor v2.16b, v6.16b, v2.16b // AES block 2 - result + cmp x0, x5 // check if we have <= 8 blocks + b.ge .Ldec_prepretail // do prepretail + +.Ldec_main_loop: // main loop start + mov x21, v2.d[0] // AES block 4k+2 - mov low + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + mov x22, v2.d[1] // AES block 4k+2 - mov high + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d2, x10 // CTR block 4k+6 + fmov v2.d[1], x9 // CTR block 4k+6 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev w9, w12 // CTR block 4k+7 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + mov x24, v3.d[1] // AES block 4k+3 - mov high + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + mov x23, v3.d[0] // AES block 4k+3 - mov low + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + mov d8, v4.d[1] // GHASH block 4k - mid + fmov d3, x10 // CTR block 4k+7 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + orr x9, x11, x9, lsl #32 // CTR block 4k+7 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + fmov v3.d[1], x9 // CTR block 4k+7 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + eor x22, x22, x14 // AES block 4k+2 - round N high + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + rev64 v6.16b, v6.16b // GHASH block 4k+2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + eor x21, x21, x13 // AES block 4k+2 - round N low + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + stp x21, x22, [x2], #16 // AES block 4k+2 - store result + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + rev64 v7.16b, v7.16b // GHASH block 4k+3 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + eor x23, x23, x13 // AES block 4k+3 - round N low + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + eor x24, x24, x14 // AES block 4k+3 - round N high + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + add w12, w12, #1 // CTR block 4k+7 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + rev w9, w12 // CTR block 4k+8 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + add w12, w12, #1 // CTR block 4k+8 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + mov d6, v7.d[1] // GHASH block 4k+3 - mid + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + orr x9, x11, x9, lsl #32 // CTR block 4k+8 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + cmp x17, #12 // setup flags for AES-128/192/256 check + eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid + movi v8.8b, #0xc2 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + shl d8, d8, #56 // mod_constant + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + b.lt .Ldec_main_loop_continue // branch if AES-128 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + b.eq .Ldec_main_loop_continue // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +.Ldec_main_loop_continue: + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + ldr q4, [x0, #0] // AES block 4k+4 - load ciphertext + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + ldr q5, [x0, #16] // AES block 4k+5 - load ciphertext + eor v0.16b, v4.16b, v0.16b // AES block 4k+4 - result + stp x23, x24, [x2], #16 // AES block 4k+3 - store result + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + ldr q7, [x0, #48] // AES block 4k+7 - load ciphertext + ldr q6, [x0, #32] // AES block 4k+6 - load ciphertext + mov x7, v0.d[1] // AES block 4k+4 - mov high + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + add x0, x0, #64 // AES input_ptr update + mov x6, v0.d[0] // AES block 4k+4 - mov low + fmov d0, x10 // CTR block 4k+8 + fmov v0.d[1], x9 // CTR block 4k+8 + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor v1.16b, v5.16b, v1.16b // AES block 4k+5 - result + rev w9, w12 // CTR block 4k+9 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + orr x9, x11, x9, lsl #32 // CTR block 4k+9 + cmp x0, x5 // .LOOP CONTROL + add w12, w12, #1 // CTR block 4k+9 + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + mov x20, v1.d[1] // AES block 4k+5 - mov high + eor v2.16b, v6.16b, v2.16b // AES block 4k+6 - result + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + mov x19, v1.d[0] // AES block 4k+5 - mov low + fmov d1, x10 // CTR block 4k+9 + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + fmov v1.d[1], x9 // CTR block 4k+9 + rev w9, w12 // CTR block 4k+10 + add w12, w12, #1 // CTR block 4k+10 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + orr x9, x11, x9, lsl #32 // CTR block 4k+10 + rev64 v5.16b, v5.16b // GHASH block 4k+5 + eor x20, x20, x14 // AES block 4k+5 - round N high + stp x6, x7, [x2], #16 // AES block 4k+4 - store result + eor x19, x19, x13 // AES block 4k+5 - round N low + stp x19, x20, [x2], #16 // AES block 4k+5 - store result + rev64 v4.16b, v4.16b // GHASH block 4k+4 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + b.lt .Ldec_main_loop + +.Ldec_prepretail: // PREPRETAIL + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + mov x21, v2.d[0] // AES block 4k+2 - mov low + eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + mov x22, v2.d[1] // AES block 4k+2 - mov high + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d2, x10 // CTR block 4k+6 + fmov v2.d[1], x9 // CTR block 4k+6 + rev w9, w12 // CTR block 4k+7 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev64 v6.16b, v6.16b // GHASH block 4k+2 + orr x9, x11, x9, lsl #32 // CTR block 4k+7 + mov x23, v3.d[0] // AES block 4k+3 - mov low + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + mov x24, v3.d[1] // AES block 4k+3 - mov high + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + mov d8, v4.d[1] // GHASH block 4k - mid + fmov d3, x10 // CTR block 4k+7 + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + fmov v3.d[1], x9 // CTR block 4k+7 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + rev64 v7.16b, v7.16b // GHASH block 4k+3 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + mov d6, v7.d[1] // GHASH block 4k+3 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + movi v8.8b, #0xc2 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low + pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + cmp x17, #12 // setup flags for AES-128/192/256 check + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + shl d8, d8, #56 // mod_constant + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + b.lt .Ldec_finish_prepretail // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + b.eq .Ldec_finish_prepretail // branch if AES-192 + + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +.Ldec_finish_prepretail: + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor x22, x22, x14 // AES block 4k+2 - round N high + eor x23, x23, x13 // AES block 4k+3 - round N low + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + add w12, w12, #1 // CTR block 4k+7 + eor x21, x21, x13 // AES block 4k+2 - round N low + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor x24, x24, x14 // AES block 4k+3 - round N high + stp x21, x22, [x2], #16 // AES block 4k+2 - store result + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + stp x23, x24, [x2], #16 // AES block 4k+3 - store result + + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + +.Ldec_tail: // TAIL + sub x5, x4, x0 // main_end_input_ptr is number of bytes left to process + ld1 { v5.16b}, [x0], #16 // AES block 4k+4 - load ciphertext + eor v0.16b, v5.16b, v0.16b // AES block 4k+4 - result + mov x6, v0.d[0] // AES block 4k+4 - mov low + mov x7, v0.d[1] // AES block 4k+4 - mov high + ext v8.16b, v11.16b, v11.16b, #8 // prepare final partial tag + cmp x5, #48 + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + b.gt .Ldec_blocks_more_than_3 + sub w12, w12, #1 + mov v3.16b, v2.16b + movi v10.8b, #0 + movi v11.8b, #0 + cmp x5, #32 + movi v9.8b, #0 + mov v2.16b, v1.16b + b.gt .Ldec_blocks_more_than_2 + sub w12, w12, #1 + mov v3.16b, v1.16b + cmp x5, #16 + b.gt .Ldec_blocks_more_than_1 + sub w12, w12, #1 + b .Ldec_blocks_less_than_1 +.Ldec_blocks_more_than_3: // blocks left > 3 + rev64 v4.16b, v5.16b // GHASH final-3 block + ld1 { v5.16b}, [x0], #16 // AES final-2 block - load ciphertext + stp x6, x7, [x2], #16 // AES final-3 block - store result + mov d10, v17.d[1] // GHASH final-3 block - mid + eor v4.16b, v4.16b, v8.16b // feed in partial tag + eor v0.16b, v5.16b, v1.16b // AES final-2 block - result + mov d22, v4.d[1] // GHASH final-3 block - mid + mov x6, v0.d[0] // AES final-2 block - mov low + mov x7, v0.d[1] // AES final-2 block - mov high + eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid + movi v8.8b, #0 // suppress further partial tag feed in + pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high + pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid + eor x6, x6, x13 // AES final-2 block - round N low + pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low + eor x7, x7, x14 // AES final-2 block - round N high +.Ldec_blocks_more_than_2: // blocks left > 2 + rev64 v4.16b, v5.16b // GHASH final-2 block + ld1 { v5.16b}, [x0], #16 // AES final-1 block - load ciphertext + eor v4.16b, v4.16b, v8.16b // feed in partial tag + stp x6, x7, [x2], #16 // AES final-2 block - store result + eor v0.16b, v5.16b, v2.16b // AES final-1 block - result + mov d22, v4.d[1] // GHASH final-2 block - mid + pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low + pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high + eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid + mov x6, v0.d[0] // AES final-1 block - mov low + mov x7, v0.d[1] // AES final-1 block - mov high + eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low + movi v8.8b, #0 // suppress further partial tag feed in + pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high + eor x6, x6, x13 // AES final-1 block - round N low + eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid + eor x7, x7, x14 // AES final-1 block - round N high +.Ldec_blocks_more_than_1: // blocks left > 1 + stp x6, x7, [x2], #16 // AES final-1 block - store result + rev64 v4.16b, v5.16b // GHASH final-1 block + ld1 { v5.16b}, [x0], #16 // AES final block - load ciphertext + eor v4.16b, v4.16b, v8.16b // feed in partial tag + movi v8.8b, #0 // suppress further partial tag feed in + mov d22, v4.d[1] // GHASH final-1 block - mid + eor v0.16b, v5.16b, v3.16b // AES final block - result + pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high + eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid + pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low + mov x6, v0.d[0] // AES final block - mov low + ins v22.d[1], v22.d[0] // GHASH final-1 block - mid + mov x7, v0.d[1] // AES final block - mov high + pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid + eor x6, x6, x13 // AES final block - round N low + eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low + eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high + eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid + eor x7, x7, x14 // AES final block - round N high +.Ldec_blocks_less_than_1: // blocks left <= 1 + and x1, x1, #127 // bit_length %= 128 + mvn x14, xzr // rkN_h = 0xffffffffffffffff + sub x1, x1, #128 // bit_length -= 128 + mvn x13, xzr // rkN_l = 0xffffffffffffffff + ldp x4, x5, [x2] // load existing bytes we need to not overwrite + neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128]) + and x1, x1, #127 // bit_length %= 128 + lsr x14, x14, x1 // rkN_h is mask for top 64b of last block + cmp x1, #64 + csel x9, x13, x14, lt + csel x10, x14, xzr, lt + fmov d0, x9 // ctr0b is mask for last block + and x6, x6, x9 + mov v0.d[1], x10 + bic x4, x4, x9 // mask out low existing bytes + rev w9, w12 + bic x5, x5, x10 // mask out high existing bytes + orr x6, x6, x4 + and x7, x7, x10 + orr x7, x7, x5 + and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits + rev64 v4.16b, v5.16b // GHASH final block + eor v4.16b, v4.16b, v8.16b // feed in partial tag + pmull v21.1q, v4.1d, v12.1d // GHASH final block - low + mov d8, v4.d[1] // GHASH final block - mid + eor v8.8b, v8.8b, v4.8b // GHASH final block - mid + pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high + pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final block - high + eor v11.16b, v11.16b, v21.16b // GHASH final block - low + eor v10.16b, v10.16b, v8.16b // GHASH final block - mid + movi v8.8b, #0xc2 + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + shl d8, d8, #56 // mod_constant + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + stp x6, x7, [x2] + str w9, [x16, #12] // store the updated counter + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp d8, d9, [sp, #64] + ldp d10, d11, [sp, #80] + ldp d12, d13, [sp, #96] + ldp d14, d15, [sp, #112] + ldp x29, x30, [sp], #128 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size aes_gcm_dec_kernel,.-aes_gcm_dec_kernel +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-win.S new file mode 100644 index 0000000000..1233796955 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/aesv8-gcm-armv8-win.S @@ -0,0 +1,1559 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include +#if __ARM_MAX_ARCH__ >= 8 + +.arch armv8-a+crypto +.text +.globl aes_gcm_enc_kernel + +.def aes_gcm_enc_kernel + .type 32 +.endef +.align 4 +aes_gcm_enc_kernel: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp, #-128]! + mov x29, sp + stp x19, x20, [sp, #16] + mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + stp d8, d9, [sp, #64] + stp d10, d11, [sp, #80] + stp d12, d13, [sp, #96] + stp d14, d15, [sp, #112] + ldr w17, [x8, #240] + add x19, x8, x17, lsl #4 // borrow input_l1 for last key + ldp x13, x14, [x19] // load round N keys + ldr q31, [x19, #-16] // load round N-1 keys + add x4, x0, x1, lsr #3 // end_input_ptr + lsr x5, x1, #3 // byte_len + mov x15, x5 + ldp x10, x11, [x16] // ctr96_b64, ctr96_t32 + ld1 { v0.16b}, [x16] // special case vector load initial counter so we can start first AES block as quickly as possible + sub x5, x5, #1 // byte_len - 1 + ldr q18, [x8, #0] // load rk0 + and x5, x5, #0xffffffffffffffc0 // number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + ldr q25, [x8, #112] // load rk7 + add x5, x5, x0 + lsr x12, x11, #32 + fmov d2, x10 // CTR block 2 + orr w11, w11, w11 + rev w12, w12 // rev_ctr32 + fmov d1, x10 // CTR block 1 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 0 - round 0 + add w12, w12, #1 // increment rev_ctr32 + rev w9, w12 // CTR block 1 + fmov d3, x10 // CTR block 3 + orr x9, x11, x9, lsl #32 // CTR block 1 + add w12, w12, #1 // CTR block 1 + ldr q19, [x8, #16] // load rk1 + fmov v1.d[1], x9 // CTR block 1 + rev w9, w12 // CTR block 2 + add w12, w12, #1 // CTR block 2 + orr x9, x11, x9, lsl #32 // CTR block 2 + ldr q20, [x8, #32] // load rk2 + fmov v2.d[1], x9 // CTR block 2 + rev w9, w12 // CTR block 3 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 0 - round 1 + orr x9, x11, x9, lsl #32 // CTR block 3 + fmov v3.d[1], x9 // CTR block 3 + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 1 - round 0 + ldr q21, [x8, #48] // load rk3 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 0 - round 2 + ldr q24, [x8, #96] // load rk6 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 2 - round 0 + ldr q23, [x8, #80] // load rk5 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 1 - round 1 + ldr q14, [x6, #48] // load h3l | h3h + ext v14.16b, v14.16b, v14.16b, #8 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 3 - round 0 + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 2 - round 1 + ldr q22, [x8, #64] // load rk4 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 1 - round 2 + ldr q13, [x6, #32] // load h2l | h2h + ext v13.16b, v13.16b, v13.16b, #8 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 3 - round 1 + ldr q30, [x8, #192] // load rk12 + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 2 - round 2 + ldr q15, [x6, #80] // load h4l | h4h + ext v15.16b, v15.16b, v15.16b, #8 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 1 - round 3 + ldr q29, [x8, #176] // load rk11 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 3 - round 2 + ldr q26, [x8, #128] // load rk8 + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 2 - round 3 + add w12, w12, #1 // CTR block 3 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 0 - round 3 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 3 - round 3 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 2 - round 4 + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 0 - round 4 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 1 - round 4 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 3 - round 4 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 0 - round 5 + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 1 - round 5 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 3 - round 5 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 2 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 1 - round 6 + trn2 v17.2d, v14.2d, v15.2d // h4l | h3l + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 3 - round 6 + ldr q27, [x8, #144] // load rk9 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 0 - round 6 + ldr q12, [x6] // load h1l | h1h + ext v12.16b, v12.16b, v12.16b, #8 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 2 - round 6 + ldr q28, [x8, #160] // load rk10 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 1 - round 7 + trn1 v9.2d, v14.2d, v15.2d // h4h | h3h + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 0 - round 7 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 2 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 3 - round 7 + trn2 v16.2d, v12.2d, v13.2d // h2l | h1l + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 1 - round 8 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 2 - round 8 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 3 - round 8 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 0 - round 8 + b.lt Lenc_finish_first_blocks // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 1 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 2 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 3 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 0 - round 9 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 1 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 2 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 3 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 0 - round 10 + b.eq Lenc_finish_first_blocks // branch if AES-192 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 1 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 2 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 0 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 3 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 1 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 2 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 0 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 3 - round 12 + +Lenc_finish_first_blocks: + cmp x0, x5 // check if we have <= 4 blocks + eor v17.16b, v17.16b, v9.16b // h4k | h3k + aese v2.16b, v31.16b // AES block 2 - round N-1 + trn1 v8.2d, v12.2d, v13.2d // h2h | h1h + aese v1.16b, v31.16b // AES block 1 - round N-1 + aese v0.16b, v31.16b // AES block 0 - round N-1 + aese v3.16b, v31.16b // AES block 3 - round N-1 + eor v16.16b, v16.16b, v8.16b // h2k | h1k + b.ge Lenc_tail // handle tail + + ldp x19, x20, [x0, #16] // AES block 1 - load plaintext + rev w9, w12 // CTR block 4 + ldp x6, x7, [x0, #0] // AES block 0 - load plaintext + ldp x23, x24, [x0, #48] // AES block 3 - load plaintext + ldp x21, x22, [x0, #32] // AES block 2 - load plaintext + add x0, x0, #64 // AES input_ptr update + eor x19, x19, x13 // AES block 1 - round N low + eor x20, x20, x14 // AES block 1 - round N high + fmov d5, x19 // AES block 1 - mov low + eor x6, x6, x13 // AES block 0 - round N low + eor x7, x7, x14 // AES block 0 - round N high + eor x24, x24, x14 // AES block 3 - round N high + fmov d4, x6 // AES block 0 - mov low + cmp x0, x5 // check if we have <= 8 blocks + fmov v4.d[1], x7 // AES block 0 - mov high + eor x23, x23, x13 // AES block 3 - round N low + eor x21, x21, x13 // AES block 2 - round N low + fmov v5.d[1], x20 // AES block 1 - mov high + fmov d6, x21 // AES block 2 - mov low + add w12, w12, #1 // CTR block 4 + orr x9, x11, x9, lsl #32 // CTR block 4 + fmov d7, x23 // AES block 3 - mov low + eor x22, x22, x14 // AES block 2 - round N high + fmov v6.d[1], x22 // AES block 2 - mov high + eor v4.16b, v4.16b, v0.16b // AES block 0 - result + fmov d0, x10 // CTR block 4 + fmov v0.d[1], x9 // CTR block 4 + rev w9, w12 // CTR block 5 + add w12, w12, #1 // CTR block 5 + eor v5.16b, v5.16b, v1.16b // AES block 1 - result + fmov d1, x10 // CTR block 5 + orr x9, x11, x9, lsl #32 // CTR block 5 + fmov v1.d[1], x9 // CTR block 5 + rev w9, w12 // CTR block 6 + st1 { v4.16b}, [x2], #16 // AES block 0 - store result + fmov v7.d[1], x24 // AES block 3 - mov high + orr x9, x11, x9, lsl #32 // CTR block 6 + eor v6.16b, v6.16b, v2.16b // AES block 2 - result + st1 { v5.16b}, [x2], #16 // AES block 1 - store result + add w12, w12, #1 // CTR block 6 + fmov d2, x10 // CTR block 6 + fmov v2.d[1], x9 // CTR block 6 + st1 { v6.16b}, [x2], #16 // AES block 2 - store result + rev w9, w12 // CTR block 7 + orr x9, x11, x9, lsl #32 // CTR block 7 + eor v7.16b, v7.16b, v3.16b // AES block 3 - result + st1 { v7.16b}, [x2], #16 // AES block 3 - store result + b.ge Lenc_prepretail // do prepretail + +Lenc_main_loop: // main loop start + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free) + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d3, x10 // CTR block 4k+3 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + fmov v3.d[1], x9 // CTR block 4k+3 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + ldp x23, x24, [x0, #48] // AES block 4k+7 - load plaintext + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + ldp x21, x22, [x0, #32] // AES block 4k+6 - load plaintext + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + eor v4.16b, v4.16b, v11.16b // PRE 1 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + eor x23, x23, x13 // AES block 4k+7 - round N low + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + mov d10, v17.d[1] // GHASH block 4k - mid + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + eor x22, x22, x14 // AES block 4k+6 - round N high + mov d8, v4.d[1] // GHASH block 4k - mid + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free) + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 free) + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free) + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + ldp x19, x20, [x0, #16] // AES block 4k+5 - load plaintext + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + mov d4, v7.d[1] // GHASH block 4k+3 - mid + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor x19, x19, x13 // AES block 4k+5 - round N low + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + eor x21, x21, x13 // AES block 4k+6 - round N low + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + movi v8.8b, #0xc2 + pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + cmp x17, #12 // setup flags for AES-128/192/256 check + fmov d5, x19 // AES block 4k+5 - mov low + ldp x6, x7, [x0, #0] // AES block 4k+4 - load plaintext + b.lt Lenc_main_loop_continue // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + b.eq Lenc_main_loop_continue // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +Lenc_main_loop_continue: + shl d8, d8, #56 // mod_constant + eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid + add w12, w12, #1 // CTR block 4k+3 + eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + add x0, x0, #64 // AES input_ptr update + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + rev w9, w12 // CTR block 4k+8 + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor x6, x6, x13 // AES block 4k+4 - round N low + eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up + eor x7, x7, x14 // AES block 4k+4 - round N high + fmov d4, x6 // AES block 4k+4 - mov low + orr x9, x11, x9, lsl #32 // CTR block 4k+8 + eor v7.16b, v9.16b, v7.16b // MODULO - fold into mid + eor x20, x20, x14 // AES block 4k+5 - round N high + eor x24, x24, x14 // AES block 4k+7 - round N high + add w12, w12, #1 // CTR block 4k+8 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + fmov v4.d[1], x7 // AES block 4k+4 - mov high + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + fmov d7, x23 // AES block 4k+7 - mov low + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + fmov v5.d[1], x20 // AES block 4k+5 - mov high + fmov d6, x21 // AES block 4k+6 - mov low + cmp x0, x5 // LOOP CONTROL + fmov v6.d[1], x22 // AES block 4k+6 - mov high + pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor v4.16b, v4.16b, v0.16b // AES block 4k+4 - result + fmov d0, x10 // CTR block 4k+8 + fmov v0.d[1], x9 // CTR block 4k+8 + rev w9, w12 // CTR block 4k+9 + add w12, w12, #1 // CTR block 4k+9 + eor v5.16b, v5.16b, v1.16b // AES block 4k+5 - result + fmov d1, x10 // CTR block 4k+9 + orr x9, x11, x9, lsl #32 // CTR block 4k+9 + fmov v1.d[1], x9 // CTR block 4k+9 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + rev w9, w12 // CTR block 4k+10 + st1 { v4.16b}, [x2], #16 // AES block 4k+4 - store result + orr x9, x11, x9, lsl #32 // CTR block 4k+10 + eor v11.16b, v11.16b, v9.16b // MODULO - fold into low + fmov v7.d[1], x24 // AES block 4k+7 - mov high + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + st1 { v5.16b}, [x2], #16 // AES block 4k+5 - store result + add w12, w12, #1 // CTR block 4k+10 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + eor v6.16b, v6.16b, v2.16b // AES block 4k+6 - result + fmov d2, x10 // CTR block 4k+10 + st1 { v6.16b}, [x2], #16 // AES block 4k+6 - store result + fmov v2.d[1], x9 // CTR block 4k+10 + rev w9, w12 // CTR block 4k+11 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + orr x9, x11, x9, lsl #32 // CTR block 4k+11 + eor v7.16b, v7.16b, v3.16b // AES block 4k+7 - result + st1 { v7.16b}, [x2], #16 // AES block 4k+7 - store result + b.lt Lenc_main_loop + +Lenc_prepretail: // PREPRETAIL + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free) + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + fmov d3, x10 // CTR block 4k+3 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free) + fmov v3.d[1], x9 // CTR block 4k+3 + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free) + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + mov d8, v4.d[1] // GHASH block 4k - mid + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 free) + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + add w12, w12, #1 // CTR block 4k+3 + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + mov d4, v7.d[1] // GHASH block 4k+3 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + movi v8.8b, #0xc2 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + shl d8, d8, #56 // mod_constant + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid + pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v10.16b, v10.16b, v9.16b // karatsuba tidy up + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + pmull v4.1q, v9.1d, v8.1d + ext v9.16b, v9.16b, v9.16b, #8 + eor v10.16b, v10.16b, v11.16b + b.lt Lenc_finish_prepretail // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + b.eq Lenc_finish_prepretail // branch if AES-192 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + +Lenc_finish_prepretail: + eor v10.16b, v10.16b, v4.16b + eor v10.16b, v10.16b, v9.16b + pmull v4.1q, v10.1d, v8.1d + ext v10.16b, v10.16b, v10.16b, #8 + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + eor v11.16b, v11.16b, v4.16b + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + eor v11.16b, v11.16b, v10.16b + +Lenc_tail: // TAIL + ext v8.16b, v11.16b, v11.16b, #8 // prepare final partial tag + sub x5, x4, x0 // main_end_input_ptr is number of bytes left to process + ldp x6, x7, [x0], #16 // AES block 4k+4 - load plaintext + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + cmp x5, #48 + fmov d4, x6 // AES block 4k+4 - mov low + fmov v4.d[1], x7 // AES block 4k+4 - mov high + eor v5.16b, v4.16b, v0.16b // AES block 4k+4 - result + b.gt Lenc_blocks_more_than_3 + cmp x5, #32 + mov v3.16b, v2.16b + movi v11.8b, #0 + movi v9.8b, #0 + sub w12, w12, #1 + mov v2.16b, v1.16b + movi v10.8b, #0 + b.gt Lenc_blocks_more_than_2 + mov v3.16b, v1.16b + sub w12, w12, #1 + cmp x5, #16 + b.gt Lenc_blocks_more_than_1 + sub w12, w12, #1 + b Lenc_blocks_less_than_1 +Lenc_blocks_more_than_3: // blocks left > 3 + st1 { v5.16b}, [x2], #16 // AES final-3 block - store result + ldp x6, x7, [x0], #16 // AES final-2 block - load input low & high + rev64 v4.16b, v5.16b // GHASH final-3 block + eor x6, x6, x13 // AES final-2 block - round N low + eor v4.16b, v4.16b, v8.16b // feed in partial tag + eor x7, x7, x14 // AES final-2 block - round N high + mov d22, v4.d[1] // GHASH final-3 block - mid + fmov d5, x6 // AES final-2 block - mov low + fmov v5.d[1], x7 // AES final-2 block - mov high + eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid + movi v8.8b, #0 // suppress further partial tag feed in + mov d10, v17.d[1] // GHASH final-3 block - mid + pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low + pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high + pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid + eor v5.16b, v5.16b, v1.16b // AES final-2 block - result +Lenc_blocks_more_than_2: // blocks left > 2 + st1 { v5.16b}, [x2], #16 // AES final-2 block - store result + ldp x6, x7, [x0], #16 // AES final-1 block - load input low & high + rev64 v4.16b, v5.16b // GHASH final-2 block + eor x6, x6, x13 // AES final-1 block - round N low + eor v4.16b, v4.16b, v8.16b // feed in partial tag + fmov d5, x6 // AES final-1 block - mov low + eor x7, x7, x14 // AES final-1 block - round N high + fmov v5.d[1], x7 // AES final-1 block - mov high + movi v8.8b, #0 // suppress further partial tag feed in + pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high + mov d22, v4.d[1] // GHASH final-2 block - mid + pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low + eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid + eor v5.16b, v5.16b, v2.16b // AES final-1 block - result + eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high + pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low + eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid +Lenc_blocks_more_than_1: // blocks left > 1 + st1 { v5.16b}, [x2], #16 // AES final-1 block - store result + rev64 v4.16b, v5.16b // GHASH final-1 block + ldp x6, x7, [x0], #16 // AES final block - load input low & high + eor v4.16b, v4.16b, v8.16b // feed in partial tag + movi v8.8b, #0 // suppress further partial tag feed in + eor x6, x6, x13 // AES final block - round N low + mov d22, v4.d[1] // GHASH final-1 block - mid + pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high + eor x7, x7, x14 // AES final block - round N high + eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high + ins v22.d[1], v22.d[0] // GHASH final-1 block - mid + fmov d5, x6 // AES final block - mov low + fmov v5.d[1], x7 // AES final block - mov high + pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid + pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low + eor v5.16b, v5.16b, v3.16b // AES final block - result + eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low +Lenc_blocks_less_than_1: // blocks left <= 1 + and x1, x1, #127 // bit_length %= 128 + mvn x13, xzr // rkN_l = 0xffffffffffffffff + sub x1, x1, #128 // bit_length -= 128 + neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128]) + ld1 { v18.16b}, [x2] // load existing bytes where the possibly partial last block is to be stored + mvn x14, xzr // rkN_h = 0xffffffffffffffff + and x1, x1, #127 // bit_length %= 128 + lsr x14, x14, x1 // rkN_h is mask for top 64b of last block + cmp x1, #64 + csel x6, x13, x14, lt + csel x7, x14, xzr, lt + fmov d0, x6 // ctr0b is mask for last block + fmov v0.d[1], x7 + and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits + rev64 v4.16b, v5.16b // GHASH final block + eor v4.16b, v4.16b, v8.16b // feed in partial tag + bif v5.16b, v18.16b, v0.16b // insert existing bytes in top end of result before storing + pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high + mov d8, v4.d[1] // GHASH final block - mid + rev w9, w12 + pmull v21.1q, v4.1d, v12.1d // GHASH final block - low + eor v9.16b, v9.16b, v20.16b // GHASH final block - high + eor v8.8b, v8.8b, v4.8b // GHASH final block - mid + pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid + eor v11.16b, v11.16b, v21.16b // GHASH final block - low + eor v10.16b, v10.16b, v8.16b // GHASH final block - mid + movi v8.8b, #0xc2 + eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + shl d8, d8, #56 // mod_constant + eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + str w9, [x16, #12] // store the updated counter + st1 { v5.16b}, [x2] // store all 16B + eor v11.16b, v11.16b, v9.16b // MODULO - fold into low + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp d8, d9, [sp, #64] + ldp d10, d11, [sp, #80] + ldp d12, d13, [sp, #96] + ldp d14, d15, [sp, #112] + ldp x29, x30, [sp], #128 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl aes_gcm_dec_kernel + +.def aes_gcm_dec_kernel + .type 32 +.endef +.align 4 +aes_gcm_dec_kernel: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp, #-128]! + mov x29, sp + stp x19, x20, [sp, #16] + mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + stp d8, d9, [sp, #64] + stp d10, d11, [sp, #80] + stp d12, d13, [sp, #96] + stp d14, d15, [sp, #112] + ldr w17, [x8, #240] + add x19, x8, x17, lsl #4 // borrow input_l1 for last key + ldp x13, x14, [x19] // load round N keys + ldr q31, [x19, #-16] // load round N-1 keys + lsr x5, x1, #3 // byte_len + mov x15, x5 + ldp x10, x11, [x16] // ctr96_b64, ctr96_t32 + ldr q26, [x8, #128] // load rk8 + sub x5, x5, #1 // byte_len - 1 + ldr q25, [x8, #112] // load rk7 + and x5, x5, #0xffffffffffffffc0 // number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + add x4, x0, x1, lsr #3 // end_input_ptr + ldr q24, [x8, #96] // load rk6 + lsr x12, x11, #32 + ldr q23, [x8, #80] // load rk5 + orr w11, w11, w11 + ldr q21, [x8, #48] // load rk3 + add x5, x5, x0 + rev w12, w12 // rev_ctr32 + add w12, w12, #1 // increment rev_ctr32 + fmov d3, x10 // CTR block 3 + rev w9, w12 // CTR block 1 + add w12, w12, #1 // CTR block 1 + fmov d1, x10 // CTR block 1 + orr x9, x11, x9, lsl #32 // CTR block 1 + ld1 { v0.16b}, [x16] // special case vector load initial counter so we can start first AES block as quickly as possible + fmov v1.d[1], x9 // CTR block 1 + rev w9, w12 // CTR block 2 + add w12, w12, #1 // CTR block 2 + fmov d2, x10 // CTR block 2 + orr x9, x11, x9, lsl #32 // CTR block 2 + fmov v2.d[1], x9 // CTR block 2 + rev w9, w12 // CTR block 3 + orr x9, x11, x9, lsl #32 // CTR block 3 + ldr q18, [x8, #0] // load rk0 + fmov v3.d[1], x9 // CTR block 3 + add w12, w12, #1 // CTR block 3 + ldr q22, [x8, #64] // load rk4 + ldr q19, [x8, #16] // load rk1 + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 0 - round 0 + ldr q14, [x6, #48] // load h3l | h3h + ext v14.16b, v14.16b, v14.16b, #8 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 3 - round 0 + ldr q15, [x6, #80] // load h4l | h4h + ext v15.16b, v15.16b, v15.16b, #8 + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 1 - round 0 + ldr q13, [x6, #32] // load h2l | h2h + ext v13.16b, v13.16b, v13.16b, #8 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 2 - round 0 + ldr q20, [x8, #32] // load rk2 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 0 - round 1 + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 1 - round 1 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 2 - round 1 + ldr q27, [x8, #144] // load rk9 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 3 - round 1 + ldr q30, [x8, #192] // load rk12 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 0 - round 2 + ldr q12, [x6] // load h1l | h1h + ext v12.16b, v12.16b, v12.16b, #8 + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 2 - round 2 + ldr q28, [x8, #160] // load rk10 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 3 - round 2 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 0 - round 3 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 1 - round 2 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 3 - round 3 + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 0 - round 4 + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 2 - round 3 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 1 - round 3 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 3 - round 4 + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 2 - round 4 + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 1 - round 4 + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 3 - round 5 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 0 - round 5 + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 1 - round 5 + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 2 - round 5 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 0 - round 6 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 3 - round 6 + cmp x17, #12 // setup flags for AES-128/192/256 check + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 1 - round 6 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 2 - round 6 + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 0 - round 7 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 1 - round 7 + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 3 - round 7 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 0 - round 8 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 2 - round 7 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 3 - round 8 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 1 - round 8 + ldr q29, [x8, #176] // load rk11 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 2 - round 8 + b.lt Ldec_finish_first_blocks // branch if AES-128 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 0 - round 9 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 1 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 3 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 2 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 0 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 1 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 3 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 2 - round 10 + b.eq Ldec_finish_first_blocks // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 0 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 3 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 1 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 2 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 1 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 0 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 2 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 3 - round 12 + +Ldec_finish_first_blocks: + cmp x0, x5 // check if we have <= 4 blocks + trn1 v9.2d, v14.2d, v15.2d // h4h | h3h + trn2 v17.2d, v14.2d, v15.2d // h4l | h3l + trn1 v8.2d, v12.2d, v13.2d // h2h | h1h + trn2 v16.2d, v12.2d, v13.2d // h2l | h1l + eor v17.16b, v17.16b, v9.16b // h4k | h3k + aese v1.16b, v31.16b // AES block 1 - round N-1 + aese v2.16b, v31.16b // AES block 2 - round N-1 + eor v16.16b, v16.16b, v8.16b // h2k | h1k + aese v3.16b, v31.16b // AES block 3 - round N-1 + aese v0.16b, v31.16b // AES block 0 - round N-1 + b.ge Ldec_tail // handle tail + + ldr q4, [x0, #0] // AES block 0 - load ciphertext + ldr q5, [x0, #16] // AES block 1 - load ciphertext + rev w9, w12 // CTR block 4 + eor v0.16b, v4.16b, v0.16b // AES block 0 - result + eor v1.16b, v5.16b, v1.16b // AES block 1 - result + rev64 v5.16b, v5.16b // GHASH block 1 + ldr q7, [x0, #48] // AES block 3 - load ciphertext + mov x7, v0.d[1] // AES block 0 - mov high + mov x6, v0.d[0] // AES block 0 - mov low + rev64 v4.16b, v4.16b // GHASH block 0 + add w12, w12, #1 // CTR block 4 + fmov d0, x10 // CTR block 4 + orr x9, x11, x9, lsl #32 // CTR block 4 + fmov v0.d[1], x9 // CTR block 4 + rev w9, w12 // CTR block 5 + add w12, w12, #1 // CTR block 5 + mov x19, v1.d[0] // AES block 1 - mov low + orr x9, x11, x9, lsl #32 // CTR block 5 + mov x20, v1.d[1] // AES block 1 - mov high + eor x7, x7, x14 // AES block 0 - round N high + eor x6, x6, x13 // AES block 0 - round N low + stp x6, x7, [x2], #16 // AES block 0 - store result + fmov d1, x10 // CTR block 5 + ldr q6, [x0, #32] // AES block 2 - load ciphertext + add x0, x0, #64 // AES input_ptr update + fmov v1.d[1], x9 // CTR block 5 + rev w9, w12 // CTR block 6 + add w12, w12, #1 // CTR block 6 + eor x19, x19, x13 // AES block 1 - round N low + orr x9, x11, x9, lsl #32 // CTR block 6 + eor x20, x20, x14 // AES block 1 - round N high + stp x19, x20, [x2], #16 // AES block 1 - store result + eor v2.16b, v6.16b, v2.16b // AES block 2 - result + cmp x0, x5 // check if we have <= 8 blocks + b.ge Ldec_prepretail // do prepretail + +Ldec_main_loop: // main loop start + mov x21, v2.d[0] // AES block 4k+2 - mov low + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + mov x22, v2.d[1] // AES block 4k+2 - mov high + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d2, x10 // CTR block 4k+6 + fmov v2.d[1], x9 // CTR block 4k+6 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev w9, w12 // CTR block 4k+7 + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + mov x24, v3.d[1] // AES block 4k+3 - mov high + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + mov x23, v3.d[0] // AES block 4k+3 - mov low + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + mov d8, v4.d[1] // GHASH block 4k - mid + fmov d3, x10 // CTR block 4k+7 + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + orr x9, x11, x9, lsl #32 // CTR block 4k+7 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + fmov v3.d[1], x9 // CTR block 4k+7 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + eor x22, x22, x14 // AES block 4k+2 - round N high + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + rev64 v6.16b, v6.16b // GHASH block 4k+2 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + eor x21, x21, x13 // AES block 4k+2 - round N low + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + stp x21, x22, [x2], #16 // AES block 4k+2 - store result + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + rev64 v7.16b, v7.16b // GHASH block 4k+3 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + eor x23, x23, x13 // AES block 4k+3 - round N low + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + eor x24, x24, x14 // AES block 4k+3 - round N high + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + add w12, w12, #1 // CTR block 4k+7 + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + rev w9, w12 // CTR block 4k+8 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + add w12, w12, #1 // CTR block 4k+8 + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + mov d6, v7.d[1] // GHASH block 4k+3 - mid + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + orr x9, x11, x9, lsl #32 // CTR block 4k+8 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + cmp x17, #12 // setup flags for AES-128/192/256 check + eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid + movi v8.8b, #0xc2 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + shl d8, d8, #56 // mod_constant + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + b.lt Ldec_main_loop_continue // branch if AES-128 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + b.eq Ldec_main_loop_continue // branch if AES-192 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +Ldec_main_loop_continue: + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + ldr q4, [x0, #0] // AES block 4k+4 - load ciphertext + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + ldr q5, [x0, #16] // AES block 4k+5 - load ciphertext + eor v0.16b, v4.16b, v0.16b // AES block 4k+4 - result + stp x23, x24, [x2], #16 // AES block 4k+3 - store result + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + ldr q7, [x0, #48] // AES block 4k+7 - load ciphertext + ldr q6, [x0, #32] // AES block 4k+6 - load ciphertext + mov x7, v0.d[1] // AES block 4k+4 - mov high + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + add x0, x0, #64 // AES input_ptr update + mov x6, v0.d[0] // AES block 4k+4 - mov low + fmov d0, x10 // CTR block 4k+8 + fmov v0.d[1], x9 // CTR block 4k+8 + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor v1.16b, v5.16b, v1.16b // AES block 4k+5 - result + rev w9, w12 // CTR block 4k+9 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + orr x9, x11, x9, lsl #32 // CTR block 4k+9 + cmp x0, x5 // LOOP CONTROL + add w12, w12, #1 // CTR block 4k+9 + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + mov x20, v1.d[1] // AES block 4k+5 - mov high + eor v2.16b, v6.16b, v2.16b // AES block 4k+6 - result + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + mov x19, v1.d[0] // AES block 4k+5 - mov low + fmov d1, x10 // CTR block 4k+9 + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + fmov v1.d[1], x9 // CTR block 4k+9 + rev w9, w12 // CTR block 4k+10 + add w12, w12, #1 // CTR block 4k+10 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + orr x9, x11, x9, lsl #32 // CTR block 4k+10 + rev64 v5.16b, v5.16b // GHASH block 4k+5 + eor x20, x20, x14 // AES block 4k+5 - round N high + stp x6, x7, [x2], #16 // AES block 4k+4 - store result + eor x19, x19, x13 // AES block 4k+5 - round N low + stp x19, x20, [x2], #16 // AES block 4k+5 - store result + rev64 v4.16b, v4.16b // GHASH block 4k+4 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + b.lt Ldec_main_loop + +Ldec_prepretail: // PREPRETAIL + ext v11.16b, v11.16b, v11.16b, #8 // PRE 0 + mov x21, v2.d[0] // AES block 4k+2 - mov low + eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 0 + mov x22, v2.d[1] // AES block 4k+2 - mov high + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 0 + fmov d2, x10 // CTR block 4k+6 + fmov v2.d[1], x9 // CTR block 4k+6 + rev w9, w12 // CTR block 4k+7 + eor v4.16b, v4.16b, v11.16b // PRE 1 + rev64 v6.16b, v6.16b // GHASH block 4k+2 + orr x9, x11, x9, lsl #32 // CTR block 4k+7 + mov x23, v3.d[0] // AES block 4k+3 - mov low + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 1 + mov x24, v3.d[1] // AES block 4k+3 - mov high + pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low + mov d8, v4.d[1] // GHASH block 4k - mid + fmov d3, x10 // CTR block 4k+7 + pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high + fmov v3.d[1], x9 // CTR block 4k+7 + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 0 + mov d10, v17.d[1] // GHASH block 4k - mid + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 1 + eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid + pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 1 + rev64 v7.16b, v7.16b // GHASH block 4k+3 + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 0 + pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high + pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 1 + mov d4, v5.d[1] // GHASH block 4k+1 - mid + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 2 + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 2 + eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 2 + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 3 + mov d8, v6.d[1] // GHASH block 4k+2 - mid + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 2 + eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid + pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 4 + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 3 + eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid + pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 5 + eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 4 + pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high + eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid + pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 5 + ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 3 + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 3 + eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high + pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 4 + mov d6, v7.d[1] // GHASH block 4k+3 - mid + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 4 + pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 5 + eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 5 + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 6 + eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 6 + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 6 + movi v8.8b, #0xc2 + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 6 + eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low + pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 7 + cmp x17, #12 // setup flags for AES-128/192/256 check + eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 7 + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 7 + eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 8 + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 7 + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 8 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 8 + shl d8, d8, #56 // mod_constant + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 8 + b.lt Ldec_finish_prepretail // branch if AES-128 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 9 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 9 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 9 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 10 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 10 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 10 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 10 + b.eq Ldec_finish_prepretail // branch if AES-192 + + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 11 + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 11 + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 11 + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b // AES block 4k+6 - round 12 + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 11 + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b // AES block 4k+5 - round 12 + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b // AES block 4k+4 - round 12 + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b // AES block 4k+7 - round 12 + +Ldec_finish_prepretail: + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor x22, x22, x14 // AES block 4k+2 - round N high + eor x23, x23, x13 // AES block 4k+3 - round N low + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + add w12, w12, #1 // CTR block 4k+7 + eor x21, x21, x13 // AES block 4k+2 - round N low + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + eor x24, x24, x14 // AES block 4k+3 - round N high + stp x21, x22, [x2], #16 // AES block 4k+2 - store result + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + stp x23, x24, [x2], #16 // AES block 4k+3 - store result + + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + aese v1.16b, v31.16b // AES block 4k+5 - round N-1 + aese v0.16b, v31.16b // AES block 4k+4 - round N-1 + aese v3.16b, v31.16b // AES block 4k+7 - round N-1 + aese v2.16b, v31.16b // AES block 4k+6 - round N-1 + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + +Ldec_tail: // TAIL + sub x5, x4, x0 // main_end_input_ptr is number of bytes left to process + ld1 { v5.16b}, [x0], #16 // AES block 4k+4 - load ciphertext + eor v0.16b, v5.16b, v0.16b // AES block 4k+4 - result + mov x6, v0.d[0] // AES block 4k+4 - mov low + mov x7, v0.d[1] // AES block 4k+4 - mov high + ext v8.16b, v11.16b, v11.16b, #8 // prepare final partial tag + cmp x5, #48 + eor x6, x6, x13 // AES block 4k+4 - round N low + eor x7, x7, x14 // AES block 4k+4 - round N high + b.gt Ldec_blocks_more_than_3 + sub w12, w12, #1 + mov v3.16b, v2.16b + movi v10.8b, #0 + movi v11.8b, #0 + cmp x5, #32 + movi v9.8b, #0 + mov v2.16b, v1.16b + b.gt Ldec_blocks_more_than_2 + sub w12, w12, #1 + mov v3.16b, v1.16b + cmp x5, #16 + b.gt Ldec_blocks_more_than_1 + sub w12, w12, #1 + b Ldec_blocks_less_than_1 +Ldec_blocks_more_than_3: // blocks left > 3 + rev64 v4.16b, v5.16b // GHASH final-3 block + ld1 { v5.16b}, [x0], #16 // AES final-2 block - load ciphertext + stp x6, x7, [x2], #16 // AES final-3 block - store result + mov d10, v17.d[1] // GHASH final-3 block - mid + eor v4.16b, v4.16b, v8.16b // feed in partial tag + eor v0.16b, v5.16b, v1.16b // AES final-2 block - result + mov d22, v4.d[1] // GHASH final-3 block - mid + mov x6, v0.d[0] // AES final-2 block - mov low + mov x7, v0.d[1] // AES final-2 block - mov high + eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid + movi v8.8b, #0 // suppress further partial tag feed in + pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high + pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid + eor x6, x6, x13 // AES final-2 block - round N low + pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low + eor x7, x7, x14 // AES final-2 block - round N high +Ldec_blocks_more_than_2: // blocks left > 2 + rev64 v4.16b, v5.16b // GHASH final-2 block + ld1 { v5.16b}, [x0], #16 // AES final-1 block - load ciphertext + eor v4.16b, v4.16b, v8.16b // feed in partial tag + stp x6, x7, [x2], #16 // AES final-2 block - store result + eor v0.16b, v5.16b, v2.16b // AES final-1 block - result + mov d22, v4.d[1] // GHASH final-2 block - mid + pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low + pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high + eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid + mov x6, v0.d[0] // AES final-1 block - mov low + mov x7, v0.d[1] // AES final-1 block - mov high + eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low + movi v8.8b, #0 // suppress further partial tag feed in + pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high + eor x6, x6, x13 // AES final-1 block - round N low + eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid + eor x7, x7, x14 // AES final-1 block - round N high +Ldec_blocks_more_than_1: // blocks left > 1 + stp x6, x7, [x2], #16 // AES final-1 block - store result + rev64 v4.16b, v5.16b // GHASH final-1 block + ld1 { v5.16b}, [x0], #16 // AES final block - load ciphertext + eor v4.16b, v4.16b, v8.16b // feed in partial tag + movi v8.8b, #0 // suppress further partial tag feed in + mov d22, v4.d[1] // GHASH final-1 block - mid + eor v0.16b, v5.16b, v3.16b // AES final block - result + pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high + eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid + pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low + mov x6, v0.d[0] // AES final block - mov low + ins v22.d[1], v22.d[0] // GHASH final-1 block - mid + mov x7, v0.d[1] // AES final block - mov high + pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid + eor x6, x6, x13 // AES final block - round N low + eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low + eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high + eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid + eor x7, x7, x14 // AES final block - round N high +Ldec_blocks_less_than_1: // blocks left <= 1 + and x1, x1, #127 // bit_length %= 128 + mvn x14, xzr // rkN_h = 0xffffffffffffffff + sub x1, x1, #128 // bit_length -= 128 + mvn x13, xzr // rkN_l = 0xffffffffffffffff + ldp x4, x5, [x2] // load existing bytes we need to not overwrite + neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128]) + and x1, x1, #127 // bit_length %= 128 + lsr x14, x14, x1 // rkN_h is mask for top 64b of last block + cmp x1, #64 + csel x9, x13, x14, lt + csel x10, x14, xzr, lt + fmov d0, x9 // ctr0b is mask for last block + and x6, x6, x9 + mov v0.d[1], x10 + bic x4, x4, x9 // mask out low existing bytes + rev w9, w12 + bic x5, x5, x10 // mask out high existing bytes + orr x6, x6, x4 + and x7, x7, x10 + orr x7, x7, x5 + and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in highest bits + rev64 v4.16b, v5.16b // GHASH final block + eor v4.16b, v4.16b, v8.16b // feed in partial tag + pmull v21.1q, v4.1d, v12.1d // GHASH final block - low + mov d8, v4.d[1] // GHASH final block - mid + eor v8.8b, v8.8b, v4.8b // GHASH final block - mid + pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high + pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid + eor v9.16b, v9.16b, v20.16b // GHASH final block - high + eor v11.16b, v11.16b, v21.16b // GHASH final block - low + eor v10.16b, v10.16b, v8.16b // GHASH final block - mid + movi v8.8b, #0xc2 + eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up + shl d8, d8, #56 // mod_constant + eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up + pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment + eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid + eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid + pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low + ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment + eor v11.16b, v11.16b, v8.16b // MODULO - fold into low + stp x6, x7, [x2] + str w9, [x16, #12] // store the updated counter + eor v11.16b, v11.16b, v10.16b // MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp d8, d9, [sp, #64] + ldp d10, d11, [sp, #80] + ldp d12, d13, [sp, #96] + ldp d14, d15, [sp, #112] + ldp x29, x30, [sp], #128 + AARCH64_VALIDATE_LINK_REGISTER + ret + +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/armv4-mont-linux.S b/yass/third_party/boringssl/src/gen/bcm/armv4-mont-linux.S new file mode 100644 index 0000000000..0b845b6e9b --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/armv4-mont-linux.S @@ -0,0 +1,939 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +#include + +@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both +@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. +.arch armv7-a + +.text +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.globl bn_mul_mont_nohw +.hidden bn_mul_mont_nohw +.type bn_mul_mont_nohw,%function + +.align 5 +bn_mul_mont_nohw: + ldr ip,[sp,#4] @ load num + stmdb sp!,{r0,r2} @ sp points at argument block + cmp ip,#2 + mov r0,ip @ load num +#ifdef __thumb2__ + ittt lt +#endif + movlt r0,#0 + addlt sp,sp,#2*4 + blt .Labrt + + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers + + mov r0,r0,lsl#2 @ rescale r0 for byte count + sub sp,sp,r0 @ alloca(4*num) + sub sp,sp,#4 @ +extra dword + sub r0,r0,#4 @ "num=num-1" + add r4,r2,r0 @ &bp[num-1] + + add r0,sp,r0 @ r0 to point at &tp[num-1] + ldr r8,[r0,#14*4] @ &n0 + ldr r2,[r2] @ bp[0] + ldr r5,[r1],#4 @ ap[0],ap++ + ldr r6,[r3],#4 @ np[0],np++ + ldr r8,[r8] @ *n0 + str r4,[r0,#15*4] @ save &bp[num] + + umull r10,r11,r5,r2 @ ap[0]*bp[0] + str r8,[r0,#14*4] @ save n0 value + mul r8,r10,r8 @ "tp[0]"*n0 + mov r12,#0 + umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]" + mov r4,sp + +.L1st: + ldr r5,[r1],#4 @ ap[j],ap++ + mov r10,r11 + ldr r6,[r3],#4 @ np[j],np++ + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[j]*bp[0] + mov r14,#0 + umlal r12,r14,r6,r8 @ np[j]*n0 + adds r12,r12,r10 + str r12,[r4],#4 @ tp[j-1]=,tp++ + adc r12,r14,#0 + cmp r4,r0 + bne .L1st + + adds r12,r12,r11 + ldr r4,[r0,#13*4] @ restore bp + mov r14,#0 + ldr r8,[r0,#14*4] @ restore n0 + adc r14,r14,#0 + str r12,[r0] @ tp[num-1]= + mov r7,sp + str r14,[r0,#4] @ tp[num]= + +.Louter: + sub r7,r0,r7 @ "original" r0-1 value + sub r1,r1,r7 @ "rewind" ap to &ap[1] + ldr r2,[r4,#4]! @ *(++bp) + sub r3,r3,r7 @ "rewind" np to &np[1] + ldr r5,[r1,#-4] @ ap[0] + ldr r10,[sp] @ tp[0] + ldr r6,[r3,#-4] @ np[0] + ldr r7,[sp,#4] @ tp[1] + + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0] + str r4,[r0,#13*4] @ save bp + mul r8,r10,r8 + mov r12,#0 + umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]" + mov r4,sp + +.Linner: + ldr r5,[r1],#4 @ ap[j],ap++ + adds r10,r11,r7 @ +=tp[j] + ldr r6,[r3],#4 @ np[j],np++ + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[j]*bp[i] + mov r14,#0 + umlal r12,r14,r6,r8 @ np[j]*n0 + adc r11,r11,#0 + ldr r7,[r4,#8] @ tp[j+1] + adds r12,r12,r10 + str r12,[r4],#4 @ tp[j-1]=,tp++ + adc r12,r14,#0 + cmp r4,r0 + bne .Linner + + adds r12,r12,r11 + mov r14,#0 + ldr r4,[r0,#13*4] @ restore bp + adc r14,r14,#0 + ldr r8,[r0,#14*4] @ restore n0 + adds r12,r12,r7 + ldr r7,[r0,#15*4] @ restore &bp[num] + adc r14,r14,#0 + str r12,[r0] @ tp[num-1]= + str r14,[r0,#4] @ tp[num]= + + cmp r4,r7 +#ifdef __thumb2__ + itt ne +#endif + movne r7,sp + bne .Louter + + ldr r2,[r0,#12*4] @ pull rp + mov r5,sp + add r0,r0,#4 @ r0 to point at &tp[num] + sub r5,r0,r5 @ "original" num value + mov r4,sp @ "rewind" r4 + mov r1,r4 @ "borrow" r1 + sub r3,r3,r5 @ "rewind" r3 to &np[0] + + subs r7,r7,r7 @ "clear" carry flag +.Lsub: ldr r7,[r4],#4 + ldr r6,[r3],#4 + sbcs r7,r7,r6 @ tp[j]-np[j] + str r7,[r2],#4 @ rp[j]= + teq r4,r0 @ preserve carry + bne .Lsub + sbcs r14,r14,#0 @ upmost carry + mov r4,sp @ "rewind" r4 + sub r2,r2,r5 @ "rewind" r2 + +.Lcopy: ldr r7,[r4] @ conditional copy + ldr r5,[r2] + str sp,[r4],#4 @ zap tp +#ifdef __thumb2__ + it cc +#endif + movcc r5,r7 + str r5,[r2],#4 + teq r4,r0 @ preserve carry + bne .Lcopy + + mov sp,r0 + add sp,sp,#4 @ skip over tp[num+1] + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers + add sp,sp,#2*4 @ skip over {r0,r2} + mov r0,#1 +.Labrt: +#if __ARM_ARCH>=5 + bx lr @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size bn_mul_mont_nohw,.-bn_mul_mont_nohw +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.globl bn_mul8x_mont_neon +.hidden bn_mul8x_mont_neon +.type bn_mul8x_mont_neon,%function +.align 5 +bn_mul8x_mont_neon: + mov ip,sp + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so + ldmia ip,{r4,r5} @ load rest of parameter block + mov ip,sp + + cmp r5,#8 + bhi .LNEON_8n + + @ special case for r5==8, everything is in register bank... + + vld1.32 {d28[0]}, [r2,:32]! + veor d8,d8,d8 + sub r7,sp,r5,lsl#4 + vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-( + and r7,r7,#-64 + vld1.32 {d30[0]}, [r4,:32] + mov sp,r7 @ alloca + vzip.16 d28,d8 + + vmull.u32 q6,d28,d0[0] + vmull.u32 q7,d28,d0[1] + vmull.u32 q8,d28,d1[0] + vshl.i64 d29,d13,#16 + vmull.u32 q9,d28,d1[1] + + vadd.u64 d29,d29,d12 + veor d8,d8,d8 + vmul.u32 d29,d29,d30 + + vmull.u32 q10,d28,d2[0] + vld1.32 {d4,d5,d6,d7}, [r3]! + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmull.u32 q13,d28,d3[1] + + vmlal.u32 q6,d29,d4[0] + sub r9,r5,#1 + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + b .LNEON_outer8 + +.align 4 +.LNEON_outer8: + vld1.32 {d28[0]}, [r2,:32]! + veor d8,d8,d8 + vzip.16 d28,d8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vshl.i64 d29,d13,#16 + vmlal.u32 q9,d28,d1[1] + + vadd.u64 d29,d29,d12 + veor d8,d8,d8 + subs r9,r9,#1 + vmul.u32 d29,d29,d30 + + vmlal.u32 q10,d28,d2[0] + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + + vmlal.u32 q6,d29,d4[0] + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + bne .LNEON_outer8 + + vadd.u64 d12,d12,d10 + mov r7,sp + vshr.u64 d10,d12,#16 + mov r8,r5 + vadd.u64 d13,d13,d10 + add r6,sp,#96 + vshr.u64 d10,d13,#16 + vzip.16 d12,d13 + + b .LNEON_tail_entry + +.align 4 +.LNEON_8n: + veor q6,q6,q6 + sub r7,sp,#128 + veor q7,q7,q7 + sub r7,r7,r5,lsl#4 + veor q8,q8,q8 + and r7,r7,#-64 + veor q9,q9,q9 + mov sp,r7 @ alloca + veor q10,q10,q10 + add r7,r7,#256 + veor q11,q11,q11 + sub r8,r5,#8 + veor q12,q12,q12 + veor q13,q13,q13 + +.LNEON_8n_init: + vst1.64 {q6,q7},[r7,:256]! + subs r8,r8,#8 + vst1.64 {q8,q9},[r7,:256]! + vst1.64 {q10,q11},[r7,:256]! + vst1.64 {q12,q13},[r7,:256]! + bne .LNEON_8n_init + + add r6,sp,#256 + vld1.32 {d0,d1,d2,d3},[r1]! + add r10,sp,#8 + vld1.32 {d30[0]},[r4,:32] + mov r9,r5 + b .LNEON_8n_outer + +.align 4 +.LNEON_8n_outer: + vld1.32 {d28[0]},[r2,:32]! @ *b++ + veor d8,d8,d8 + vzip.16 d28,d8 + add r7,sp,#128 + vld1.32 {d4,d5,d6,d7},[r3]! + + vmlal.u32 q6,d28,d0[0] + vmlal.u32 q7,d28,d0[1] + veor d8,d8,d8 + vmlal.u32 q8,d28,d1[0] + vshl.i64 d29,d13,#16 + vmlal.u32 q9,d28,d1[1] + vadd.u64 d29,d29,d12 + vmlal.u32 q10,d28,d2[0] + vmul.u32 d29,d29,d30 + vmlal.u32 q11,d28,d2[1] + vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + vld1.32 {d28[0]},[r2,:32]! @ *b++ + vmlal.u32 q6,d29,d4[0] + veor d10,d10,d10 + vmlal.u32 q7,d29,d4[1] + vzip.16 d28,d10 + vmlal.u32 q8,d29,d5[0] + vshr.u64 d12,d12,#16 + vmlal.u32 q9,d29,d5[1] + vmlal.u32 q10,d29,d6[0] + vadd.u64 d12,d12,d13 + vmlal.u32 q11,d29,d6[1] + vshr.u64 d12,d12,#16 + vmlal.u32 q12,d29,d7[0] + vmlal.u32 q13,d29,d7[1] + vadd.u64 d14,d14,d12 + vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0] + vmlal.u32 q7,d28,d0[0] + vld1.64 {q6},[r6,:128]! + vmlal.u32 q8,d28,d0[1] + veor d8,d8,d8 + vmlal.u32 q9,d28,d1[0] + vshl.i64 d29,d15,#16 + vmlal.u32 q10,d28,d1[1] + vadd.u64 d29,d29,d14 + vmlal.u32 q11,d28,d2[0] + vmul.u32 d29,d29,d30 + vmlal.u32 q12,d28,d2[1] + vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1] + vmlal.u32 q13,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q6,d28,d3[1] + vld1.32 {d28[0]},[r2,:32]! @ *b++ + vmlal.u32 q7,d29,d4[0] + veor d10,d10,d10 + vmlal.u32 q8,d29,d4[1] + vzip.16 d28,d10 + vmlal.u32 q9,d29,d5[0] + vshr.u64 d14,d14,#16 + vmlal.u32 q10,d29,d5[1] + vmlal.u32 q11,d29,d6[0] + vadd.u64 d14,d14,d15 + vmlal.u32 q12,d29,d6[1] + vshr.u64 d14,d14,#16 + vmlal.u32 q13,d29,d7[0] + vmlal.u32 q6,d29,d7[1] + vadd.u64 d16,d16,d14 + vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1] + vmlal.u32 q8,d28,d0[0] + vld1.64 {q7},[r6,:128]! + vmlal.u32 q9,d28,d0[1] + veor d8,d8,d8 + vmlal.u32 q10,d28,d1[0] + vshl.i64 d29,d17,#16 + vmlal.u32 q11,d28,d1[1] + vadd.u64 d29,d29,d16 + vmlal.u32 q12,d28,d2[0] + vmul.u32 d29,d29,d30 + vmlal.u32 q13,d28,d2[1] + vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2] + vmlal.u32 q6,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q7,d28,d3[1] + vld1.32 {d28[0]},[r2,:32]! @ *b++ + vmlal.u32 q8,d29,d4[0] + veor d10,d10,d10 + vmlal.u32 q9,d29,d4[1] + vzip.16 d28,d10 + vmlal.u32 q10,d29,d5[0] + vshr.u64 d16,d16,#16 + vmlal.u32 q11,d29,d5[1] + vmlal.u32 q12,d29,d6[0] + vadd.u64 d16,d16,d17 + vmlal.u32 q13,d29,d6[1] + vshr.u64 d16,d16,#16 + vmlal.u32 q6,d29,d7[0] + vmlal.u32 q7,d29,d7[1] + vadd.u64 d18,d18,d16 + vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2] + vmlal.u32 q9,d28,d0[0] + vld1.64 {q8},[r6,:128]! + vmlal.u32 q10,d28,d0[1] + veor d8,d8,d8 + vmlal.u32 q11,d28,d1[0] + vshl.i64 d29,d19,#16 + vmlal.u32 q12,d28,d1[1] + vadd.u64 d29,d29,d18 + vmlal.u32 q13,d28,d2[0] + vmul.u32 d29,d29,d30 + vmlal.u32 q6,d28,d2[1] + vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3] + vmlal.u32 q7,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q8,d28,d3[1] + vld1.32 {d28[0]},[r2,:32]! @ *b++ + vmlal.u32 q9,d29,d4[0] + veor d10,d10,d10 + vmlal.u32 q10,d29,d4[1] + vzip.16 d28,d10 + vmlal.u32 q11,d29,d5[0] + vshr.u64 d18,d18,#16 + vmlal.u32 q12,d29,d5[1] + vmlal.u32 q13,d29,d6[0] + vadd.u64 d18,d18,d19 + vmlal.u32 q6,d29,d6[1] + vshr.u64 d18,d18,#16 + vmlal.u32 q7,d29,d7[0] + vmlal.u32 q8,d29,d7[1] + vadd.u64 d20,d20,d18 + vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3] + vmlal.u32 q10,d28,d0[0] + vld1.64 {q9},[r6,:128]! + vmlal.u32 q11,d28,d0[1] + veor d8,d8,d8 + vmlal.u32 q12,d28,d1[0] + vshl.i64 d29,d21,#16 + vmlal.u32 q13,d28,d1[1] + vadd.u64 d29,d29,d20 + vmlal.u32 q6,d28,d2[0] + vmul.u32 d29,d29,d30 + vmlal.u32 q7,d28,d2[1] + vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4] + vmlal.u32 q8,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q9,d28,d3[1] + vld1.32 {d28[0]},[r2,:32]! @ *b++ + vmlal.u32 q10,d29,d4[0] + veor d10,d10,d10 + vmlal.u32 q11,d29,d4[1] + vzip.16 d28,d10 + vmlal.u32 q12,d29,d5[0] + vshr.u64 d20,d20,#16 + vmlal.u32 q13,d29,d5[1] + vmlal.u32 q6,d29,d6[0] + vadd.u64 d20,d20,d21 + vmlal.u32 q7,d29,d6[1] + vshr.u64 d20,d20,#16 + vmlal.u32 q8,d29,d7[0] + vmlal.u32 q9,d29,d7[1] + vadd.u64 d22,d22,d20 + vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4] + vmlal.u32 q11,d28,d0[0] + vld1.64 {q10},[r6,:128]! + vmlal.u32 q12,d28,d0[1] + veor d8,d8,d8 + vmlal.u32 q13,d28,d1[0] + vshl.i64 d29,d23,#16 + vmlal.u32 q6,d28,d1[1] + vadd.u64 d29,d29,d22 + vmlal.u32 q7,d28,d2[0] + vmul.u32 d29,d29,d30 + vmlal.u32 q8,d28,d2[1] + vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5] + vmlal.u32 q9,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q10,d28,d3[1] + vld1.32 {d28[0]},[r2,:32]! @ *b++ + vmlal.u32 q11,d29,d4[0] + veor d10,d10,d10 + vmlal.u32 q12,d29,d4[1] + vzip.16 d28,d10 + vmlal.u32 q13,d29,d5[0] + vshr.u64 d22,d22,#16 + vmlal.u32 q6,d29,d5[1] + vmlal.u32 q7,d29,d6[0] + vadd.u64 d22,d22,d23 + vmlal.u32 q8,d29,d6[1] + vshr.u64 d22,d22,#16 + vmlal.u32 q9,d29,d7[0] + vmlal.u32 q10,d29,d7[1] + vadd.u64 d24,d24,d22 + vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5] + vmlal.u32 q12,d28,d0[0] + vld1.64 {q11},[r6,:128]! + vmlal.u32 q13,d28,d0[1] + veor d8,d8,d8 + vmlal.u32 q6,d28,d1[0] + vshl.i64 d29,d25,#16 + vmlal.u32 q7,d28,d1[1] + vadd.u64 d29,d29,d24 + vmlal.u32 q8,d28,d2[0] + vmul.u32 d29,d29,d30 + vmlal.u32 q9,d28,d2[1] + vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6] + vmlal.u32 q10,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q11,d28,d3[1] + vld1.32 {d28[0]},[r2,:32]! @ *b++ + vmlal.u32 q12,d29,d4[0] + veor d10,d10,d10 + vmlal.u32 q13,d29,d4[1] + vzip.16 d28,d10 + vmlal.u32 q6,d29,d5[0] + vshr.u64 d24,d24,#16 + vmlal.u32 q7,d29,d5[1] + vmlal.u32 q8,d29,d6[0] + vadd.u64 d24,d24,d25 + vmlal.u32 q9,d29,d6[1] + vshr.u64 d24,d24,#16 + vmlal.u32 q10,d29,d7[0] + vmlal.u32 q11,d29,d7[1] + vadd.u64 d26,d26,d24 + vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6] + vmlal.u32 q13,d28,d0[0] + vld1.64 {q12},[r6,:128]! + vmlal.u32 q6,d28,d0[1] + veor d8,d8,d8 + vmlal.u32 q7,d28,d1[0] + vshl.i64 d29,d27,#16 + vmlal.u32 q8,d28,d1[1] + vadd.u64 d29,d29,d26 + vmlal.u32 q9,d28,d2[0] + vmul.u32 d29,d29,d30 + vmlal.u32 q10,d28,d2[1] + vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7] + vmlal.u32 q11,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q12,d28,d3[1] + vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0] + vmlal.u32 q13,d29,d4[0] + vld1.32 {d0,d1,d2,d3},[r1]! + vmlal.u32 q6,d29,d4[1] + vmlal.u32 q7,d29,d5[0] + vshr.u64 d26,d26,#16 + vmlal.u32 q8,d29,d5[1] + vmlal.u32 q9,d29,d6[0] + vadd.u64 d26,d26,d27 + vmlal.u32 q10,d29,d6[1] + vshr.u64 d26,d26,#16 + vmlal.u32 q11,d29,d7[0] + vmlal.u32 q12,d29,d7[1] + vadd.u64 d12,d12,d26 + vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7] + add r10,sp,#8 @ rewind + sub r8,r5,#8 + b .LNEON_8n_inner + +.align 4 +.LNEON_8n_inner: + subs r8,r8,#8 + vmlal.u32 q6,d28,d0[0] + vld1.64 {q13},[r6,:128] + vmlal.u32 q7,d28,d0[1] + vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0] + vmlal.u32 q8,d28,d1[0] + vld1.32 {d4,d5,d6,d7},[r3]! + vmlal.u32 q9,d28,d1[1] + it ne + addne r6,r6,#16 @ don't advance in last iteration + vmlal.u32 q10,d28,d2[0] + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vmlal.u32 q13,d28,d3[1] + vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1] + vmlal.u32 q6,d29,d4[0] + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + vmlal.u32 q10,d29,d6[0] + vmlal.u32 q11,d29,d6[1] + vmlal.u32 q12,d29,d7[0] + vmlal.u32 q13,d29,d7[1] + vst1.64 {q6},[r7,:128]! + vmlal.u32 q7,d28,d0[0] + vld1.64 {q6},[r6,:128] + vmlal.u32 q8,d28,d0[1] + vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1] + vmlal.u32 q9,d28,d1[0] + it ne + addne r6,r6,#16 @ don't advance in last iteration + vmlal.u32 q10,d28,d1[1] + vmlal.u32 q11,d28,d2[0] + vmlal.u32 q12,d28,d2[1] + vmlal.u32 q13,d28,d3[0] + vmlal.u32 q6,d28,d3[1] + vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2] + vmlal.u32 q7,d29,d4[0] + vmlal.u32 q8,d29,d4[1] + vmlal.u32 q9,d29,d5[0] + vmlal.u32 q10,d29,d5[1] + vmlal.u32 q11,d29,d6[0] + vmlal.u32 q12,d29,d6[1] + vmlal.u32 q13,d29,d7[0] + vmlal.u32 q6,d29,d7[1] + vst1.64 {q7},[r7,:128]! + vmlal.u32 q8,d28,d0[0] + vld1.64 {q7},[r6,:128] + vmlal.u32 q9,d28,d0[1] + vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2] + vmlal.u32 q10,d28,d1[0] + it ne + addne r6,r6,#16 @ don't advance in last iteration + vmlal.u32 q11,d28,d1[1] + vmlal.u32 q12,d28,d2[0] + vmlal.u32 q13,d28,d2[1] + vmlal.u32 q6,d28,d3[0] + vmlal.u32 q7,d28,d3[1] + vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3] + vmlal.u32 q8,d29,d4[0] + vmlal.u32 q9,d29,d4[1] + vmlal.u32 q10,d29,d5[0] + vmlal.u32 q11,d29,d5[1] + vmlal.u32 q12,d29,d6[0] + vmlal.u32 q13,d29,d6[1] + vmlal.u32 q6,d29,d7[0] + vmlal.u32 q7,d29,d7[1] + vst1.64 {q8},[r7,:128]! + vmlal.u32 q9,d28,d0[0] + vld1.64 {q8},[r6,:128] + vmlal.u32 q10,d28,d0[1] + vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3] + vmlal.u32 q11,d28,d1[0] + it ne + addne r6,r6,#16 @ don't advance in last iteration + vmlal.u32 q12,d28,d1[1] + vmlal.u32 q13,d28,d2[0] + vmlal.u32 q6,d28,d2[1] + vmlal.u32 q7,d28,d3[0] + vmlal.u32 q8,d28,d3[1] + vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4] + vmlal.u32 q9,d29,d4[0] + vmlal.u32 q10,d29,d4[1] + vmlal.u32 q11,d29,d5[0] + vmlal.u32 q12,d29,d5[1] + vmlal.u32 q13,d29,d6[0] + vmlal.u32 q6,d29,d6[1] + vmlal.u32 q7,d29,d7[0] + vmlal.u32 q8,d29,d7[1] + vst1.64 {q9},[r7,:128]! + vmlal.u32 q10,d28,d0[0] + vld1.64 {q9},[r6,:128] + vmlal.u32 q11,d28,d0[1] + vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4] + vmlal.u32 q12,d28,d1[0] + it ne + addne r6,r6,#16 @ don't advance in last iteration + vmlal.u32 q13,d28,d1[1] + vmlal.u32 q6,d28,d2[0] + vmlal.u32 q7,d28,d2[1] + vmlal.u32 q8,d28,d3[0] + vmlal.u32 q9,d28,d3[1] + vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5] + vmlal.u32 q10,d29,d4[0] + vmlal.u32 q11,d29,d4[1] + vmlal.u32 q12,d29,d5[0] + vmlal.u32 q13,d29,d5[1] + vmlal.u32 q6,d29,d6[0] + vmlal.u32 q7,d29,d6[1] + vmlal.u32 q8,d29,d7[0] + vmlal.u32 q9,d29,d7[1] + vst1.64 {q10},[r7,:128]! + vmlal.u32 q11,d28,d0[0] + vld1.64 {q10},[r6,:128] + vmlal.u32 q12,d28,d0[1] + vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5] + vmlal.u32 q13,d28,d1[0] + it ne + addne r6,r6,#16 @ don't advance in last iteration + vmlal.u32 q6,d28,d1[1] + vmlal.u32 q7,d28,d2[0] + vmlal.u32 q8,d28,d2[1] + vmlal.u32 q9,d28,d3[0] + vmlal.u32 q10,d28,d3[1] + vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6] + vmlal.u32 q11,d29,d4[0] + vmlal.u32 q12,d29,d4[1] + vmlal.u32 q13,d29,d5[0] + vmlal.u32 q6,d29,d5[1] + vmlal.u32 q7,d29,d6[0] + vmlal.u32 q8,d29,d6[1] + vmlal.u32 q9,d29,d7[0] + vmlal.u32 q10,d29,d7[1] + vst1.64 {q11},[r7,:128]! + vmlal.u32 q12,d28,d0[0] + vld1.64 {q11},[r6,:128] + vmlal.u32 q13,d28,d0[1] + vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6] + vmlal.u32 q6,d28,d1[0] + it ne + addne r6,r6,#16 @ don't advance in last iteration + vmlal.u32 q7,d28,d1[1] + vmlal.u32 q8,d28,d2[0] + vmlal.u32 q9,d28,d2[1] + vmlal.u32 q10,d28,d3[0] + vmlal.u32 q11,d28,d3[1] + vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7] + vmlal.u32 q12,d29,d4[0] + vmlal.u32 q13,d29,d4[1] + vmlal.u32 q6,d29,d5[0] + vmlal.u32 q7,d29,d5[1] + vmlal.u32 q8,d29,d6[0] + vmlal.u32 q9,d29,d6[1] + vmlal.u32 q10,d29,d7[0] + vmlal.u32 q11,d29,d7[1] + vst1.64 {q12},[r7,:128]! + vmlal.u32 q13,d28,d0[0] + vld1.64 {q12},[r6,:128] + vmlal.u32 q6,d28,d0[1] + vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7] + vmlal.u32 q7,d28,d1[0] + it ne + addne r6,r6,#16 @ don't advance in last iteration + vmlal.u32 q8,d28,d1[1] + vmlal.u32 q9,d28,d2[0] + vmlal.u32 q10,d28,d2[1] + vmlal.u32 q11,d28,d3[0] + vmlal.u32 q12,d28,d3[1] + it eq + subeq r1,r1,r5,lsl#2 @ rewind + vmlal.u32 q13,d29,d4[0] + vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0] + vmlal.u32 q6,d29,d4[1] + vld1.32 {d0,d1,d2,d3},[r1]! + vmlal.u32 q7,d29,d5[0] + add r10,sp,#8 @ rewind + vmlal.u32 q8,d29,d5[1] + vmlal.u32 q9,d29,d6[0] + vmlal.u32 q10,d29,d6[1] + vmlal.u32 q11,d29,d7[0] + vst1.64 {q13},[r7,:128]! + vmlal.u32 q12,d29,d7[1] + + bne .LNEON_8n_inner + add r6,sp,#128 + vst1.64 {q6,q7},[r7,:256]! + veor q2,q2,q2 @ d4-d5 + vst1.64 {q8,q9},[r7,:256]! + veor q3,q3,q3 @ d6-d7 + vst1.64 {q10,q11},[r7,:256]! + vst1.64 {q12},[r7,:128] + + subs r9,r9,#8 + vld1.64 {q6,q7},[r6,:256]! + vld1.64 {q8,q9},[r6,:256]! + vld1.64 {q10,q11},[r6,:256]! + vld1.64 {q12,q13},[r6,:256]! + + itt ne + subne r3,r3,r5,lsl#2 @ rewind + bne .LNEON_8n_outer + + add r7,sp,#128 + vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame + vshr.u64 d10,d12,#16 + vst1.64 {q2,q3},[sp,:256]! + vadd.u64 d13,d13,d10 + vst1.64 {q2,q3}, [sp,:256]! + vshr.u64 d10,d13,#16 + vst1.64 {q2,q3}, [sp,:256]! + vzip.16 d12,d13 + + mov r8,r5 + b .LNEON_tail_entry + +.align 4 +.LNEON_tail: + vadd.u64 d12,d12,d10 + vshr.u64 d10,d12,#16 + vld1.64 {q8,q9}, [r6, :256]! + vadd.u64 d13,d13,d10 + vld1.64 {q10,q11}, [r6, :256]! + vshr.u64 d10,d13,#16 + vld1.64 {q12,q13}, [r6, :256]! + vzip.16 d12,d13 + +.LNEON_tail_entry: + vadd.u64 d14,d14,d10 + vst1.32 {d12[0]}, [r7, :32]! + vshr.u64 d10,d14,#16 + vadd.u64 d15,d15,d10 + vshr.u64 d10,d15,#16 + vzip.16 d14,d15 + vadd.u64 d16,d16,d10 + vst1.32 {d14[0]}, [r7, :32]! + vshr.u64 d10,d16,#16 + vadd.u64 d17,d17,d10 + vshr.u64 d10,d17,#16 + vzip.16 d16,d17 + vadd.u64 d18,d18,d10 + vst1.32 {d16[0]}, [r7, :32]! + vshr.u64 d10,d18,#16 + vadd.u64 d19,d19,d10 + vshr.u64 d10,d19,#16 + vzip.16 d18,d19 + vadd.u64 d20,d20,d10 + vst1.32 {d18[0]}, [r7, :32]! + vshr.u64 d10,d20,#16 + vadd.u64 d21,d21,d10 + vshr.u64 d10,d21,#16 + vzip.16 d20,d21 + vadd.u64 d22,d22,d10 + vst1.32 {d20[0]}, [r7, :32]! + vshr.u64 d10,d22,#16 + vadd.u64 d23,d23,d10 + vshr.u64 d10,d23,#16 + vzip.16 d22,d23 + vadd.u64 d24,d24,d10 + vst1.32 {d22[0]}, [r7, :32]! + vshr.u64 d10,d24,#16 + vadd.u64 d25,d25,d10 + vshr.u64 d10,d25,#16 + vzip.16 d24,d25 + vadd.u64 d26,d26,d10 + vst1.32 {d24[0]}, [r7, :32]! + vshr.u64 d10,d26,#16 + vadd.u64 d27,d27,d10 + vshr.u64 d10,d27,#16 + vzip.16 d26,d27 + vld1.64 {q6,q7}, [r6, :256]! + subs r8,r8,#8 + vst1.32 {d26[0]}, [r7, :32]! + bne .LNEON_tail + + vst1.32 {d10[0]}, [r7, :32] @ top-most bit + sub r3,r3,r5,lsl#2 @ rewind r3 + subs r1,sp,#0 @ clear carry flag + add r2,sp,r5,lsl#2 + +.LNEON_sub: + ldmia r1!, {r4,r5,r6,r7} + ldmia r3!, {r8,r9,r10,r11} + sbcs r8, r4,r8 + sbcs r9, r5,r9 + sbcs r10,r6,r10 + sbcs r11,r7,r11 + teq r1,r2 @ preserves carry + stmia r0!, {r8,r9,r10,r11} + bne .LNEON_sub + + ldr r10, [r1] @ load top-most bit + mov r11,sp + veor q0,q0,q0 + sub r11,r2,r11 @ this is num*4 + veor q1,q1,q1 + mov r1,sp + sub r0,r0,r11 @ rewind r0 + mov r3,r2 @ second 3/4th of frame + sbcs r10,r10,#0 @ result is carry flag + +.LNEON_copy_n_zap: + ldmia r1!, {r4,r5,r6,r7} + ldmia r0, {r8,r9,r10,r11} + it cc + movcc r8, r4 + vst1.64 {q0,q1}, [r3,:256]! @ wipe + itt cc + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0,q1}, [r3,:256]! @ wipe + it cc + movcc r11,r7 + ldmia r1, {r4,r5,r6,r7} + stmia r0!, {r8,r9,r10,r11} + sub r1,r1,#16 + ldmia r0, {r8,r9,r10,r11} + it cc + movcc r8, r4 + vst1.64 {q0,q1}, [r1,:256]! @ wipe + itt cc + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0,q1}, [r3,:256]! @ wipe + it cc + movcc r11,r7 + teq r1,r2 @ preserves carry + stmia r0!, {r8,r9,r10,r11} + bne .LNEON_copy_n_zap + + mov sp,ip + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11} + bx lr @ bx lr +.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon +#endif +.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/armv8-mont-apple.S b/yass/third_party/boringssl/src/gen/bcm/armv8-mont-apple.S new file mode 100644 index 0000000000..cf798a357d --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/armv8-mont-apple.S @@ -0,0 +1,1425 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +.text + +.globl _bn_mul_mont +.private_extern _bn_mul_mont + +.align 5 +_bn_mul_mont: + AARCH64_SIGN_LINK_REGISTER + tst x5,#7 + b.eq __bn_sqr8x_mont + tst x5,#3 + b.eq __bn_mul4x_mont +Lmul_mont: + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldr x9,[x2],#8 // bp[0] + sub x22,sp,x5,lsl#3 + ldp x7,x8,[x1],#16 // ap[0..1] + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + and x22,x22,#-16 // ABI says so + ldp x13,x14,[x3],#16 // np[0..1] + + mul x6,x7,x9 // ap[0]*bp[0] + sub x21,x5,#16 // j=num-2 + umulh x7,x7,x9 + mul x10,x8,x9 // ap[1]*bp[0] + umulh x11,x8,x9 + + mul x15,x6,x4 // "tp[0]"*n0 + mov sp,x22 // alloca + + // (*) mul x12,x13,x15 // np[0]*m1 + umulh x13,x13,x15 + mul x16,x14,x15 // np[1]*m1 + // (*) adds x12,x12,x6 // discarded + // (*) As for removal of first multiplication and addition + // instructions. The outcome of first addition is + // guaranteed to be zero, which leaves two computationally + // significant outcomes: it either carries or not. Then + // question is when does it carry? Is there alternative + // way to deduce it? If you follow operations, you can + // observe that condition for carry is quite simple: + // x6 being non-zero. So that carry can be calculated + // by adding -1 to x6. That's what next instruction does. + subs xzr,x6,#1 // (*) + umulh x17,x14,x15 + adc x13,x13,xzr + cbz x21,L1st_skip + +L1st: + ldr x8,[x1],#8 + adds x6,x10,x7 + sub x21,x21,#8 // j-- + adc x7,x11,xzr + + ldr x14,[x3],#8 + adds x12,x16,x13 + mul x10,x8,x9 // ap[j]*bp[0] + adc x13,x17,xzr + umulh x11,x8,x9 + + adds x12,x12,x6 + mul x16,x14,x15 // np[j]*m1 + adc x13,x13,xzr + umulh x17,x14,x15 + str x12,[x22],#8 // tp[j-1] + cbnz x21,L1st + +L1st_skip: + adds x6,x10,x7 + sub x1,x1,x5 // rewind x1 + adc x7,x11,xzr + + adds x12,x16,x13 + sub x3,x3,x5 // rewind x3 + adc x13,x17,xzr + + adds x12,x12,x6 + sub x20,x5,#8 // i=num-1 + adcs x13,x13,x7 + + adc x19,xzr,xzr // upmost overflow bit + stp x12,x13,[x22] + +Louter: + ldr x9,[x2],#8 // bp[i] + ldp x7,x8,[x1],#16 + ldr x23,[sp] // tp[0] + add x22,sp,#8 + + mul x6,x7,x9 // ap[0]*bp[i] + sub x21,x5,#16 // j=num-2 + umulh x7,x7,x9 + ldp x13,x14,[x3],#16 + mul x10,x8,x9 // ap[1]*bp[i] + adds x6,x6,x23 + umulh x11,x8,x9 + adc x7,x7,xzr + + mul x15,x6,x4 + sub x20,x20,#8 // i-- + + // (*) mul x12,x13,x15 // np[0]*m1 + umulh x13,x13,x15 + mul x16,x14,x15 // np[1]*m1 + // (*) adds x12,x12,x6 + subs xzr,x6,#1 // (*) + umulh x17,x14,x15 + cbz x21,Linner_skip + +Linner: + ldr x8,[x1],#8 + adc x13,x13,xzr + ldr x23,[x22],#8 // tp[j] + adds x6,x10,x7 + sub x21,x21,#8 // j-- + adc x7,x11,xzr + + adds x12,x16,x13 + ldr x14,[x3],#8 + adc x13,x17,xzr + + mul x10,x8,x9 // ap[j]*bp[i] + adds x6,x6,x23 + umulh x11,x8,x9 + adc x7,x7,xzr + + mul x16,x14,x15 // np[j]*m1 + adds x12,x12,x6 + umulh x17,x14,x15 + str x12,[x22,#-16] // tp[j-1] + cbnz x21,Linner + +Linner_skip: + ldr x23,[x22],#8 // tp[j] + adc x13,x13,xzr + adds x6,x10,x7 + sub x1,x1,x5 // rewind x1 + adc x7,x11,xzr + + adds x12,x16,x13 + sub x3,x3,x5 // rewind x3 + adcs x13,x17,x19 + adc x19,xzr,xzr + + adds x6,x6,x23 + adc x7,x7,xzr + + adds x12,x12,x6 + adcs x13,x13,x7 + adc x19,x19,xzr // upmost overflow bit + stp x12,x13,[x22,#-16] + + cbnz x20,Louter + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + ldr x23,[sp] // tp[0] + add x22,sp,#8 + ldr x14,[x3],#8 // np[0] + subs x21,x5,#8 // j=num-1 and clear borrow + mov x1,x0 +Lsub: + sbcs x8,x23,x14 // tp[j]-np[j] + ldr x23,[x22],#8 + sub x21,x21,#8 // j-- + ldr x14,[x3],#8 + str x8,[x1],#8 // rp[j]=tp[j]-np[j] + cbnz x21,Lsub + + sbcs x8,x23,x14 + sbcs x19,x19,xzr // did it borrow? + str x8,[x1],#8 // rp[num-1] + + ldr x23,[sp] // tp[0] + add x22,sp,#8 + ldr x8,[x0],#8 // rp[0] + sub x5,x5,#8 // num-- + nop +Lcond_copy: + sub x5,x5,#8 // num-- + csel x14,x23,x8,lo // did it borrow? + ldr x23,[x22],#8 + ldr x8,[x0],#8 + str xzr,[x22,#-16] // wipe tp + str x14,[x0,#-16] + cbnz x5,Lcond_copy + + csel x14,x23,x8,lo + str xzr,[x22,#-8] // wipe tp + str x14,[x0,#-8] + + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.align 5 +__bn_sqr8x_mont: + // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to + // only from bn_mul_mont which has already signed the return address. + cmp x1,x2 + b.ne __bn_mul4x_mont +Lsqr8x_mont: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x0,x3,[sp,#96] // offload rp and np + + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + ldp x10,x11,[x1,#8*4] + ldp x12,x13,[x1,#8*6] + + sub x2,sp,x5,lsl#4 + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + mov sp,x2 // alloca + sub x27,x5,#8*8 + b Lsqr8x_zero_start + +Lsqr8x_zero: + sub x27,x27,#8*8 + stp xzr,xzr,[x2,#8*0] + stp xzr,xzr,[x2,#8*2] + stp xzr,xzr,[x2,#8*4] + stp xzr,xzr,[x2,#8*6] +Lsqr8x_zero_start: + stp xzr,xzr,[x2,#8*8] + stp xzr,xzr,[x2,#8*10] + stp xzr,xzr,[x2,#8*12] + stp xzr,xzr,[x2,#8*14] + add x2,x2,#8*16 + cbnz x27,Lsqr8x_zero + + add x3,x1,x5 + add x1,x1,#8*8 + mov x19,xzr + mov x20,xzr + mov x21,xzr + mov x22,xzr + mov x23,xzr + mov x24,xzr + mov x25,xzr + mov x26,xzr + mov x2,sp + str x4,[x29,#112] // offload n0 + + // Multiply everything but a[i]*a[i] +.align 4 +Lsqr8x_outer_loop: + // a[1]a[0] (i) + // a[2]a[0] + // a[3]a[0] + // a[4]a[0] + // a[5]a[0] + // a[6]a[0] + // a[7]a[0] + // a[2]a[1] (ii) + // a[3]a[1] + // a[4]a[1] + // a[5]a[1] + // a[6]a[1] + // a[7]a[1] + // a[3]a[2] (iii) + // a[4]a[2] + // a[5]a[2] + // a[6]a[2] + // a[7]a[2] + // a[4]a[3] (iv) + // a[5]a[3] + // a[6]a[3] + // a[7]a[3] + // a[5]a[4] (v) + // a[6]a[4] + // a[7]a[4] + // a[6]a[5] (vi) + // a[7]a[5] + // a[7]a[6] (vii) + + mul x14,x7,x6 // lo(a[1..7]*a[0]) (i) + mul x15,x8,x6 + mul x16,x9,x6 + mul x17,x10,x6 + adds x20,x20,x14 // t[1]+lo(a[1]*a[0]) + mul x14,x11,x6 + adcs x21,x21,x15 + mul x15,x12,x6 + adcs x22,x22,x16 + mul x16,x13,x6 + adcs x23,x23,x17 + umulh x17,x7,x6 // hi(a[1..7]*a[0]) + adcs x24,x24,x14 + umulh x14,x8,x6 + adcs x25,x25,x15 + umulh x15,x9,x6 + adcs x26,x26,x16 + umulh x16,x10,x6 + stp x19,x20,[x2],#8*2 // t[0..1] + adc x19,xzr,xzr // t[8] + adds x21,x21,x17 // t[2]+lo(a[1]*a[0]) + umulh x17,x11,x6 + adcs x22,x22,x14 + umulh x14,x12,x6 + adcs x23,x23,x15 + umulh x15,x13,x6 + adcs x24,x24,x16 + mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii) + adcs x25,x25,x17 + mul x17,x9,x7 + adcs x26,x26,x14 + mul x14,x10,x7 + adc x19,x19,x15 + + mul x15,x11,x7 + adds x22,x22,x16 + mul x16,x12,x7 + adcs x23,x23,x17 + mul x17,x13,x7 + adcs x24,x24,x14 + umulh x14,x8,x7 // hi(a[2..7]*a[1]) + adcs x25,x25,x15 + umulh x15,x9,x7 + adcs x26,x26,x16 + umulh x16,x10,x7 + adcs x19,x19,x17 + umulh x17,x11,x7 + stp x21,x22,[x2],#8*2 // t[2..3] + adc x20,xzr,xzr // t[9] + adds x23,x23,x14 + umulh x14,x12,x7 + adcs x24,x24,x15 + umulh x15,x13,x7 + adcs x25,x25,x16 + mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii) + adcs x26,x26,x17 + mul x17,x10,x8 + adcs x19,x19,x14 + mul x14,x11,x8 + adc x20,x20,x15 + + mul x15,x12,x8 + adds x24,x24,x16 + mul x16,x13,x8 + adcs x25,x25,x17 + umulh x17,x9,x8 // hi(a[3..7]*a[2]) + adcs x26,x26,x14 + umulh x14,x10,x8 + adcs x19,x19,x15 + umulh x15,x11,x8 + adcs x20,x20,x16 + umulh x16,x12,x8 + stp x23,x24,[x2],#8*2 // t[4..5] + adc x21,xzr,xzr // t[10] + adds x25,x25,x17 + umulh x17,x13,x8 + adcs x26,x26,x14 + mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv) + adcs x19,x19,x15 + mul x15,x11,x9 + adcs x20,x20,x16 + mul x16,x12,x9 + adc x21,x21,x17 + + mul x17,x13,x9 + adds x26,x26,x14 + umulh x14,x10,x9 // hi(a[4..7]*a[3]) + adcs x19,x19,x15 + umulh x15,x11,x9 + adcs x20,x20,x16 + umulh x16,x12,x9 + adcs x21,x21,x17 + umulh x17,x13,x9 + stp x25,x26,[x2],#8*2 // t[6..7] + adc x22,xzr,xzr // t[11] + adds x19,x19,x14 + mul x14,x11,x10 // lo(a[5..7]*a[4]) (v) + adcs x20,x20,x15 + mul x15,x12,x10 + adcs x21,x21,x16 + mul x16,x13,x10 + adc x22,x22,x17 + + umulh x17,x11,x10 // hi(a[5..7]*a[4]) + adds x20,x20,x14 + umulh x14,x12,x10 + adcs x21,x21,x15 + umulh x15,x13,x10 + adcs x22,x22,x16 + mul x16,x12,x11 // lo(a[6..7]*a[5]) (vi) + adc x23,xzr,xzr // t[12] + adds x21,x21,x17 + mul x17,x13,x11 + adcs x22,x22,x14 + umulh x14,x12,x11 // hi(a[6..7]*a[5]) + adc x23,x23,x15 + + umulh x15,x13,x11 + adds x22,x22,x16 + mul x16,x13,x12 // lo(a[7]*a[6]) (vii) + adcs x23,x23,x17 + umulh x17,x13,x12 // hi(a[7]*a[6]) + adc x24,xzr,xzr // t[13] + adds x23,x23,x14 + sub x27,x3,x1 // done yet? + adc x24,x24,x15 + + adds x24,x24,x16 + sub x14,x3,x5 // rewinded ap + adc x25,xzr,xzr // t[14] + add x25,x25,x17 + + cbz x27,Lsqr8x_outer_break + + mov x4,x6 + ldp x6,x7,[x2,#8*0] + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + adds x19,x19,x6 + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x0,x1 + adcs x26,xzr,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved below + mov x27,#-8*8 + + // a[8]a[0] + // a[9]a[0] + // a[a]a[0] + // a[b]a[0] + // a[c]a[0] + // a[d]a[0] + // a[e]a[0] + // a[f]a[0] + // a[8]a[1] + // a[f]a[1]........................ + // a[8]a[2] + // a[f]a[2]........................ + // a[8]a[3] + // a[f]a[3]........................ + // a[8]a[4] + // a[f]a[4]........................ + // a[8]a[5] + // a[f]a[5]........................ + // a[8]a[6] + // a[f]a[6]........................ + // a[8]a[7] + // a[f]a[7]........................ +Lsqr8x_mul: + mul x14,x6,x4 + adc x28,xzr,xzr // carry bit, modulo-scheduled + mul x15,x7,x4 + add x27,x27,#8 + mul x16,x8,x4 + mul x17,x9,x4 + adds x19,x19,x14 + mul x14,x10,x4 + adcs x20,x20,x15 + mul x15,x11,x4 + adcs x21,x21,x16 + mul x16,x12,x4 + adcs x22,x22,x17 + mul x17,x13,x4 + adcs x23,x23,x14 + umulh x14,x6,x4 + adcs x24,x24,x15 + umulh x15,x7,x4 + adcs x25,x25,x16 + umulh x16,x8,x4 + adcs x26,x26,x17 + umulh x17,x9,x4 + adc x28,x28,xzr + str x19,[x2],#8 + adds x19,x20,x14 + umulh x14,x10,x4 + adcs x20,x21,x15 + umulh x15,x11,x4 + adcs x21,x22,x16 + umulh x16,x12,x4 + adcs x22,x23,x17 + umulh x17,x13,x4 + ldr x4,[x0,x27] + adcs x23,x24,x14 + adcs x24,x25,x15 + adcs x25,x26,x16 + adcs x26,x28,x17 + //adc x28,xzr,xzr // moved above + cbnz x27,Lsqr8x_mul + // note that carry flag is guaranteed + // to be zero at this point + cmp x1,x3 // done yet? + b.eq Lsqr8x_break + + ldp x6,x7,[x2,#8*0] + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + adds x19,x19,x6 + ldr x4,[x0,#-8*8] + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x27,#-8*8 + adcs x26,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved above + b Lsqr8x_mul + +.align 4 +Lsqr8x_break: + ldp x6,x7,[x0,#8*0] + add x1,x0,#8*8 + ldp x8,x9,[x0,#8*2] + sub x14,x3,x1 // is it last iteration? + ldp x10,x11,[x0,#8*4] + sub x15,x2,x14 + ldp x12,x13,[x0,#8*6] + cbz x14,Lsqr8x_outer_loop + + stp x19,x20,[x2,#8*0] + ldp x19,x20,[x15,#8*0] + stp x21,x22,[x2,#8*2] + ldp x21,x22,[x15,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[x15,#8*4] + stp x25,x26,[x2,#8*6] + mov x2,x15 + ldp x25,x26,[x15,#8*6] + b Lsqr8x_outer_loop + +.align 4 +Lsqr8x_outer_break: + // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] + ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0] + ldp x15,x16,[sp,#8*1] + ldp x11,x13,[x14,#8*2] + add x1,x14,#8*4 + ldp x17,x14,[sp,#8*3] + + stp x19,x20,[x2,#8*0] + mul x19,x7,x7 + stp x21,x22,[x2,#8*2] + umulh x7,x7,x7 + stp x23,x24,[x2,#8*4] + mul x8,x9,x9 + stp x25,x26,[x2,#8*6] + mov x2,sp + umulh x9,x9,x9 + adds x20,x7,x15,lsl#1 + extr x15,x16,x15,#63 + sub x27,x5,#8*4 + +Lsqr4x_shift_n_add: + adcs x21,x8,x15 + extr x16,x17,x16,#63 + sub x27,x27,#8*4 + adcs x22,x9,x16 + ldp x15,x16,[x2,#8*5] + mul x10,x11,x11 + ldp x7,x9,[x1],#8*2 + umulh x11,x11,x11 + mul x12,x13,x13 + umulh x13,x13,x13 + extr x17,x14,x17,#63 + stp x19,x20,[x2,#8*0] + adcs x23,x10,x17 + extr x14,x15,x14,#63 + stp x21,x22,[x2,#8*2] + adcs x24,x11,x14 + ldp x17,x14,[x2,#8*7] + extr x15,x16,x15,#63 + adcs x25,x12,x15 + extr x16,x17,x16,#63 + adcs x26,x13,x16 + ldp x15,x16,[x2,#8*9] + mul x6,x7,x7 + ldp x11,x13,[x1],#8*2 + umulh x7,x7,x7 + mul x8,x9,x9 + umulh x9,x9,x9 + stp x23,x24,[x2,#8*4] + extr x17,x14,x17,#63 + stp x25,x26,[x2,#8*6] + add x2,x2,#8*8 + adcs x19,x6,x17 + extr x14,x15,x14,#63 + adcs x20,x7,x14 + ldp x17,x14,[x2,#8*3] + extr x15,x16,x15,#63 + cbnz x27,Lsqr4x_shift_n_add + ldp x1,x4,[x29,#104] // pull np and n0 + + adcs x21,x8,x15 + extr x16,x17,x16,#63 + adcs x22,x9,x16 + ldp x15,x16,[x2,#8*5] + mul x10,x11,x11 + umulh x11,x11,x11 + stp x19,x20,[x2,#8*0] + mul x12,x13,x13 + umulh x13,x13,x13 + stp x21,x22,[x2,#8*2] + extr x17,x14,x17,#63 + adcs x23,x10,x17 + extr x14,x15,x14,#63 + ldp x19,x20,[sp,#8*0] + adcs x24,x11,x14 + extr x15,x16,x15,#63 + ldp x6,x7,[x1,#8*0] + adcs x25,x12,x15 + extr x16,xzr,x16,#63 + ldp x8,x9,[x1,#8*2] + adc x26,x13,x16 + ldp x10,x11,[x1,#8*4] + + // Reduce by 512 bits per iteration + mul x28,x4,x19 // t[0]*n0 + ldp x12,x13,[x1,#8*6] + add x3,x1,x5 + ldp x21,x22,[sp,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[sp,#8*4] + stp x25,x26,[x2,#8*6] + ldp x25,x26,[sp,#8*6] + add x1,x1,#8*8 + mov x30,xzr // initial top-most carry + mov x2,sp + mov x27,#8 + +Lsqr8x_reduction: + // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0) + mul x15,x7,x28 + sub x27,x27,#1 + mul x16,x8,x28 + str x28,[x2],#8 // put aside t[0]*n0 for tail processing + mul x17,x9,x28 + // (*) adds xzr,x19,x14 + subs xzr,x19,#1 // (*) + mul x14,x10,x28 + adcs x19,x20,x15 + mul x15,x11,x28 + adcs x20,x21,x16 + mul x16,x12,x28 + adcs x21,x22,x17 + mul x17,x13,x28 + adcs x22,x23,x14 + umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0) + adcs x23,x24,x15 + umulh x15,x7,x28 + adcs x24,x25,x16 + umulh x16,x8,x28 + adcs x25,x26,x17 + umulh x17,x9,x28 + adc x26,xzr,xzr + adds x19,x19,x14 + umulh x14,x10,x28 + adcs x20,x20,x15 + umulh x15,x11,x28 + adcs x21,x21,x16 + umulh x16,x12,x28 + adcs x22,x22,x17 + umulh x17,x13,x28 + mul x28,x4,x19 // next t[0]*n0 + adcs x23,x23,x14 + adcs x24,x24,x15 + adcs x25,x25,x16 + adc x26,x26,x17 + cbnz x27,Lsqr8x_reduction + + ldp x14,x15,[x2,#8*0] + ldp x16,x17,[x2,#8*2] + mov x0,x2 + sub x27,x3,x1 // done yet? + adds x19,x19,x14 + adcs x20,x20,x15 + ldp x14,x15,[x2,#8*4] + adcs x21,x21,x16 + adcs x22,x22,x17 + ldp x16,x17,[x2,#8*6] + adcs x23,x23,x14 + adcs x24,x24,x15 + adcs x25,x25,x16 + adcs x26,x26,x17 + //adc x28,xzr,xzr // moved below + cbz x27,Lsqr8x8_post_condition + + ldr x4,[x2,#-8*8] + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + ldp x10,x11,[x1,#8*4] + mov x27,#-8*8 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + +Lsqr8x_tail: + mul x14,x6,x4 + adc x28,xzr,xzr // carry bit, modulo-scheduled + mul x15,x7,x4 + add x27,x27,#8 + mul x16,x8,x4 + mul x17,x9,x4 + adds x19,x19,x14 + mul x14,x10,x4 + adcs x20,x20,x15 + mul x15,x11,x4 + adcs x21,x21,x16 + mul x16,x12,x4 + adcs x22,x22,x17 + mul x17,x13,x4 + adcs x23,x23,x14 + umulh x14,x6,x4 + adcs x24,x24,x15 + umulh x15,x7,x4 + adcs x25,x25,x16 + umulh x16,x8,x4 + adcs x26,x26,x17 + umulh x17,x9,x4 + adc x28,x28,xzr + str x19,[x2],#8 + adds x19,x20,x14 + umulh x14,x10,x4 + adcs x20,x21,x15 + umulh x15,x11,x4 + adcs x21,x22,x16 + umulh x16,x12,x4 + adcs x22,x23,x17 + umulh x17,x13,x4 + ldr x4,[x0,x27] + adcs x23,x24,x14 + adcs x24,x25,x15 + adcs x25,x26,x16 + adcs x26,x28,x17 + //adc x28,xzr,xzr // moved above + cbnz x27,Lsqr8x_tail + // note that carry flag is guaranteed + // to be zero at this point + ldp x6,x7,[x2,#8*0] + sub x27,x3,x1 // done yet? + sub x16,x3,x5 // rewinded np + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + cbz x27,Lsqr8x_tail_break + + ldr x4,[x0,#-8*8] + adds x19,x19,x6 + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x27,#-8*8 + adcs x26,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved above + b Lsqr8x_tail + +.align 4 +Lsqr8x_tail_break: + ldr x4,[x29,#112] // pull n0 + add x27,x2,#8*8 // end of current t[num] window + + subs xzr,x30,#1 // "move" top-most carry to carry bit + adcs x14,x19,x6 + adcs x15,x20,x7 + ldp x19,x20,[x0,#8*0] + adcs x21,x21,x8 + ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0] + adcs x22,x22,x9 + ldp x8,x9,[x16,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x16,#8*4] + adcs x25,x25,x12 + adcs x26,x26,x13 + ldp x12,x13,[x16,#8*6] + add x1,x16,#8*8 + adc x30,xzr,xzr // top-most carry + mul x28,x4,x19 + stp x14,x15,[x2,#8*0] + stp x21,x22,[x2,#8*2] + ldp x21,x22,[x0,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[x0,#8*4] + cmp x27,x29 // did we hit the bottom? + stp x25,x26,[x2,#8*6] + mov x2,x0 // slide the window + ldp x25,x26,[x0,#8*6] + mov x27,#8 + b.ne Lsqr8x_reduction + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + ldr x0,[x29,#96] // pull rp + add x2,x2,#8*8 + subs x14,x19,x6 + sbcs x15,x20,x7 + sub x27,x5,#8*8 + mov x3,x0 // x0 copy + +Lsqr8x_sub: + sbcs x16,x21,x8 + ldp x6,x7,[x1,#8*0] + sbcs x17,x22,x9 + stp x14,x15,[x0,#8*0] + sbcs x14,x23,x10 + ldp x8,x9,[x1,#8*2] + sbcs x15,x24,x11 + stp x16,x17,[x0,#8*2] + sbcs x16,x25,x12 + ldp x10,x11,[x1,#8*4] + sbcs x17,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + ldp x19,x20,[x2,#8*0] + sub x27,x27,#8*8 + ldp x21,x22,[x2,#8*2] + ldp x23,x24,[x2,#8*4] + ldp x25,x26,[x2,#8*6] + add x2,x2,#8*8 + stp x14,x15,[x0,#8*4] + sbcs x14,x19,x6 + stp x16,x17,[x0,#8*6] + add x0,x0,#8*8 + sbcs x15,x20,x7 + cbnz x27,Lsqr8x_sub + + sbcs x16,x21,x8 + mov x2,sp + add x1,sp,x5 + ldp x6,x7,[x3,#8*0] + sbcs x17,x22,x9 + stp x14,x15,[x0,#8*0] + sbcs x14,x23,x10 + ldp x8,x9,[x3,#8*2] + sbcs x15,x24,x11 + stp x16,x17,[x0,#8*2] + sbcs x16,x25,x12 + ldp x19,x20,[x1,#8*0] + sbcs x17,x26,x13 + ldp x21,x22,[x1,#8*2] + sbcs xzr,x30,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + stp x14,x15,[x0,#8*4] + stp x16,x17,[x0,#8*6] + + sub x27,x5,#8*4 +Lsqr4x_cond_copy: + sub x27,x27,#8*4 + csel x14,x19,x6,lo + stp xzr,xzr,[x2,#8*0] + csel x15,x20,x7,lo + ldp x6,x7,[x3,#8*4] + ldp x19,x20,[x1,#8*4] + csel x16,x21,x8,lo + stp xzr,xzr,[x2,#8*2] + add x2,x2,#8*4 + csel x17,x22,x9,lo + ldp x8,x9,[x3,#8*6] + ldp x21,x22,[x1,#8*6] + add x1,x1,#8*4 + stp x14,x15,[x3,#8*0] + stp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + stp xzr,xzr,[x1,#8*0] + stp xzr,xzr,[x1,#8*2] + cbnz x27,Lsqr4x_cond_copy + + csel x14,x19,x6,lo + stp xzr,xzr,[x2,#8*0] + csel x15,x20,x7,lo + stp xzr,xzr,[x2,#8*2] + csel x16,x21,x8,lo + csel x17,x22,x9,lo + stp x14,x15,[x3,#8*0] + stp x16,x17,[x3,#8*2] + + b Lsqr8x_done + +.align 4 +Lsqr8x8_post_condition: + adc x28,xzr,xzr + ldr x30,[x29,#8] // pull return address + // x19-7,x28 hold result, x6-7 hold modulus + subs x6,x19,x6 + ldr x1,[x29,#96] // pull rp + sbcs x7,x20,x7 + stp xzr,xzr,[sp,#8*0] + sbcs x8,x21,x8 + stp xzr,xzr,[sp,#8*2] + sbcs x9,x22,x9 + stp xzr,xzr,[sp,#8*4] + sbcs x10,x23,x10 + stp xzr,xzr,[sp,#8*6] + sbcs x11,x24,x11 + stp xzr,xzr,[sp,#8*8] + sbcs x12,x25,x12 + stp xzr,xzr,[sp,#8*10] + sbcs x13,x26,x13 + stp xzr,xzr,[sp,#8*12] + sbcs x28,x28,xzr // did it borrow? + stp xzr,xzr,[sp,#8*14] + + // x6-7 hold result-modulus + csel x6,x19,x6,lo + csel x7,x20,x7,lo + csel x8,x21,x8,lo + csel x9,x22,x9,lo + stp x6,x7,[x1,#8*0] + csel x10,x23,x10,lo + csel x11,x24,x11,lo + stp x8,x9,[x1,#8*2] + csel x12,x25,x12,lo + csel x13,x26,x13,lo + stp x10,x11,[x1,#8*4] + stp x12,x13,[x1,#8*6] + +Lsqr8x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + // x30 is popped earlier + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.align 5 +__bn_mul4x_mont: + // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to + // only from bn_mul_mont or __bn_mul8x_mont which have already signed the + // return address. + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + sub x26,sp,x5,lsl#3 + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + sub sp,x26,#8*4 // alloca + + add x10,x2,x5 + add x27,x1,x5 + stp x0,x10,[x29,#96] // offload rp and &b[num] + + ldr x24,[x2,#8*0] // b[0] + ldp x6,x7,[x1,#8*0] // a[0..3] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + mov x19,xzr + mov x20,xzr + mov x21,xzr + mov x22,xzr + ldp x14,x15,[x3,#8*0] // n[0..3] + ldp x16,x17,[x3,#8*2] + adds x3,x3,#8*4 // clear carry bit + mov x0,xzr + mov x28,#0 + mov x26,sp + +Loop_mul4x_1st_reduction: + mul x10,x6,x24 // lo(a[0..3]*b[0]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[0..3]*b[0]) + adcs x20,x20,x11 + mul x25,x19,x4 // t[0]*n0 + adcs x21,x21,x12 + umulh x11,x7,x24 + adcs x22,x22,x13 + umulh x12,x8,x24 + adc x23,xzr,xzr + umulh x13,x9,x24 + ldr x24,[x2,x28] // next b[i] (or b[0]) + adds x20,x20,x10 + // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0) + str x25,[x26],#8 // put aside t[0]*n0 for tail processing + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + // (*) adds xzr,x19,x10 + subs xzr,x19,#1 // (*) + umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0) + adcs x19,x20,x11 + umulh x11,x15,x25 + adcs x20,x21,x12 + umulh x12,x16,x25 + adcs x21,x22,x13 + umulh x13,x17,x25 + adcs x22,x23,x0 + adc x0,xzr,xzr + adds x19,x19,x10 + sub x10,x27,x1 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + cbnz x28,Loop_mul4x_1st_reduction + + cbz x10,Lmul4x4_post_condition + + ldp x6,x7,[x1,#8*0] // a[4..7] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + ldr x25,[sp] // a[0]*n0 + ldp x14,x15,[x3,#8*0] // n[4..7] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + +Loop_mul4x_1st_tail: + mul x10,x6,x24 // lo(a[4..7]*b[i]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[4..7]*b[i]) + adcs x20,x20,x11 + umulh x11,x7,x24 + adcs x21,x21,x12 + umulh x12,x8,x24 + adcs x22,x22,x13 + umulh x13,x9,x24 + adc x23,xzr,xzr + ldr x24,[x2,x28] // next b[i] (or b[0]) + adds x20,x20,x10 + mul x10,x14,x25 // lo(n[4..7]*a[0]*n0) + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + adds x19,x19,x10 + umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0) + adcs x20,x20,x11 + umulh x11,x15,x25 + adcs x21,x21,x12 + umulh x12,x16,x25 + adcs x22,x22,x13 + adcs x23,x23,x0 + umulh x13,x17,x25 + adc x0,xzr,xzr + ldr x25,[sp,x28] // next t[0]*n0 + str x19,[x26],#8 // result!!! + adds x19,x20,x10 + sub x10,x27,x1 // done yet? + adcs x20,x21,x11 + adcs x21,x22,x12 + adcs x22,x23,x13 + //adc x0,x0,xzr + cbnz x28,Loop_mul4x_1st_tail + + sub x11,x27,x5 // rewinded x1 + cbz x10,Lmul4x_proceed + + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + ldp x14,x15,[x3,#8*0] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + b Loop_mul4x_1st_tail + +.align 5 +Lmul4x_proceed: + ldr x24,[x2,#8*4]! // *++b + adc x30,x0,xzr + ldp x6,x7,[x11,#8*0] // a[0..3] + sub x3,x3,x5 // rewind np + ldp x8,x9,[x11,#8*2] + add x1,x11,#8*4 + + stp x19,x20,[x26,#8*0] // result!!! + ldp x19,x20,[sp,#8*4] // t[0..3] + stp x21,x22,[x26,#8*2] // result!!! + ldp x21,x22,[sp,#8*6] + + ldp x14,x15,[x3,#8*0] // n[0..3] + mov x26,sp + ldp x16,x17,[x3,#8*2] + adds x3,x3,#8*4 // clear carry bit + mov x0,xzr + +.align 4 +Loop_mul4x_reduction: + mul x10,x6,x24 // lo(a[0..3]*b[4]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[0..3]*b[4]) + adcs x20,x20,x11 + mul x25,x19,x4 // t[0]*n0 + adcs x21,x21,x12 + umulh x11,x7,x24 + adcs x22,x22,x13 + umulh x12,x8,x24 + adc x23,xzr,xzr + umulh x13,x9,x24 + ldr x24,[x2,x28] // next b[i] + adds x20,x20,x10 + // (*) mul x10,x14,x25 + str x25,[x26],#8 // put aside t[0]*n0 for tail processing + adcs x21,x21,x11 + mul x11,x15,x25 // lo(n[0..3]*t[0]*n0 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + // (*) adds xzr,x19,x10 + subs xzr,x19,#1 // (*) + umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0 + adcs x19,x20,x11 + umulh x11,x15,x25 + adcs x20,x21,x12 + umulh x12,x16,x25 + adcs x21,x22,x13 + umulh x13,x17,x25 + adcs x22,x23,x0 + adc x0,xzr,xzr + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + cbnz x28,Loop_mul4x_reduction + + adc x0,x0,xzr + ldp x10,x11,[x26,#8*4] // t[4..7] + ldp x12,x13,[x26,#8*6] + ldp x6,x7,[x1,#8*0] // a[4..7] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + + ldr x25,[sp] // t[0]*n0 + ldp x14,x15,[x3,#8*0] // n[4..7] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + +.align 4 +Loop_mul4x_tail: + mul x10,x6,x24 // lo(a[4..7]*b[4]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[4..7]*b[4]) + adcs x20,x20,x11 + umulh x11,x7,x24 + adcs x21,x21,x12 + umulh x12,x8,x24 + adcs x22,x22,x13 + umulh x13,x9,x24 + adc x23,xzr,xzr + ldr x24,[x2,x28] // next b[i] + adds x20,x20,x10 + mul x10,x14,x25 // lo(n[4..7]*t[0]*n0) + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + adds x19,x19,x10 + umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0) + adcs x20,x20,x11 + umulh x11,x15,x25 + adcs x21,x21,x12 + umulh x12,x16,x25 + adcs x22,x22,x13 + umulh x13,x17,x25 + adcs x23,x23,x0 + ldr x25,[sp,x28] // next a[0]*n0 + adc x0,xzr,xzr + str x19,[x26],#8 // result!!! + adds x19,x20,x10 + sub x10,x27,x1 // done yet? + adcs x20,x21,x11 + adcs x21,x22,x12 + adcs x22,x23,x13 + //adc x0,x0,xzr + cbnz x28,Loop_mul4x_tail + + sub x11,x3,x5 // rewinded np? + adc x0,x0,xzr + cbz x10,Loop_mul4x_break + + ldp x10,x11,[x26,#8*4] + ldp x12,x13,[x26,#8*6] + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + ldp x14,x15,[x3,#8*0] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + b Loop_mul4x_tail + +.align 4 +Loop_mul4x_break: + ldp x12,x13,[x29,#96] // pull rp and &b[num] + adds x19,x19,x30 + add x2,x2,#8*4 // bp++ + adcs x20,x20,xzr + sub x1,x1,x5 // rewind ap + adcs x21,x21,xzr + stp x19,x20,[x26,#8*0] // result!!! + adcs x22,x22,xzr + ldp x19,x20,[sp,#8*4] // t[0..3] + adc x30,x0,xzr + stp x21,x22,[x26,#8*2] // result!!! + cmp x2,x13 // done yet? + ldp x21,x22,[sp,#8*6] + ldp x14,x15,[x11,#8*0] // n[0..3] + ldp x16,x17,[x11,#8*2] + add x3,x11,#8*4 + b.eq Lmul4x_post + + ldr x24,[x2] + ldp x6,x7,[x1,#8*0] // a[0..3] + ldp x8,x9,[x1,#8*2] + adds x1,x1,#8*4 // clear carry bit + mov x0,xzr + mov x26,sp + b Loop_mul4x_reduction + +.align 4 +Lmul4x_post: + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + mov x0,x12 + mov x27,x12 // x0 copy + subs x10,x19,x14 + add x26,sp,#8*8 + sbcs x11,x20,x15 + sub x28,x5,#8*4 + +Lmul4x_sub: + sbcs x12,x21,x16 + ldp x14,x15,[x3,#8*0] + sub x28,x28,#8*4 + ldp x19,x20,[x26,#8*0] + sbcs x13,x22,x17 + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + ldp x21,x22,[x26,#8*2] + add x26,x26,#8*4 + stp x10,x11,[x0,#8*0] + sbcs x10,x19,x14 + stp x12,x13,[x0,#8*2] + add x0,x0,#8*4 + sbcs x11,x20,x15 + cbnz x28,Lmul4x_sub + + sbcs x12,x21,x16 + mov x26,sp + add x1,sp,#8*4 + ldp x6,x7,[x27,#8*0] + sbcs x13,x22,x17 + stp x10,x11,[x0,#8*0] + ldp x8,x9,[x27,#8*2] + stp x12,x13,[x0,#8*2] + ldp x19,x20,[x1,#8*0] + ldp x21,x22,[x1,#8*2] + sbcs xzr,x30,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + + sub x28,x5,#8*4 +Lmul4x_cond_copy: + sub x28,x28,#8*4 + csel x10,x19,x6,lo + stp xzr,xzr,[x26,#8*0] + csel x11,x20,x7,lo + ldp x6,x7,[x27,#8*4] + ldp x19,x20,[x1,#8*4] + csel x12,x21,x8,lo + stp xzr,xzr,[x26,#8*2] + add x26,x26,#8*4 + csel x13,x22,x9,lo + ldp x8,x9,[x27,#8*6] + ldp x21,x22,[x1,#8*6] + add x1,x1,#8*4 + stp x10,x11,[x27,#8*0] + stp x12,x13,[x27,#8*2] + add x27,x27,#8*4 + cbnz x28,Lmul4x_cond_copy + + csel x10,x19,x6,lo + stp xzr,xzr,[x26,#8*0] + csel x11,x20,x7,lo + stp xzr,xzr,[x26,#8*2] + csel x12,x21,x8,lo + stp xzr,xzr,[x26,#8*3] + csel x13,x22,x9,lo + stp xzr,xzr,[x26,#8*4] + stp x10,x11,[x27,#8*0] + stp x12,x13,[x27,#8*2] + + b Lmul4x_done + +.align 4 +Lmul4x4_post_condition: + adc x0,x0,xzr + ldr x1,[x29,#96] // pull rp + // x19-3,x0 hold result, x14-7 hold modulus + subs x6,x19,x14 + ldr x30,[x29,#8] // pull return address + sbcs x7,x20,x15 + stp xzr,xzr,[sp,#8*0] + sbcs x8,x21,x16 + stp xzr,xzr,[sp,#8*2] + sbcs x9,x22,x17 + stp xzr,xzr,[sp,#8*4] + sbcs xzr,x0,xzr // did it borrow? + stp xzr,xzr,[sp,#8*6] + + // x6-3 hold result-modulus + csel x6,x19,x6,lo + csel x7,x20,x7,lo + csel x8,x21,x8,lo + csel x9,x22,x9,lo + stp x6,x7,[x1,#8*0] + stp x8,x9,[x1,#8*2] + +Lmul4x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + // x30 is popped earlier + AARCH64_VALIDATE_LINK_REGISTER + ret + +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 4 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/armv8-mont-linux.S b/yass/third_party/boringssl/src/gen/bcm/armv8-mont-linux.S new file mode 100644 index 0000000000..13f045c73f --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/armv8-mont-linux.S @@ -0,0 +1,1425 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +.text + +.globl bn_mul_mont +.hidden bn_mul_mont +.type bn_mul_mont,%function +.align 5 +bn_mul_mont: + AARCH64_SIGN_LINK_REGISTER + tst x5,#7 + b.eq __bn_sqr8x_mont + tst x5,#3 + b.eq __bn_mul4x_mont +.Lmul_mont: + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldr x9,[x2],#8 // bp[0] + sub x22,sp,x5,lsl#3 + ldp x7,x8,[x1],#16 // ap[0..1] + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + and x22,x22,#-16 // ABI says so + ldp x13,x14,[x3],#16 // np[0..1] + + mul x6,x7,x9 // ap[0]*bp[0] + sub x21,x5,#16 // j=num-2 + umulh x7,x7,x9 + mul x10,x8,x9 // ap[1]*bp[0] + umulh x11,x8,x9 + + mul x15,x6,x4 // "tp[0]"*n0 + mov sp,x22 // alloca + + // (*) mul x12,x13,x15 // np[0]*m1 + umulh x13,x13,x15 + mul x16,x14,x15 // np[1]*m1 + // (*) adds x12,x12,x6 // discarded + // (*) As for removal of first multiplication and addition + // instructions. The outcome of first addition is + // guaranteed to be zero, which leaves two computationally + // significant outcomes: it either carries or not. Then + // question is when does it carry? Is there alternative + // way to deduce it? If you follow operations, you can + // observe that condition for carry is quite simple: + // x6 being non-zero. So that carry can be calculated + // by adding -1 to x6. That's what next instruction does. + subs xzr,x6,#1 // (*) + umulh x17,x14,x15 + adc x13,x13,xzr + cbz x21,.L1st_skip + +.L1st: + ldr x8,[x1],#8 + adds x6,x10,x7 + sub x21,x21,#8 // j-- + adc x7,x11,xzr + + ldr x14,[x3],#8 + adds x12,x16,x13 + mul x10,x8,x9 // ap[j]*bp[0] + adc x13,x17,xzr + umulh x11,x8,x9 + + adds x12,x12,x6 + mul x16,x14,x15 // np[j]*m1 + adc x13,x13,xzr + umulh x17,x14,x15 + str x12,[x22],#8 // tp[j-1] + cbnz x21,.L1st + +.L1st_skip: + adds x6,x10,x7 + sub x1,x1,x5 // rewind x1 + adc x7,x11,xzr + + adds x12,x16,x13 + sub x3,x3,x5 // rewind x3 + adc x13,x17,xzr + + adds x12,x12,x6 + sub x20,x5,#8 // i=num-1 + adcs x13,x13,x7 + + adc x19,xzr,xzr // upmost overflow bit + stp x12,x13,[x22] + +.Louter: + ldr x9,[x2],#8 // bp[i] + ldp x7,x8,[x1],#16 + ldr x23,[sp] // tp[0] + add x22,sp,#8 + + mul x6,x7,x9 // ap[0]*bp[i] + sub x21,x5,#16 // j=num-2 + umulh x7,x7,x9 + ldp x13,x14,[x3],#16 + mul x10,x8,x9 // ap[1]*bp[i] + adds x6,x6,x23 + umulh x11,x8,x9 + adc x7,x7,xzr + + mul x15,x6,x4 + sub x20,x20,#8 // i-- + + // (*) mul x12,x13,x15 // np[0]*m1 + umulh x13,x13,x15 + mul x16,x14,x15 // np[1]*m1 + // (*) adds x12,x12,x6 + subs xzr,x6,#1 // (*) + umulh x17,x14,x15 + cbz x21,.Linner_skip + +.Linner: + ldr x8,[x1],#8 + adc x13,x13,xzr + ldr x23,[x22],#8 // tp[j] + adds x6,x10,x7 + sub x21,x21,#8 // j-- + adc x7,x11,xzr + + adds x12,x16,x13 + ldr x14,[x3],#8 + adc x13,x17,xzr + + mul x10,x8,x9 // ap[j]*bp[i] + adds x6,x6,x23 + umulh x11,x8,x9 + adc x7,x7,xzr + + mul x16,x14,x15 // np[j]*m1 + adds x12,x12,x6 + umulh x17,x14,x15 + str x12,[x22,#-16] // tp[j-1] + cbnz x21,.Linner + +.Linner_skip: + ldr x23,[x22],#8 // tp[j] + adc x13,x13,xzr + adds x6,x10,x7 + sub x1,x1,x5 // rewind x1 + adc x7,x11,xzr + + adds x12,x16,x13 + sub x3,x3,x5 // rewind x3 + adcs x13,x17,x19 + adc x19,xzr,xzr + + adds x6,x6,x23 + adc x7,x7,xzr + + adds x12,x12,x6 + adcs x13,x13,x7 + adc x19,x19,xzr // upmost overflow bit + stp x12,x13,[x22,#-16] + + cbnz x20,.Louter + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + ldr x23,[sp] // tp[0] + add x22,sp,#8 + ldr x14,[x3],#8 // np[0] + subs x21,x5,#8 // j=num-1 and clear borrow + mov x1,x0 +.Lsub: + sbcs x8,x23,x14 // tp[j]-np[j] + ldr x23,[x22],#8 + sub x21,x21,#8 // j-- + ldr x14,[x3],#8 + str x8,[x1],#8 // rp[j]=tp[j]-np[j] + cbnz x21,.Lsub + + sbcs x8,x23,x14 + sbcs x19,x19,xzr // did it borrow? + str x8,[x1],#8 // rp[num-1] + + ldr x23,[sp] // tp[0] + add x22,sp,#8 + ldr x8,[x0],#8 // rp[0] + sub x5,x5,#8 // num-- + nop +.Lcond_copy: + sub x5,x5,#8 // num-- + csel x14,x23,x8,lo // did it borrow? + ldr x23,[x22],#8 + ldr x8,[x0],#8 + str xzr,[x22,#-16] // wipe tp + str x14,[x0,#-16] + cbnz x5,.Lcond_copy + + csel x14,x23,x8,lo + str xzr,[x22,#-8] // wipe tp + str x14,[x0,#-8] + + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size bn_mul_mont,.-bn_mul_mont +.type __bn_sqr8x_mont,%function +.align 5 +__bn_sqr8x_mont: + // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to + // only from bn_mul_mont which has already signed the return address. + cmp x1,x2 + b.ne __bn_mul4x_mont +.Lsqr8x_mont: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x0,x3,[sp,#96] // offload rp and np + + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + ldp x10,x11,[x1,#8*4] + ldp x12,x13,[x1,#8*6] + + sub x2,sp,x5,lsl#4 + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + mov sp,x2 // alloca + sub x27,x5,#8*8 + b .Lsqr8x_zero_start + +.Lsqr8x_zero: + sub x27,x27,#8*8 + stp xzr,xzr,[x2,#8*0] + stp xzr,xzr,[x2,#8*2] + stp xzr,xzr,[x2,#8*4] + stp xzr,xzr,[x2,#8*6] +.Lsqr8x_zero_start: + stp xzr,xzr,[x2,#8*8] + stp xzr,xzr,[x2,#8*10] + stp xzr,xzr,[x2,#8*12] + stp xzr,xzr,[x2,#8*14] + add x2,x2,#8*16 + cbnz x27,.Lsqr8x_zero + + add x3,x1,x5 + add x1,x1,#8*8 + mov x19,xzr + mov x20,xzr + mov x21,xzr + mov x22,xzr + mov x23,xzr + mov x24,xzr + mov x25,xzr + mov x26,xzr + mov x2,sp + str x4,[x29,#112] // offload n0 + + // Multiply everything but a[i]*a[i] +.align 4 +.Lsqr8x_outer_loop: + // a[1]a[0] (i) + // a[2]a[0] + // a[3]a[0] + // a[4]a[0] + // a[5]a[0] + // a[6]a[0] + // a[7]a[0] + // a[2]a[1] (ii) + // a[3]a[1] + // a[4]a[1] + // a[5]a[1] + // a[6]a[1] + // a[7]a[1] + // a[3]a[2] (iii) + // a[4]a[2] + // a[5]a[2] + // a[6]a[2] + // a[7]a[2] + // a[4]a[3] (iv) + // a[5]a[3] + // a[6]a[3] + // a[7]a[3] + // a[5]a[4] (v) + // a[6]a[4] + // a[7]a[4] + // a[6]a[5] (vi) + // a[7]a[5] + // a[7]a[6] (vii) + + mul x14,x7,x6 // lo(a[1..7]*a[0]) (i) + mul x15,x8,x6 + mul x16,x9,x6 + mul x17,x10,x6 + adds x20,x20,x14 // t[1]+lo(a[1]*a[0]) + mul x14,x11,x6 + adcs x21,x21,x15 + mul x15,x12,x6 + adcs x22,x22,x16 + mul x16,x13,x6 + adcs x23,x23,x17 + umulh x17,x7,x6 // hi(a[1..7]*a[0]) + adcs x24,x24,x14 + umulh x14,x8,x6 + adcs x25,x25,x15 + umulh x15,x9,x6 + adcs x26,x26,x16 + umulh x16,x10,x6 + stp x19,x20,[x2],#8*2 // t[0..1] + adc x19,xzr,xzr // t[8] + adds x21,x21,x17 // t[2]+lo(a[1]*a[0]) + umulh x17,x11,x6 + adcs x22,x22,x14 + umulh x14,x12,x6 + adcs x23,x23,x15 + umulh x15,x13,x6 + adcs x24,x24,x16 + mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii) + adcs x25,x25,x17 + mul x17,x9,x7 + adcs x26,x26,x14 + mul x14,x10,x7 + adc x19,x19,x15 + + mul x15,x11,x7 + adds x22,x22,x16 + mul x16,x12,x7 + adcs x23,x23,x17 + mul x17,x13,x7 + adcs x24,x24,x14 + umulh x14,x8,x7 // hi(a[2..7]*a[1]) + adcs x25,x25,x15 + umulh x15,x9,x7 + adcs x26,x26,x16 + umulh x16,x10,x7 + adcs x19,x19,x17 + umulh x17,x11,x7 + stp x21,x22,[x2],#8*2 // t[2..3] + adc x20,xzr,xzr // t[9] + adds x23,x23,x14 + umulh x14,x12,x7 + adcs x24,x24,x15 + umulh x15,x13,x7 + adcs x25,x25,x16 + mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii) + adcs x26,x26,x17 + mul x17,x10,x8 + adcs x19,x19,x14 + mul x14,x11,x8 + adc x20,x20,x15 + + mul x15,x12,x8 + adds x24,x24,x16 + mul x16,x13,x8 + adcs x25,x25,x17 + umulh x17,x9,x8 // hi(a[3..7]*a[2]) + adcs x26,x26,x14 + umulh x14,x10,x8 + adcs x19,x19,x15 + umulh x15,x11,x8 + adcs x20,x20,x16 + umulh x16,x12,x8 + stp x23,x24,[x2],#8*2 // t[4..5] + adc x21,xzr,xzr // t[10] + adds x25,x25,x17 + umulh x17,x13,x8 + adcs x26,x26,x14 + mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv) + adcs x19,x19,x15 + mul x15,x11,x9 + adcs x20,x20,x16 + mul x16,x12,x9 + adc x21,x21,x17 + + mul x17,x13,x9 + adds x26,x26,x14 + umulh x14,x10,x9 // hi(a[4..7]*a[3]) + adcs x19,x19,x15 + umulh x15,x11,x9 + adcs x20,x20,x16 + umulh x16,x12,x9 + adcs x21,x21,x17 + umulh x17,x13,x9 + stp x25,x26,[x2],#8*2 // t[6..7] + adc x22,xzr,xzr // t[11] + adds x19,x19,x14 + mul x14,x11,x10 // lo(a[5..7]*a[4]) (v) + adcs x20,x20,x15 + mul x15,x12,x10 + adcs x21,x21,x16 + mul x16,x13,x10 + adc x22,x22,x17 + + umulh x17,x11,x10 // hi(a[5..7]*a[4]) + adds x20,x20,x14 + umulh x14,x12,x10 + adcs x21,x21,x15 + umulh x15,x13,x10 + adcs x22,x22,x16 + mul x16,x12,x11 // lo(a[6..7]*a[5]) (vi) + adc x23,xzr,xzr // t[12] + adds x21,x21,x17 + mul x17,x13,x11 + adcs x22,x22,x14 + umulh x14,x12,x11 // hi(a[6..7]*a[5]) + adc x23,x23,x15 + + umulh x15,x13,x11 + adds x22,x22,x16 + mul x16,x13,x12 // lo(a[7]*a[6]) (vii) + adcs x23,x23,x17 + umulh x17,x13,x12 // hi(a[7]*a[6]) + adc x24,xzr,xzr // t[13] + adds x23,x23,x14 + sub x27,x3,x1 // done yet? + adc x24,x24,x15 + + adds x24,x24,x16 + sub x14,x3,x5 // rewinded ap + adc x25,xzr,xzr // t[14] + add x25,x25,x17 + + cbz x27,.Lsqr8x_outer_break + + mov x4,x6 + ldp x6,x7,[x2,#8*0] + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + adds x19,x19,x6 + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x0,x1 + adcs x26,xzr,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved below + mov x27,#-8*8 + + // a[8]a[0] + // a[9]a[0] + // a[a]a[0] + // a[b]a[0] + // a[c]a[0] + // a[d]a[0] + // a[e]a[0] + // a[f]a[0] + // a[8]a[1] + // a[f]a[1]........................ + // a[8]a[2] + // a[f]a[2]........................ + // a[8]a[3] + // a[f]a[3]........................ + // a[8]a[4] + // a[f]a[4]........................ + // a[8]a[5] + // a[f]a[5]........................ + // a[8]a[6] + // a[f]a[6]........................ + // a[8]a[7] + // a[f]a[7]........................ +.Lsqr8x_mul: + mul x14,x6,x4 + adc x28,xzr,xzr // carry bit, modulo-scheduled + mul x15,x7,x4 + add x27,x27,#8 + mul x16,x8,x4 + mul x17,x9,x4 + adds x19,x19,x14 + mul x14,x10,x4 + adcs x20,x20,x15 + mul x15,x11,x4 + adcs x21,x21,x16 + mul x16,x12,x4 + adcs x22,x22,x17 + mul x17,x13,x4 + adcs x23,x23,x14 + umulh x14,x6,x4 + adcs x24,x24,x15 + umulh x15,x7,x4 + adcs x25,x25,x16 + umulh x16,x8,x4 + adcs x26,x26,x17 + umulh x17,x9,x4 + adc x28,x28,xzr + str x19,[x2],#8 + adds x19,x20,x14 + umulh x14,x10,x4 + adcs x20,x21,x15 + umulh x15,x11,x4 + adcs x21,x22,x16 + umulh x16,x12,x4 + adcs x22,x23,x17 + umulh x17,x13,x4 + ldr x4,[x0,x27] + adcs x23,x24,x14 + adcs x24,x25,x15 + adcs x25,x26,x16 + adcs x26,x28,x17 + //adc x28,xzr,xzr // moved above + cbnz x27,.Lsqr8x_mul + // note that carry flag is guaranteed + // to be zero at this point + cmp x1,x3 // done yet? + b.eq .Lsqr8x_break + + ldp x6,x7,[x2,#8*0] + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + adds x19,x19,x6 + ldr x4,[x0,#-8*8] + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x27,#-8*8 + adcs x26,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved above + b .Lsqr8x_mul + +.align 4 +.Lsqr8x_break: + ldp x6,x7,[x0,#8*0] + add x1,x0,#8*8 + ldp x8,x9,[x0,#8*2] + sub x14,x3,x1 // is it last iteration? + ldp x10,x11,[x0,#8*4] + sub x15,x2,x14 + ldp x12,x13,[x0,#8*6] + cbz x14,.Lsqr8x_outer_loop + + stp x19,x20,[x2,#8*0] + ldp x19,x20,[x15,#8*0] + stp x21,x22,[x2,#8*2] + ldp x21,x22,[x15,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[x15,#8*4] + stp x25,x26,[x2,#8*6] + mov x2,x15 + ldp x25,x26,[x15,#8*6] + b .Lsqr8x_outer_loop + +.align 4 +.Lsqr8x_outer_break: + // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] + ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0] + ldp x15,x16,[sp,#8*1] + ldp x11,x13,[x14,#8*2] + add x1,x14,#8*4 + ldp x17,x14,[sp,#8*3] + + stp x19,x20,[x2,#8*0] + mul x19,x7,x7 + stp x21,x22,[x2,#8*2] + umulh x7,x7,x7 + stp x23,x24,[x2,#8*4] + mul x8,x9,x9 + stp x25,x26,[x2,#8*6] + mov x2,sp + umulh x9,x9,x9 + adds x20,x7,x15,lsl#1 + extr x15,x16,x15,#63 + sub x27,x5,#8*4 + +.Lsqr4x_shift_n_add: + adcs x21,x8,x15 + extr x16,x17,x16,#63 + sub x27,x27,#8*4 + adcs x22,x9,x16 + ldp x15,x16,[x2,#8*5] + mul x10,x11,x11 + ldp x7,x9,[x1],#8*2 + umulh x11,x11,x11 + mul x12,x13,x13 + umulh x13,x13,x13 + extr x17,x14,x17,#63 + stp x19,x20,[x2,#8*0] + adcs x23,x10,x17 + extr x14,x15,x14,#63 + stp x21,x22,[x2,#8*2] + adcs x24,x11,x14 + ldp x17,x14,[x2,#8*7] + extr x15,x16,x15,#63 + adcs x25,x12,x15 + extr x16,x17,x16,#63 + adcs x26,x13,x16 + ldp x15,x16,[x2,#8*9] + mul x6,x7,x7 + ldp x11,x13,[x1],#8*2 + umulh x7,x7,x7 + mul x8,x9,x9 + umulh x9,x9,x9 + stp x23,x24,[x2,#8*4] + extr x17,x14,x17,#63 + stp x25,x26,[x2,#8*6] + add x2,x2,#8*8 + adcs x19,x6,x17 + extr x14,x15,x14,#63 + adcs x20,x7,x14 + ldp x17,x14,[x2,#8*3] + extr x15,x16,x15,#63 + cbnz x27,.Lsqr4x_shift_n_add + ldp x1,x4,[x29,#104] // pull np and n0 + + adcs x21,x8,x15 + extr x16,x17,x16,#63 + adcs x22,x9,x16 + ldp x15,x16,[x2,#8*5] + mul x10,x11,x11 + umulh x11,x11,x11 + stp x19,x20,[x2,#8*0] + mul x12,x13,x13 + umulh x13,x13,x13 + stp x21,x22,[x2,#8*2] + extr x17,x14,x17,#63 + adcs x23,x10,x17 + extr x14,x15,x14,#63 + ldp x19,x20,[sp,#8*0] + adcs x24,x11,x14 + extr x15,x16,x15,#63 + ldp x6,x7,[x1,#8*0] + adcs x25,x12,x15 + extr x16,xzr,x16,#63 + ldp x8,x9,[x1,#8*2] + adc x26,x13,x16 + ldp x10,x11,[x1,#8*4] + + // Reduce by 512 bits per iteration + mul x28,x4,x19 // t[0]*n0 + ldp x12,x13,[x1,#8*6] + add x3,x1,x5 + ldp x21,x22,[sp,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[sp,#8*4] + stp x25,x26,[x2,#8*6] + ldp x25,x26,[sp,#8*6] + add x1,x1,#8*8 + mov x30,xzr // initial top-most carry + mov x2,sp + mov x27,#8 + +.Lsqr8x_reduction: + // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0) + mul x15,x7,x28 + sub x27,x27,#1 + mul x16,x8,x28 + str x28,[x2],#8 // put aside t[0]*n0 for tail processing + mul x17,x9,x28 + // (*) adds xzr,x19,x14 + subs xzr,x19,#1 // (*) + mul x14,x10,x28 + adcs x19,x20,x15 + mul x15,x11,x28 + adcs x20,x21,x16 + mul x16,x12,x28 + adcs x21,x22,x17 + mul x17,x13,x28 + adcs x22,x23,x14 + umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0) + adcs x23,x24,x15 + umulh x15,x7,x28 + adcs x24,x25,x16 + umulh x16,x8,x28 + adcs x25,x26,x17 + umulh x17,x9,x28 + adc x26,xzr,xzr + adds x19,x19,x14 + umulh x14,x10,x28 + adcs x20,x20,x15 + umulh x15,x11,x28 + adcs x21,x21,x16 + umulh x16,x12,x28 + adcs x22,x22,x17 + umulh x17,x13,x28 + mul x28,x4,x19 // next t[0]*n0 + adcs x23,x23,x14 + adcs x24,x24,x15 + adcs x25,x25,x16 + adc x26,x26,x17 + cbnz x27,.Lsqr8x_reduction + + ldp x14,x15,[x2,#8*0] + ldp x16,x17,[x2,#8*2] + mov x0,x2 + sub x27,x3,x1 // done yet? + adds x19,x19,x14 + adcs x20,x20,x15 + ldp x14,x15,[x2,#8*4] + adcs x21,x21,x16 + adcs x22,x22,x17 + ldp x16,x17,[x2,#8*6] + adcs x23,x23,x14 + adcs x24,x24,x15 + adcs x25,x25,x16 + adcs x26,x26,x17 + //adc x28,xzr,xzr // moved below + cbz x27,.Lsqr8x8_post_condition + + ldr x4,[x2,#-8*8] + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + ldp x10,x11,[x1,#8*4] + mov x27,#-8*8 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + +.Lsqr8x_tail: + mul x14,x6,x4 + adc x28,xzr,xzr // carry bit, modulo-scheduled + mul x15,x7,x4 + add x27,x27,#8 + mul x16,x8,x4 + mul x17,x9,x4 + adds x19,x19,x14 + mul x14,x10,x4 + adcs x20,x20,x15 + mul x15,x11,x4 + adcs x21,x21,x16 + mul x16,x12,x4 + adcs x22,x22,x17 + mul x17,x13,x4 + adcs x23,x23,x14 + umulh x14,x6,x4 + adcs x24,x24,x15 + umulh x15,x7,x4 + adcs x25,x25,x16 + umulh x16,x8,x4 + adcs x26,x26,x17 + umulh x17,x9,x4 + adc x28,x28,xzr + str x19,[x2],#8 + adds x19,x20,x14 + umulh x14,x10,x4 + adcs x20,x21,x15 + umulh x15,x11,x4 + adcs x21,x22,x16 + umulh x16,x12,x4 + adcs x22,x23,x17 + umulh x17,x13,x4 + ldr x4,[x0,x27] + adcs x23,x24,x14 + adcs x24,x25,x15 + adcs x25,x26,x16 + adcs x26,x28,x17 + //adc x28,xzr,xzr // moved above + cbnz x27,.Lsqr8x_tail + // note that carry flag is guaranteed + // to be zero at this point + ldp x6,x7,[x2,#8*0] + sub x27,x3,x1 // done yet? + sub x16,x3,x5 // rewinded np + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + cbz x27,.Lsqr8x_tail_break + + ldr x4,[x0,#-8*8] + adds x19,x19,x6 + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x27,#-8*8 + adcs x26,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved above + b .Lsqr8x_tail + +.align 4 +.Lsqr8x_tail_break: + ldr x4,[x29,#112] // pull n0 + add x27,x2,#8*8 // end of current t[num] window + + subs xzr,x30,#1 // "move" top-most carry to carry bit + adcs x14,x19,x6 + adcs x15,x20,x7 + ldp x19,x20,[x0,#8*0] + adcs x21,x21,x8 + ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0] + adcs x22,x22,x9 + ldp x8,x9,[x16,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x16,#8*4] + adcs x25,x25,x12 + adcs x26,x26,x13 + ldp x12,x13,[x16,#8*6] + add x1,x16,#8*8 + adc x30,xzr,xzr // top-most carry + mul x28,x4,x19 + stp x14,x15,[x2,#8*0] + stp x21,x22,[x2,#8*2] + ldp x21,x22,[x0,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[x0,#8*4] + cmp x27,x29 // did we hit the bottom? + stp x25,x26,[x2,#8*6] + mov x2,x0 // slide the window + ldp x25,x26,[x0,#8*6] + mov x27,#8 + b.ne .Lsqr8x_reduction + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + ldr x0,[x29,#96] // pull rp + add x2,x2,#8*8 + subs x14,x19,x6 + sbcs x15,x20,x7 + sub x27,x5,#8*8 + mov x3,x0 // x0 copy + +.Lsqr8x_sub: + sbcs x16,x21,x8 + ldp x6,x7,[x1,#8*0] + sbcs x17,x22,x9 + stp x14,x15,[x0,#8*0] + sbcs x14,x23,x10 + ldp x8,x9,[x1,#8*2] + sbcs x15,x24,x11 + stp x16,x17,[x0,#8*2] + sbcs x16,x25,x12 + ldp x10,x11,[x1,#8*4] + sbcs x17,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + ldp x19,x20,[x2,#8*0] + sub x27,x27,#8*8 + ldp x21,x22,[x2,#8*2] + ldp x23,x24,[x2,#8*4] + ldp x25,x26,[x2,#8*6] + add x2,x2,#8*8 + stp x14,x15,[x0,#8*4] + sbcs x14,x19,x6 + stp x16,x17,[x0,#8*6] + add x0,x0,#8*8 + sbcs x15,x20,x7 + cbnz x27,.Lsqr8x_sub + + sbcs x16,x21,x8 + mov x2,sp + add x1,sp,x5 + ldp x6,x7,[x3,#8*0] + sbcs x17,x22,x9 + stp x14,x15,[x0,#8*0] + sbcs x14,x23,x10 + ldp x8,x9,[x3,#8*2] + sbcs x15,x24,x11 + stp x16,x17,[x0,#8*2] + sbcs x16,x25,x12 + ldp x19,x20,[x1,#8*0] + sbcs x17,x26,x13 + ldp x21,x22,[x1,#8*2] + sbcs xzr,x30,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + stp x14,x15,[x0,#8*4] + stp x16,x17,[x0,#8*6] + + sub x27,x5,#8*4 +.Lsqr4x_cond_copy: + sub x27,x27,#8*4 + csel x14,x19,x6,lo + stp xzr,xzr,[x2,#8*0] + csel x15,x20,x7,lo + ldp x6,x7,[x3,#8*4] + ldp x19,x20,[x1,#8*4] + csel x16,x21,x8,lo + stp xzr,xzr,[x2,#8*2] + add x2,x2,#8*4 + csel x17,x22,x9,lo + ldp x8,x9,[x3,#8*6] + ldp x21,x22,[x1,#8*6] + add x1,x1,#8*4 + stp x14,x15,[x3,#8*0] + stp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + stp xzr,xzr,[x1,#8*0] + stp xzr,xzr,[x1,#8*2] + cbnz x27,.Lsqr4x_cond_copy + + csel x14,x19,x6,lo + stp xzr,xzr,[x2,#8*0] + csel x15,x20,x7,lo + stp xzr,xzr,[x2,#8*2] + csel x16,x21,x8,lo + csel x17,x22,x9,lo + stp x14,x15,[x3,#8*0] + stp x16,x17,[x3,#8*2] + + b .Lsqr8x_done + +.align 4 +.Lsqr8x8_post_condition: + adc x28,xzr,xzr + ldr x30,[x29,#8] // pull return address + // x19-7,x28 hold result, x6-7 hold modulus + subs x6,x19,x6 + ldr x1,[x29,#96] // pull rp + sbcs x7,x20,x7 + stp xzr,xzr,[sp,#8*0] + sbcs x8,x21,x8 + stp xzr,xzr,[sp,#8*2] + sbcs x9,x22,x9 + stp xzr,xzr,[sp,#8*4] + sbcs x10,x23,x10 + stp xzr,xzr,[sp,#8*6] + sbcs x11,x24,x11 + stp xzr,xzr,[sp,#8*8] + sbcs x12,x25,x12 + stp xzr,xzr,[sp,#8*10] + sbcs x13,x26,x13 + stp xzr,xzr,[sp,#8*12] + sbcs x28,x28,xzr // did it borrow? + stp xzr,xzr,[sp,#8*14] + + // x6-7 hold result-modulus + csel x6,x19,x6,lo + csel x7,x20,x7,lo + csel x8,x21,x8,lo + csel x9,x22,x9,lo + stp x6,x7,[x1,#8*0] + csel x10,x23,x10,lo + csel x11,x24,x11,lo + stp x8,x9,[x1,#8*2] + csel x12,x25,x12,lo + csel x13,x26,x13,lo + stp x10,x11,[x1,#8*4] + stp x12,x13,[x1,#8*6] + +.Lsqr8x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + // x30 is popped earlier + AARCH64_VALIDATE_LINK_REGISTER + ret +.size __bn_sqr8x_mont,.-__bn_sqr8x_mont +.type __bn_mul4x_mont,%function +.align 5 +__bn_mul4x_mont: + // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to + // only from bn_mul_mont or __bn_mul8x_mont which have already signed the + // return address. + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + sub x26,sp,x5,lsl#3 + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + sub sp,x26,#8*4 // alloca + + add x10,x2,x5 + add x27,x1,x5 + stp x0,x10,[x29,#96] // offload rp and &b[num] + + ldr x24,[x2,#8*0] // b[0] + ldp x6,x7,[x1,#8*0] // a[0..3] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + mov x19,xzr + mov x20,xzr + mov x21,xzr + mov x22,xzr + ldp x14,x15,[x3,#8*0] // n[0..3] + ldp x16,x17,[x3,#8*2] + adds x3,x3,#8*4 // clear carry bit + mov x0,xzr + mov x28,#0 + mov x26,sp + +.Loop_mul4x_1st_reduction: + mul x10,x6,x24 // lo(a[0..3]*b[0]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[0..3]*b[0]) + adcs x20,x20,x11 + mul x25,x19,x4 // t[0]*n0 + adcs x21,x21,x12 + umulh x11,x7,x24 + adcs x22,x22,x13 + umulh x12,x8,x24 + adc x23,xzr,xzr + umulh x13,x9,x24 + ldr x24,[x2,x28] // next b[i] (or b[0]) + adds x20,x20,x10 + // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0) + str x25,[x26],#8 // put aside t[0]*n0 for tail processing + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + // (*) adds xzr,x19,x10 + subs xzr,x19,#1 // (*) + umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0) + adcs x19,x20,x11 + umulh x11,x15,x25 + adcs x20,x21,x12 + umulh x12,x16,x25 + adcs x21,x22,x13 + umulh x13,x17,x25 + adcs x22,x23,x0 + adc x0,xzr,xzr + adds x19,x19,x10 + sub x10,x27,x1 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + cbnz x28,.Loop_mul4x_1st_reduction + + cbz x10,.Lmul4x4_post_condition + + ldp x6,x7,[x1,#8*0] // a[4..7] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + ldr x25,[sp] // a[0]*n0 + ldp x14,x15,[x3,#8*0] // n[4..7] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + +.Loop_mul4x_1st_tail: + mul x10,x6,x24 // lo(a[4..7]*b[i]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[4..7]*b[i]) + adcs x20,x20,x11 + umulh x11,x7,x24 + adcs x21,x21,x12 + umulh x12,x8,x24 + adcs x22,x22,x13 + umulh x13,x9,x24 + adc x23,xzr,xzr + ldr x24,[x2,x28] // next b[i] (or b[0]) + adds x20,x20,x10 + mul x10,x14,x25 // lo(n[4..7]*a[0]*n0) + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + adds x19,x19,x10 + umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0) + adcs x20,x20,x11 + umulh x11,x15,x25 + adcs x21,x21,x12 + umulh x12,x16,x25 + adcs x22,x22,x13 + adcs x23,x23,x0 + umulh x13,x17,x25 + adc x0,xzr,xzr + ldr x25,[sp,x28] // next t[0]*n0 + str x19,[x26],#8 // result!!! + adds x19,x20,x10 + sub x10,x27,x1 // done yet? + adcs x20,x21,x11 + adcs x21,x22,x12 + adcs x22,x23,x13 + //adc x0,x0,xzr + cbnz x28,.Loop_mul4x_1st_tail + + sub x11,x27,x5 // rewinded x1 + cbz x10,.Lmul4x_proceed + + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + ldp x14,x15,[x3,#8*0] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + b .Loop_mul4x_1st_tail + +.align 5 +.Lmul4x_proceed: + ldr x24,[x2,#8*4]! // *++b + adc x30,x0,xzr + ldp x6,x7,[x11,#8*0] // a[0..3] + sub x3,x3,x5 // rewind np + ldp x8,x9,[x11,#8*2] + add x1,x11,#8*4 + + stp x19,x20,[x26,#8*0] // result!!! + ldp x19,x20,[sp,#8*4] // t[0..3] + stp x21,x22,[x26,#8*2] // result!!! + ldp x21,x22,[sp,#8*6] + + ldp x14,x15,[x3,#8*0] // n[0..3] + mov x26,sp + ldp x16,x17,[x3,#8*2] + adds x3,x3,#8*4 // clear carry bit + mov x0,xzr + +.align 4 +.Loop_mul4x_reduction: + mul x10,x6,x24 // lo(a[0..3]*b[4]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[0..3]*b[4]) + adcs x20,x20,x11 + mul x25,x19,x4 // t[0]*n0 + adcs x21,x21,x12 + umulh x11,x7,x24 + adcs x22,x22,x13 + umulh x12,x8,x24 + adc x23,xzr,xzr + umulh x13,x9,x24 + ldr x24,[x2,x28] // next b[i] + adds x20,x20,x10 + // (*) mul x10,x14,x25 + str x25,[x26],#8 // put aside t[0]*n0 for tail processing + adcs x21,x21,x11 + mul x11,x15,x25 // lo(n[0..3]*t[0]*n0 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + // (*) adds xzr,x19,x10 + subs xzr,x19,#1 // (*) + umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0 + adcs x19,x20,x11 + umulh x11,x15,x25 + adcs x20,x21,x12 + umulh x12,x16,x25 + adcs x21,x22,x13 + umulh x13,x17,x25 + adcs x22,x23,x0 + adc x0,xzr,xzr + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + cbnz x28,.Loop_mul4x_reduction + + adc x0,x0,xzr + ldp x10,x11,[x26,#8*4] // t[4..7] + ldp x12,x13,[x26,#8*6] + ldp x6,x7,[x1,#8*0] // a[4..7] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + + ldr x25,[sp] // t[0]*n0 + ldp x14,x15,[x3,#8*0] // n[4..7] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + +.align 4 +.Loop_mul4x_tail: + mul x10,x6,x24 // lo(a[4..7]*b[4]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[4..7]*b[4]) + adcs x20,x20,x11 + umulh x11,x7,x24 + adcs x21,x21,x12 + umulh x12,x8,x24 + adcs x22,x22,x13 + umulh x13,x9,x24 + adc x23,xzr,xzr + ldr x24,[x2,x28] // next b[i] + adds x20,x20,x10 + mul x10,x14,x25 // lo(n[4..7]*t[0]*n0) + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + adds x19,x19,x10 + umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0) + adcs x20,x20,x11 + umulh x11,x15,x25 + adcs x21,x21,x12 + umulh x12,x16,x25 + adcs x22,x22,x13 + umulh x13,x17,x25 + adcs x23,x23,x0 + ldr x25,[sp,x28] // next a[0]*n0 + adc x0,xzr,xzr + str x19,[x26],#8 // result!!! + adds x19,x20,x10 + sub x10,x27,x1 // done yet? + adcs x20,x21,x11 + adcs x21,x22,x12 + adcs x22,x23,x13 + //adc x0,x0,xzr + cbnz x28,.Loop_mul4x_tail + + sub x11,x3,x5 // rewinded np? + adc x0,x0,xzr + cbz x10,.Loop_mul4x_break + + ldp x10,x11,[x26,#8*4] + ldp x12,x13,[x26,#8*6] + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + ldp x14,x15,[x3,#8*0] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + b .Loop_mul4x_tail + +.align 4 +.Loop_mul4x_break: + ldp x12,x13,[x29,#96] // pull rp and &b[num] + adds x19,x19,x30 + add x2,x2,#8*4 // bp++ + adcs x20,x20,xzr + sub x1,x1,x5 // rewind ap + adcs x21,x21,xzr + stp x19,x20,[x26,#8*0] // result!!! + adcs x22,x22,xzr + ldp x19,x20,[sp,#8*4] // t[0..3] + adc x30,x0,xzr + stp x21,x22,[x26,#8*2] // result!!! + cmp x2,x13 // done yet? + ldp x21,x22,[sp,#8*6] + ldp x14,x15,[x11,#8*0] // n[0..3] + ldp x16,x17,[x11,#8*2] + add x3,x11,#8*4 + b.eq .Lmul4x_post + + ldr x24,[x2] + ldp x6,x7,[x1,#8*0] // a[0..3] + ldp x8,x9,[x1,#8*2] + adds x1,x1,#8*4 // clear carry bit + mov x0,xzr + mov x26,sp + b .Loop_mul4x_reduction + +.align 4 +.Lmul4x_post: + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + mov x0,x12 + mov x27,x12 // x0 copy + subs x10,x19,x14 + add x26,sp,#8*8 + sbcs x11,x20,x15 + sub x28,x5,#8*4 + +.Lmul4x_sub: + sbcs x12,x21,x16 + ldp x14,x15,[x3,#8*0] + sub x28,x28,#8*4 + ldp x19,x20,[x26,#8*0] + sbcs x13,x22,x17 + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + ldp x21,x22,[x26,#8*2] + add x26,x26,#8*4 + stp x10,x11,[x0,#8*0] + sbcs x10,x19,x14 + stp x12,x13,[x0,#8*2] + add x0,x0,#8*4 + sbcs x11,x20,x15 + cbnz x28,.Lmul4x_sub + + sbcs x12,x21,x16 + mov x26,sp + add x1,sp,#8*4 + ldp x6,x7,[x27,#8*0] + sbcs x13,x22,x17 + stp x10,x11,[x0,#8*0] + ldp x8,x9,[x27,#8*2] + stp x12,x13,[x0,#8*2] + ldp x19,x20,[x1,#8*0] + ldp x21,x22,[x1,#8*2] + sbcs xzr,x30,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + + sub x28,x5,#8*4 +.Lmul4x_cond_copy: + sub x28,x28,#8*4 + csel x10,x19,x6,lo + stp xzr,xzr,[x26,#8*0] + csel x11,x20,x7,lo + ldp x6,x7,[x27,#8*4] + ldp x19,x20,[x1,#8*4] + csel x12,x21,x8,lo + stp xzr,xzr,[x26,#8*2] + add x26,x26,#8*4 + csel x13,x22,x9,lo + ldp x8,x9,[x27,#8*6] + ldp x21,x22,[x1,#8*6] + add x1,x1,#8*4 + stp x10,x11,[x27,#8*0] + stp x12,x13,[x27,#8*2] + add x27,x27,#8*4 + cbnz x28,.Lmul4x_cond_copy + + csel x10,x19,x6,lo + stp xzr,xzr,[x26,#8*0] + csel x11,x20,x7,lo + stp xzr,xzr,[x26,#8*2] + csel x12,x21,x8,lo + stp xzr,xzr,[x26,#8*3] + csel x13,x22,x9,lo + stp xzr,xzr,[x26,#8*4] + stp x10,x11,[x27,#8*0] + stp x12,x13,[x27,#8*2] + + b .Lmul4x_done + +.align 4 +.Lmul4x4_post_condition: + adc x0,x0,xzr + ldr x1,[x29,#96] // pull rp + // x19-3,x0 hold result, x14-7 hold modulus + subs x6,x19,x14 + ldr x30,[x29,#8] // pull return address + sbcs x7,x20,x15 + stp xzr,xzr,[sp,#8*0] + sbcs x8,x21,x16 + stp xzr,xzr,[sp,#8*2] + sbcs x9,x22,x17 + stp xzr,xzr,[sp,#8*4] + sbcs xzr,x0,xzr // did it borrow? + stp xzr,xzr,[sp,#8*6] + + // x6-3 hold result-modulus + csel x6,x19,x6,lo + csel x7,x20,x7,lo + csel x8,x21,x8,lo + csel x9,x22,x9,lo + stp x6,x7,[x1,#8*0] + stp x8,x9,[x1,#8*2] + +.Lmul4x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + // x30 is popped earlier + AARCH64_VALIDATE_LINK_REGISTER + ret +.size __bn_mul4x_mont,.-__bn_mul4x_mont +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 4 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/armv8-mont-win.S b/yass/third_party/boringssl/src/gen/bcm/armv8-mont-win.S new file mode 100644 index 0000000000..dcce02c95d --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/armv8-mont-win.S @@ -0,0 +1,1431 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +.text + +.globl bn_mul_mont + +.def bn_mul_mont + .type 32 +.endef +.align 5 +bn_mul_mont: + AARCH64_SIGN_LINK_REGISTER + tst x5,#7 + b.eq __bn_sqr8x_mont + tst x5,#3 + b.eq __bn_mul4x_mont +Lmul_mont: + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldr x9,[x2],#8 // bp[0] + sub x22,sp,x5,lsl#3 + ldp x7,x8,[x1],#16 // ap[0..1] + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + and x22,x22,#-16 // ABI says so + ldp x13,x14,[x3],#16 // np[0..1] + + mul x6,x7,x9 // ap[0]*bp[0] + sub x21,x5,#16 // j=num-2 + umulh x7,x7,x9 + mul x10,x8,x9 // ap[1]*bp[0] + umulh x11,x8,x9 + + mul x15,x6,x4 // "tp[0]"*n0 + mov sp,x22 // alloca + + // (*) mul x12,x13,x15 // np[0]*m1 + umulh x13,x13,x15 + mul x16,x14,x15 // np[1]*m1 + // (*) adds x12,x12,x6 // discarded + // (*) As for removal of first multiplication and addition + // instructions. The outcome of first addition is + // guaranteed to be zero, which leaves two computationally + // significant outcomes: it either carries or not. Then + // question is when does it carry? Is there alternative + // way to deduce it? If you follow operations, you can + // observe that condition for carry is quite simple: + // x6 being non-zero. So that carry can be calculated + // by adding -1 to x6. That's what next instruction does. + subs xzr,x6,#1 // (*) + umulh x17,x14,x15 + adc x13,x13,xzr + cbz x21,L1st_skip + +L1st: + ldr x8,[x1],#8 + adds x6,x10,x7 + sub x21,x21,#8 // j-- + adc x7,x11,xzr + + ldr x14,[x3],#8 + adds x12,x16,x13 + mul x10,x8,x9 // ap[j]*bp[0] + adc x13,x17,xzr + umulh x11,x8,x9 + + adds x12,x12,x6 + mul x16,x14,x15 // np[j]*m1 + adc x13,x13,xzr + umulh x17,x14,x15 + str x12,[x22],#8 // tp[j-1] + cbnz x21,L1st + +L1st_skip: + adds x6,x10,x7 + sub x1,x1,x5 // rewind x1 + adc x7,x11,xzr + + adds x12,x16,x13 + sub x3,x3,x5 // rewind x3 + adc x13,x17,xzr + + adds x12,x12,x6 + sub x20,x5,#8 // i=num-1 + adcs x13,x13,x7 + + adc x19,xzr,xzr // upmost overflow bit + stp x12,x13,[x22] + +Louter: + ldr x9,[x2],#8 // bp[i] + ldp x7,x8,[x1],#16 + ldr x23,[sp] // tp[0] + add x22,sp,#8 + + mul x6,x7,x9 // ap[0]*bp[i] + sub x21,x5,#16 // j=num-2 + umulh x7,x7,x9 + ldp x13,x14,[x3],#16 + mul x10,x8,x9 // ap[1]*bp[i] + adds x6,x6,x23 + umulh x11,x8,x9 + adc x7,x7,xzr + + mul x15,x6,x4 + sub x20,x20,#8 // i-- + + // (*) mul x12,x13,x15 // np[0]*m1 + umulh x13,x13,x15 + mul x16,x14,x15 // np[1]*m1 + // (*) adds x12,x12,x6 + subs xzr,x6,#1 // (*) + umulh x17,x14,x15 + cbz x21,Linner_skip + +Linner: + ldr x8,[x1],#8 + adc x13,x13,xzr + ldr x23,[x22],#8 // tp[j] + adds x6,x10,x7 + sub x21,x21,#8 // j-- + adc x7,x11,xzr + + adds x12,x16,x13 + ldr x14,[x3],#8 + adc x13,x17,xzr + + mul x10,x8,x9 // ap[j]*bp[i] + adds x6,x6,x23 + umulh x11,x8,x9 + adc x7,x7,xzr + + mul x16,x14,x15 // np[j]*m1 + adds x12,x12,x6 + umulh x17,x14,x15 + str x12,[x22,#-16] // tp[j-1] + cbnz x21,Linner + +Linner_skip: + ldr x23,[x22],#8 // tp[j] + adc x13,x13,xzr + adds x6,x10,x7 + sub x1,x1,x5 // rewind x1 + adc x7,x11,xzr + + adds x12,x16,x13 + sub x3,x3,x5 // rewind x3 + adcs x13,x17,x19 + adc x19,xzr,xzr + + adds x6,x6,x23 + adc x7,x7,xzr + + adds x12,x12,x6 + adcs x13,x13,x7 + adc x19,x19,xzr // upmost overflow bit + stp x12,x13,[x22,#-16] + + cbnz x20,Louter + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + ldr x23,[sp] // tp[0] + add x22,sp,#8 + ldr x14,[x3],#8 // np[0] + subs x21,x5,#8 // j=num-1 and clear borrow + mov x1,x0 +Lsub: + sbcs x8,x23,x14 // tp[j]-np[j] + ldr x23,[x22],#8 + sub x21,x21,#8 // j-- + ldr x14,[x3],#8 + str x8,[x1],#8 // rp[j]=tp[j]-np[j] + cbnz x21,Lsub + + sbcs x8,x23,x14 + sbcs x19,x19,xzr // did it borrow? + str x8,[x1],#8 // rp[num-1] + + ldr x23,[sp] // tp[0] + add x22,sp,#8 + ldr x8,[x0],#8 // rp[0] + sub x5,x5,#8 // num-- + nop +Lcond_copy: + sub x5,x5,#8 // num-- + csel x14,x23,x8,lo // did it borrow? + ldr x23,[x22],#8 + ldr x8,[x0],#8 + str xzr,[x22,#-16] // wipe tp + str x14,[x0,#-16] + cbnz x5,Lcond_copy + + csel x14,x23,x8,lo + str xzr,[x22,#-8] // wipe tp + str x14,[x0,#-8] + + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.def __bn_sqr8x_mont + .type 32 +.endef +.align 5 +__bn_sqr8x_mont: + // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to + // only from bn_mul_mont which has already signed the return address. + cmp x1,x2 + b.ne __bn_mul4x_mont +Lsqr8x_mont: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x0,x3,[sp,#96] // offload rp and np + + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + ldp x10,x11,[x1,#8*4] + ldp x12,x13,[x1,#8*6] + + sub x2,sp,x5,lsl#4 + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + mov sp,x2 // alloca + sub x27,x5,#8*8 + b Lsqr8x_zero_start + +Lsqr8x_zero: + sub x27,x27,#8*8 + stp xzr,xzr,[x2,#8*0] + stp xzr,xzr,[x2,#8*2] + stp xzr,xzr,[x2,#8*4] + stp xzr,xzr,[x2,#8*6] +Lsqr8x_zero_start: + stp xzr,xzr,[x2,#8*8] + stp xzr,xzr,[x2,#8*10] + stp xzr,xzr,[x2,#8*12] + stp xzr,xzr,[x2,#8*14] + add x2,x2,#8*16 + cbnz x27,Lsqr8x_zero + + add x3,x1,x5 + add x1,x1,#8*8 + mov x19,xzr + mov x20,xzr + mov x21,xzr + mov x22,xzr + mov x23,xzr + mov x24,xzr + mov x25,xzr + mov x26,xzr + mov x2,sp + str x4,[x29,#112] // offload n0 + + // Multiply everything but a[i]*a[i] +.align 4 +Lsqr8x_outer_loop: + // a[1]a[0] (i) + // a[2]a[0] + // a[3]a[0] + // a[4]a[0] + // a[5]a[0] + // a[6]a[0] + // a[7]a[0] + // a[2]a[1] (ii) + // a[3]a[1] + // a[4]a[1] + // a[5]a[1] + // a[6]a[1] + // a[7]a[1] + // a[3]a[2] (iii) + // a[4]a[2] + // a[5]a[2] + // a[6]a[2] + // a[7]a[2] + // a[4]a[3] (iv) + // a[5]a[3] + // a[6]a[3] + // a[7]a[3] + // a[5]a[4] (v) + // a[6]a[4] + // a[7]a[4] + // a[6]a[5] (vi) + // a[7]a[5] + // a[7]a[6] (vii) + + mul x14,x7,x6 // lo(a[1..7]*a[0]) (i) + mul x15,x8,x6 + mul x16,x9,x6 + mul x17,x10,x6 + adds x20,x20,x14 // t[1]+lo(a[1]*a[0]) + mul x14,x11,x6 + adcs x21,x21,x15 + mul x15,x12,x6 + adcs x22,x22,x16 + mul x16,x13,x6 + adcs x23,x23,x17 + umulh x17,x7,x6 // hi(a[1..7]*a[0]) + adcs x24,x24,x14 + umulh x14,x8,x6 + adcs x25,x25,x15 + umulh x15,x9,x6 + adcs x26,x26,x16 + umulh x16,x10,x6 + stp x19,x20,[x2],#8*2 // t[0..1] + adc x19,xzr,xzr // t[8] + adds x21,x21,x17 // t[2]+lo(a[1]*a[0]) + umulh x17,x11,x6 + adcs x22,x22,x14 + umulh x14,x12,x6 + adcs x23,x23,x15 + umulh x15,x13,x6 + adcs x24,x24,x16 + mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii) + adcs x25,x25,x17 + mul x17,x9,x7 + adcs x26,x26,x14 + mul x14,x10,x7 + adc x19,x19,x15 + + mul x15,x11,x7 + adds x22,x22,x16 + mul x16,x12,x7 + adcs x23,x23,x17 + mul x17,x13,x7 + adcs x24,x24,x14 + umulh x14,x8,x7 // hi(a[2..7]*a[1]) + adcs x25,x25,x15 + umulh x15,x9,x7 + adcs x26,x26,x16 + umulh x16,x10,x7 + adcs x19,x19,x17 + umulh x17,x11,x7 + stp x21,x22,[x2],#8*2 // t[2..3] + adc x20,xzr,xzr // t[9] + adds x23,x23,x14 + umulh x14,x12,x7 + adcs x24,x24,x15 + umulh x15,x13,x7 + adcs x25,x25,x16 + mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii) + adcs x26,x26,x17 + mul x17,x10,x8 + adcs x19,x19,x14 + mul x14,x11,x8 + adc x20,x20,x15 + + mul x15,x12,x8 + adds x24,x24,x16 + mul x16,x13,x8 + adcs x25,x25,x17 + umulh x17,x9,x8 // hi(a[3..7]*a[2]) + adcs x26,x26,x14 + umulh x14,x10,x8 + adcs x19,x19,x15 + umulh x15,x11,x8 + adcs x20,x20,x16 + umulh x16,x12,x8 + stp x23,x24,[x2],#8*2 // t[4..5] + adc x21,xzr,xzr // t[10] + adds x25,x25,x17 + umulh x17,x13,x8 + adcs x26,x26,x14 + mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv) + adcs x19,x19,x15 + mul x15,x11,x9 + adcs x20,x20,x16 + mul x16,x12,x9 + adc x21,x21,x17 + + mul x17,x13,x9 + adds x26,x26,x14 + umulh x14,x10,x9 // hi(a[4..7]*a[3]) + adcs x19,x19,x15 + umulh x15,x11,x9 + adcs x20,x20,x16 + umulh x16,x12,x9 + adcs x21,x21,x17 + umulh x17,x13,x9 + stp x25,x26,[x2],#8*2 // t[6..7] + adc x22,xzr,xzr // t[11] + adds x19,x19,x14 + mul x14,x11,x10 // lo(a[5..7]*a[4]) (v) + adcs x20,x20,x15 + mul x15,x12,x10 + adcs x21,x21,x16 + mul x16,x13,x10 + adc x22,x22,x17 + + umulh x17,x11,x10 // hi(a[5..7]*a[4]) + adds x20,x20,x14 + umulh x14,x12,x10 + adcs x21,x21,x15 + umulh x15,x13,x10 + adcs x22,x22,x16 + mul x16,x12,x11 // lo(a[6..7]*a[5]) (vi) + adc x23,xzr,xzr // t[12] + adds x21,x21,x17 + mul x17,x13,x11 + adcs x22,x22,x14 + umulh x14,x12,x11 // hi(a[6..7]*a[5]) + adc x23,x23,x15 + + umulh x15,x13,x11 + adds x22,x22,x16 + mul x16,x13,x12 // lo(a[7]*a[6]) (vii) + adcs x23,x23,x17 + umulh x17,x13,x12 // hi(a[7]*a[6]) + adc x24,xzr,xzr // t[13] + adds x23,x23,x14 + sub x27,x3,x1 // done yet? + adc x24,x24,x15 + + adds x24,x24,x16 + sub x14,x3,x5 // rewinded ap + adc x25,xzr,xzr // t[14] + add x25,x25,x17 + + cbz x27,Lsqr8x_outer_break + + mov x4,x6 + ldp x6,x7,[x2,#8*0] + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + adds x19,x19,x6 + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x0,x1 + adcs x26,xzr,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved below + mov x27,#-8*8 + + // a[8]a[0] + // a[9]a[0] + // a[a]a[0] + // a[b]a[0] + // a[c]a[0] + // a[d]a[0] + // a[e]a[0] + // a[f]a[0] + // a[8]a[1] + // a[f]a[1]........................ + // a[8]a[2] + // a[f]a[2]........................ + // a[8]a[3] + // a[f]a[3]........................ + // a[8]a[4] + // a[f]a[4]........................ + // a[8]a[5] + // a[f]a[5]........................ + // a[8]a[6] + // a[f]a[6]........................ + // a[8]a[7] + // a[f]a[7]........................ +Lsqr8x_mul: + mul x14,x6,x4 + adc x28,xzr,xzr // carry bit, modulo-scheduled + mul x15,x7,x4 + add x27,x27,#8 + mul x16,x8,x4 + mul x17,x9,x4 + adds x19,x19,x14 + mul x14,x10,x4 + adcs x20,x20,x15 + mul x15,x11,x4 + adcs x21,x21,x16 + mul x16,x12,x4 + adcs x22,x22,x17 + mul x17,x13,x4 + adcs x23,x23,x14 + umulh x14,x6,x4 + adcs x24,x24,x15 + umulh x15,x7,x4 + adcs x25,x25,x16 + umulh x16,x8,x4 + adcs x26,x26,x17 + umulh x17,x9,x4 + adc x28,x28,xzr + str x19,[x2],#8 + adds x19,x20,x14 + umulh x14,x10,x4 + adcs x20,x21,x15 + umulh x15,x11,x4 + adcs x21,x22,x16 + umulh x16,x12,x4 + adcs x22,x23,x17 + umulh x17,x13,x4 + ldr x4,[x0,x27] + adcs x23,x24,x14 + adcs x24,x25,x15 + adcs x25,x26,x16 + adcs x26,x28,x17 + //adc x28,xzr,xzr // moved above + cbnz x27,Lsqr8x_mul + // note that carry flag is guaranteed + // to be zero at this point + cmp x1,x3 // done yet? + b.eq Lsqr8x_break + + ldp x6,x7,[x2,#8*0] + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + adds x19,x19,x6 + ldr x4,[x0,#-8*8] + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x27,#-8*8 + adcs x26,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved above + b Lsqr8x_mul + +.align 4 +Lsqr8x_break: + ldp x6,x7,[x0,#8*0] + add x1,x0,#8*8 + ldp x8,x9,[x0,#8*2] + sub x14,x3,x1 // is it last iteration? + ldp x10,x11,[x0,#8*4] + sub x15,x2,x14 + ldp x12,x13,[x0,#8*6] + cbz x14,Lsqr8x_outer_loop + + stp x19,x20,[x2,#8*0] + ldp x19,x20,[x15,#8*0] + stp x21,x22,[x2,#8*2] + ldp x21,x22,[x15,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[x15,#8*4] + stp x25,x26,[x2,#8*6] + mov x2,x15 + ldp x25,x26,[x15,#8*6] + b Lsqr8x_outer_loop + +.align 4 +Lsqr8x_outer_break: + // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] + ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0] + ldp x15,x16,[sp,#8*1] + ldp x11,x13,[x14,#8*2] + add x1,x14,#8*4 + ldp x17,x14,[sp,#8*3] + + stp x19,x20,[x2,#8*0] + mul x19,x7,x7 + stp x21,x22,[x2,#8*2] + umulh x7,x7,x7 + stp x23,x24,[x2,#8*4] + mul x8,x9,x9 + stp x25,x26,[x2,#8*6] + mov x2,sp + umulh x9,x9,x9 + adds x20,x7,x15,lsl#1 + extr x15,x16,x15,#63 + sub x27,x5,#8*4 + +Lsqr4x_shift_n_add: + adcs x21,x8,x15 + extr x16,x17,x16,#63 + sub x27,x27,#8*4 + adcs x22,x9,x16 + ldp x15,x16,[x2,#8*5] + mul x10,x11,x11 + ldp x7,x9,[x1],#8*2 + umulh x11,x11,x11 + mul x12,x13,x13 + umulh x13,x13,x13 + extr x17,x14,x17,#63 + stp x19,x20,[x2,#8*0] + adcs x23,x10,x17 + extr x14,x15,x14,#63 + stp x21,x22,[x2,#8*2] + adcs x24,x11,x14 + ldp x17,x14,[x2,#8*7] + extr x15,x16,x15,#63 + adcs x25,x12,x15 + extr x16,x17,x16,#63 + adcs x26,x13,x16 + ldp x15,x16,[x2,#8*9] + mul x6,x7,x7 + ldp x11,x13,[x1],#8*2 + umulh x7,x7,x7 + mul x8,x9,x9 + umulh x9,x9,x9 + stp x23,x24,[x2,#8*4] + extr x17,x14,x17,#63 + stp x25,x26,[x2,#8*6] + add x2,x2,#8*8 + adcs x19,x6,x17 + extr x14,x15,x14,#63 + adcs x20,x7,x14 + ldp x17,x14,[x2,#8*3] + extr x15,x16,x15,#63 + cbnz x27,Lsqr4x_shift_n_add + ldp x1,x4,[x29,#104] // pull np and n0 + + adcs x21,x8,x15 + extr x16,x17,x16,#63 + adcs x22,x9,x16 + ldp x15,x16,[x2,#8*5] + mul x10,x11,x11 + umulh x11,x11,x11 + stp x19,x20,[x2,#8*0] + mul x12,x13,x13 + umulh x13,x13,x13 + stp x21,x22,[x2,#8*2] + extr x17,x14,x17,#63 + adcs x23,x10,x17 + extr x14,x15,x14,#63 + ldp x19,x20,[sp,#8*0] + adcs x24,x11,x14 + extr x15,x16,x15,#63 + ldp x6,x7,[x1,#8*0] + adcs x25,x12,x15 + extr x16,xzr,x16,#63 + ldp x8,x9,[x1,#8*2] + adc x26,x13,x16 + ldp x10,x11,[x1,#8*4] + + // Reduce by 512 bits per iteration + mul x28,x4,x19 // t[0]*n0 + ldp x12,x13,[x1,#8*6] + add x3,x1,x5 + ldp x21,x22,[sp,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[sp,#8*4] + stp x25,x26,[x2,#8*6] + ldp x25,x26,[sp,#8*6] + add x1,x1,#8*8 + mov x30,xzr // initial top-most carry + mov x2,sp + mov x27,#8 + +Lsqr8x_reduction: + // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0) + mul x15,x7,x28 + sub x27,x27,#1 + mul x16,x8,x28 + str x28,[x2],#8 // put aside t[0]*n0 for tail processing + mul x17,x9,x28 + // (*) adds xzr,x19,x14 + subs xzr,x19,#1 // (*) + mul x14,x10,x28 + adcs x19,x20,x15 + mul x15,x11,x28 + adcs x20,x21,x16 + mul x16,x12,x28 + adcs x21,x22,x17 + mul x17,x13,x28 + adcs x22,x23,x14 + umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0) + adcs x23,x24,x15 + umulh x15,x7,x28 + adcs x24,x25,x16 + umulh x16,x8,x28 + adcs x25,x26,x17 + umulh x17,x9,x28 + adc x26,xzr,xzr + adds x19,x19,x14 + umulh x14,x10,x28 + adcs x20,x20,x15 + umulh x15,x11,x28 + adcs x21,x21,x16 + umulh x16,x12,x28 + adcs x22,x22,x17 + umulh x17,x13,x28 + mul x28,x4,x19 // next t[0]*n0 + adcs x23,x23,x14 + adcs x24,x24,x15 + adcs x25,x25,x16 + adc x26,x26,x17 + cbnz x27,Lsqr8x_reduction + + ldp x14,x15,[x2,#8*0] + ldp x16,x17,[x2,#8*2] + mov x0,x2 + sub x27,x3,x1 // done yet? + adds x19,x19,x14 + adcs x20,x20,x15 + ldp x14,x15,[x2,#8*4] + adcs x21,x21,x16 + adcs x22,x22,x17 + ldp x16,x17,[x2,#8*6] + adcs x23,x23,x14 + adcs x24,x24,x15 + adcs x25,x25,x16 + adcs x26,x26,x17 + //adc x28,xzr,xzr // moved below + cbz x27,Lsqr8x8_post_condition + + ldr x4,[x2,#-8*8] + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + ldp x10,x11,[x1,#8*4] + mov x27,#-8*8 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + +Lsqr8x_tail: + mul x14,x6,x4 + adc x28,xzr,xzr // carry bit, modulo-scheduled + mul x15,x7,x4 + add x27,x27,#8 + mul x16,x8,x4 + mul x17,x9,x4 + adds x19,x19,x14 + mul x14,x10,x4 + adcs x20,x20,x15 + mul x15,x11,x4 + adcs x21,x21,x16 + mul x16,x12,x4 + adcs x22,x22,x17 + mul x17,x13,x4 + adcs x23,x23,x14 + umulh x14,x6,x4 + adcs x24,x24,x15 + umulh x15,x7,x4 + adcs x25,x25,x16 + umulh x16,x8,x4 + adcs x26,x26,x17 + umulh x17,x9,x4 + adc x28,x28,xzr + str x19,[x2],#8 + adds x19,x20,x14 + umulh x14,x10,x4 + adcs x20,x21,x15 + umulh x15,x11,x4 + adcs x21,x22,x16 + umulh x16,x12,x4 + adcs x22,x23,x17 + umulh x17,x13,x4 + ldr x4,[x0,x27] + adcs x23,x24,x14 + adcs x24,x25,x15 + adcs x25,x26,x16 + adcs x26,x28,x17 + //adc x28,xzr,xzr // moved above + cbnz x27,Lsqr8x_tail + // note that carry flag is guaranteed + // to be zero at this point + ldp x6,x7,[x2,#8*0] + sub x27,x3,x1 // done yet? + sub x16,x3,x5 // rewinded np + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + cbz x27,Lsqr8x_tail_break + + ldr x4,[x0,#-8*8] + adds x19,x19,x6 + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x27,#-8*8 + adcs x26,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved above + b Lsqr8x_tail + +.align 4 +Lsqr8x_tail_break: + ldr x4,[x29,#112] // pull n0 + add x27,x2,#8*8 // end of current t[num] window + + subs xzr,x30,#1 // "move" top-most carry to carry bit + adcs x14,x19,x6 + adcs x15,x20,x7 + ldp x19,x20,[x0,#8*0] + adcs x21,x21,x8 + ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0] + adcs x22,x22,x9 + ldp x8,x9,[x16,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x16,#8*4] + adcs x25,x25,x12 + adcs x26,x26,x13 + ldp x12,x13,[x16,#8*6] + add x1,x16,#8*8 + adc x30,xzr,xzr // top-most carry + mul x28,x4,x19 + stp x14,x15,[x2,#8*0] + stp x21,x22,[x2,#8*2] + ldp x21,x22,[x0,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[x0,#8*4] + cmp x27,x29 // did we hit the bottom? + stp x25,x26,[x2,#8*6] + mov x2,x0 // slide the window + ldp x25,x26,[x0,#8*6] + mov x27,#8 + b.ne Lsqr8x_reduction + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + ldr x0,[x29,#96] // pull rp + add x2,x2,#8*8 + subs x14,x19,x6 + sbcs x15,x20,x7 + sub x27,x5,#8*8 + mov x3,x0 // x0 copy + +Lsqr8x_sub: + sbcs x16,x21,x8 + ldp x6,x7,[x1,#8*0] + sbcs x17,x22,x9 + stp x14,x15,[x0,#8*0] + sbcs x14,x23,x10 + ldp x8,x9,[x1,#8*2] + sbcs x15,x24,x11 + stp x16,x17,[x0,#8*2] + sbcs x16,x25,x12 + ldp x10,x11,[x1,#8*4] + sbcs x17,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + ldp x19,x20,[x2,#8*0] + sub x27,x27,#8*8 + ldp x21,x22,[x2,#8*2] + ldp x23,x24,[x2,#8*4] + ldp x25,x26,[x2,#8*6] + add x2,x2,#8*8 + stp x14,x15,[x0,#8*4] + sbcs x14,x19,x6 + stp x16,x17,[x0,#8*6] + add x0,x0,#8*8 + sbcs x15,x20,x7 + cbnz x27,Lsqr8x_sub + + sbcs x16,x21,x8 + mov x2,sp + add x1,sp,x5 + ldp x6,x7,[x3,#8*0] + sbcs x17,x22,x9 + stp x14,x15,[x0,#8*0] + sbcs x14,x23,x10 + ldp x8,x9,[x3,#8*2] + sbcs x15,x24,x11 + stp x16,x17,[x0,#8*2] + sbcs x16,x25,x12 + ldp x19,x20,[x1,#8*0] + sbcs x17,x26,x13 + ldp x21,x22,[x1,#8*2] + sbcs xzr,x30,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + stp x14,x15,[x0,#8*4] + stp x16,x17,[x0,#8*6] + + sub x27,x5,#8*4 +Lsqr4x_cond_copy: + sub x27,x27,#8*4 + csel x14,x19,x6,lo + stp xzr,xzr,[x2,#8*0] + csel x15,x20,x7,lo + ldp x6,x7,[x3,#8*4] + ldp x19,x20,[x1,#8*4] + csel x16,x21,x8,lo + stp xzr,xzr,[x2,#8*2] + add x2,x2,#8*4 + csel x17,x22,x9,lo + ldp x8,x9,[x3,#8*6] + ldp x21,x22,[x1,#8*6] + add x1,x1,#8*4 + stp x14,x15,[x3,#8*0] + stp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + stp xzr,xzr,[x1,#8*0] + stp xzr,xzr,[x1,#8*2] + cbnz x27,Lsqr4x_cond_copy + + csel x14,x19,x6,lo + stp xzr,xzr,[x2,#8*0] + csel x15,x20,x7,lo + stp xzr,xzr,[x2,#8*2] + csel x16,x21,x8,lo + csel x17,x22,x9,lo + stp x14,x15,[x3,#8*0] + stp x16,x17,[x3,#8*2] + + b Lsqr8x_done + +.align 4 +Lsqr8x8_post_condition: + adc x28,xzr,xzr + ldr x30,[x29,#8] // pull return address + // x19-7,x28 hold result, x6-7 hold modulus + subs x6,x19,x6 + ldr x1,[x29,#96] // pull rp + sbcs x7,x20,x7 + stp xzr,xzr,[sp,#8*0] + sbcs x8,x21,x8 + stp xzr,xzr,[sp,#8*2] + sbcs x9,x22,x9 + stp xzr,xzr,[sp,#8*4] + sbcs x10,x23,x10 + stp xzr,xzr,[sp,#8*6] + sbcs x11,x24,x11 + stp xzr,xzr,[sp,#8*8] + sbcs x12,x25,x12 + stp xzr,xzr,[sp,#8*10] + sbcs x13,x26,x13 + stp xzr,xzr,[sp,#8*12] + sbcs x28,x28,xzr // did it borrow? + stp xzr,xzr,[sp,#8*14] + + // x6-7 hold result-modulus + csel x6,x19,x6,lo + csel x7,x20,x7,lo + csel x8,x21,x8,lo + csel x9,x22,x9,lo + stp x6,x7,[x1,#8*0] + csel x10,x23,x10,lo + csel x11,x24,x11,lo + stp x8,x9,[x1,#8*2] + csel x12,x25,x12,lo + csel x13,x26,x13,lo + stp x10,x11,[x1,#8*4] + stp x12,x13,[x1,#8*6] + +Lsqr8x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + // x30 is popped earlier + AARCH64_VALIDATE_LINK_REGISTER + ret + +.def __bn_mul4x_mont + .type 32 +.endef +.align 5 +__bn_mul4x_mont: + // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to + // only from bn_mul_mont or __bn_mul8x_mont which have already signed the + // return address. + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + sub x26,sp,x5,lsl#3 + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + sub sp,x26,#8*4 // alloca + + add x10,x2,x5 + add x27,x1,x5 + stp x0,x10,[x29,#96] // offload rp and &b[num] + + ldr x24,[x2,#8*0] // b[0] + ldp x6,x7,[x1,#8*0] // a[0..3] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + mov x19,xzr + mov x20,xzr + mov x21,xzr + mov x22,xzr + ldp x14,x15,[x3,#8*0] // n[0..3] + ldp x16,x17,[x3,#8*2] + adds x3,x3,#8*4 // clear carry bit + mov x0,xzr + mov x28,#0 + mov x26,sp + +Loop_mul4x_1st_reduction: + mul x10,x6,x24 // lo(a[0..3]*b[0]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[0..3]*b[0]) + adcs x20,x20,x11 + mul x25,x19,x4 // t[0]*n0 + adcs x21,x21,x12 + umulh x11,x7,x24 + adcs x22,x22,x13 + umulh x12,x8,x24 + adc x23,xzr,xzr + umulh x13,x9,x24 + ldr x24,[x2,x28] // next b[i] (or b[0]) + adds x20,x20,x10 + // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0) + str x25,[x26],#8 // put aside t[0]*n0 for tail processing + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + // (*) adds xzr,x19,x10 + subs xzr,x19,#1 // (*) + umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0) + adcs x19,x20,x11 + umulh x11,x15,x25 + adcs x20,x21,x12 + umulh x12,x16,x25 + adcs x21,x22,x13 + umulh x13,x17,x25 + adcs x22,x23,x0 + adc x0,xzr,xzr + adds x19,x19,x10 + sub x10,x27,x1 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + cbnz x28,Loop_mul4x_1st_reduction + + cbz x10,Lmul4x4_post_condition + + ldp x6,x7,[x1,#8*0] // a[4..7] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + ldr x25,[sp] // a[0]*n0 + ldp x14,x15,[x3,#8*0] // n[4..7] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + +Loop_mul4x_1st_tail: + mul x10,x6,x24 // lo(a[4..7]*b[i]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[4..7]*b[i]) + adcs x20,x20,x11 + umulh x11,x7,x24 + adcs x21,x21,x12 + umulh x12,x8,x24 + adcs x22,x22,x13 + umulh x13,x9,x24 + adc x23,xzr,xzr + ldr x24,[x2,x28] // next b[i] (or b[0]) + adds x20,x20,x10 + mul x10,x14,x25 // lo(n[4..7]*a[0]*n0) + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + adds x19,x19,x10 + umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0) + adcs x20,x20,x11 + umulh x11,x15,x25 + adcs x21,x21,x12 + umulh x12,x16,x25 + adcs x22,x22,x13 + adcs x23,x23,x0 + umulh x13,x17,x25 + adc x0,xzr,xzr + ldr x25,[sp,x28] // next t[0]*n0 + str x19,[x26],#8 // result!!! + adds x19,x20,x10 + sub x10,x27,x1 // done yet? + adcs x20,x21,x11 + adcs x21,x22,x12 + adcs x22,x23,x13 + //adc x0,x0,xzr + cbnz x28,Loop_mul4x_1st_tail + + sub x11,x27,x5 // rewinded x1 + cbz x10,Lmul4x_proceed + + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + ldp x14,x15,[x3,#8*0] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + b Loop_mul4x_1st_tail + +.align 5 +Lmul4x_proceed: + ldr x24,[x2,#8*4]! // *++b + adc x30,x0,xzr + ldp x6,x7,[x11,#8*0] // a[0..3] + sub x3,x3,x5 // rewind np + ldp x8,x9,[x11,#8*2] + add x1,x11,#8*4 + + stp x19,x20,[x26,#8*0] // result!!! + ldp x19,x20,[sp,#8*4] // t[0..3] + stp x21,x22,[x26,#8*2] // result!!! + ldp x21,x22,[sp,#8*6] + + ldp x14,x15,[x3,#8*0] // n[0..3] + mov x26,sp + ldp x16,x17,[x3,#8*2] + adds x3,x3,#8*4 // clear carry bit + mov x0,xzr + +.align 4 +Loop_mul4x_reduction: + mul x10,x6,x24 // lo(a[0..3]*b[4]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[0..3]*b[4]) + adcs x20,x20,x11 + mul x25,x19,x4 // t[0]*n0 + adcs x21,x21,x12 + umulh x11,x7,x24 + adcs x22,x22,x13 + umulh x12,x8,x24 + adc x23,xzr,xzr + umulh x13,x9,x24 + ldr x24,[x2,x28] // next b[i] + adds x20,x20,x10 + // (*) mul x10,x14,x25 + str x25,[x26],#8 // put aside t[0]*n0 for tail processing + adcs x21,x21,x11 + mul x11,x15,x25 // lo(n[0..3]*t[0]*n0 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + // (*) adds xzr,x19,x10 + subs xzr,x19,#1 // (*) + umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0 + adcs x19,x20,x11 + umulh x11,x15,x25 + adcs x20,x21,x12 + umulh x12,x16,x25 + adcs x21,x22,x13 + umulh x13,x17,x25 + adcs x22,x23,x0 + adc x0,xzr,xzr + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + cbnz x28,Loop_mul4x_reduction + + adc x0,x0,xzr + ldp x10,x11,[x26,#8*4] // t[4..7] + ldp x12,x13,[x26,#8*6] + ldp x6,x7,[x1,#8*0] // a[4..7] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + + ldr x25,[sp] // t[0]*n0 + ldp x14,x15,[x3,#8*0] // n[4..7] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + +.align 4 +Loop_mul4x_tail: + mul x10,x6,x24 // lo(a[4..7]*b[4]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[4..7]*b[4]) + adcs x20,x20,x11 + umulh x11,x7,x24 + adcs x21,x21,x12 + umulh x12,x8,x24 + adcs x22,x22,x13 + umulh x13,x9,x24 + adc x23,xzr,xzr + ldr x24,[x2,x28] // next b[i] + adds x20,x20,x10 + mul x10,x14,x25 // lo(n[4..7]*t[0]*n0) + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + adds x19,x19,x10 + umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0) + adcs x20,x20,x11 + umulh x11,x15,x25 + adcs x21,x21,x12 + umulh x12,x16,x25 + adcs x22,x22,x13 + umulh x13,x17,x25 + adcs x23,x23,x0 + ldr x25,[sp,x28] // next a[0]*n0 + adc x0,xzr,xzr + str x19,[x26],#8 // result!!! + adds x19,x20,x10 + sub x10,x27,x1 // done yet? + adcs x20,x21,x11 + adcs x21,x22,x12 + adcs x22,x23,x13 + //adc x0,x0,xzr + cbnz x28,Loop_mul4x_tail + + sub x11,x3,x5 // rewinded np? + adc x0,x0,xzr + cbz x10,Loop_mul4x_break + + ldp x10,x11,[x26,#8*4] + ldp x12,x13,[x26,#8*6] + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + ldp x14,x15,[x3,#8*0] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + b Loop_mul4x_tail + +.align 4 +Loop_mul4x_break: + ldp x12,x13,[x29,#96] // pull rp and &b[num] + adds x19,x19,x30 + add x2,x2,#8*4 // bp++ + adcs x20,x20,xzr + sub x1,x1,x5 // rewind ap + adcs x21,x21,xzr + stp x19,x20,[x26,#8*0] // result!!! + adcs x22,x22,xzr + ldp x19,x20,[sp,#8*4] // t[0..3] + adc x30,x0,xzr + stp x21,x22,[x26,#8*2] // result!!! + cmp x2,x13 // done yet? + ldp x21,x22,[sp,#8*6] + ldp x14,x15,[x11,#8*0] // n[0..3] + ldp x16,x17,[x11,#8*2] + add x3,x11,#8*4 + b.eq Lmul4x_post + + ldr x24,[x2] + ldp x6,x7,[x1,#8*0] // a[0..3] + ldp x8,x9,[x1,#8*2] + adds x1,x1,#8*4 // clear carry bit + mov x0,xzr + mov x26,sp + b Loop_mul4x_reduction + +.align 4 +Lmul4x_post: + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + mov x0,x12 + mov x27,x12 // x0 copy + subs x10,x19,x14 + add x26,sp,#8*8 + sbcs x11,x20,x15 + sub x28,x5,#8*4 + +Lmul4x_sub: + sbcs x12,x21,x16 + ldp x14,x15,[x3,#8*0] + sub x28,x28,#8*4 + ldp x19,x20,[x26,#8*0] + sbcs x13,x22,x17 + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + ldp x21,x22,[x26,#8*2] + add x26,x26,#8*4 + stp x10,x11,[x0,#8*0] + sbcs x10,x19,x14 + stp x12,x13,[x0,#8*2] + add x0,x0,#8*4 + sbcs x11,x20,x15 + cbnz x28,Lmul4x_sub + + sbcs x12,x21,x16 + mov x26,sp + add x1,sp,#8*4 + ldp x6,x7,[x27,#8*0] + sbcs x13,x22,x17 + stp x10,x11,[x0,#8*0] + ldp x8,x9,[x27,#8*2] + stp x12,x13,[x0,#8*2] + ldp x19,x20,[x1,#8*0] + ldp x21,x22,[x1,#8*2] + sbcs xzr,x30,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + + sub x28,x5,#8*4 +Lmul4x_cond_copy: + sub x28,x28,#8*4 + csel x10,x19,x6,lo + stp xzr,xzr,[x26,#8*0] + csel x11,x20,x7,lo + ldp x6,x7,[x27,#8*4] + ldp x19,x20,[x1,#8*4] + csel x12,x21,x8,lo + stp xzr,xzr,[x26,#8*2] + add x26,x26,#8*4 + csel x13,x22,x9,lo + ldp x8,x9,[x27,#8*6] + ldp x21,x22,[x1,#8*6] + add x1,x1,#8*4 + stp x10,x11,[x27,#8*0] + stp x12,x13,[x27,#8*2] + add x27,x27,#8*4 + cbnz x28,Lmul4x_cond_copy + + csel x10,x19,x6,lo + stp xzr,xzr,[x26,#8*0] + csel x11,x20,x7,lo + stp xzr,xzr,[x26,#8*2] + csel x12,x21,x8,lo + stp xzr,xzr,[x26,#8*3] + csel x13,x22,x9,lo + stp xzr,xzr,[x26,#8*4] + stp x10,x11,[x27,#8*0] + stp x12,x13,[x27,#8*2] + + b Lmul4x_done + +.align 4 +Lmul4x4_post_condition: + adc x0,x0,xzr + ldr x1,[x29,#96] // pull rp + // x19-3,x0 hold result, x14-7 hold modulus + subs x6,x19,x14 + ldr x30,[x29,#8] // pull return address + sbcs x7,x20,x15 + stp xzr,xzr,[sp,#8*0] + sbcs x8,x21,x16 + stp xzr,xzr,[sp,#8*2] + sbcs x9,x22,x17 + stp xzr,xzr,[sp,#8*4] + sbcs xzr,x0,xzr // did it borrow? + stp xzr,xzr,[sp,#8*6] + + // x6-3 hold result-modulus + csel x6,x19,x6,lo + csel x7,x20,x7,lo + csel x8,x21,x8,lo + csel x9,x22,x9,lo + stp x6,x7,[x1,#8*0] + stp x8,x9,[x1,#8*2] + +Lmul4x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + // x30 is popped earlier + AARCH64_VALIDATE_LINK_REGISTER + ret + +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 4 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/bn-586-apple.S b/yass/third_party/boringssl/src/gen/bcm/bn-586-apple.S new file mode 100644 index 0000000000..93513d0bf3 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/bn-586-apple.S @@ -0,0 +1,987 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _bn_mul_add_words +.private_extern _bn_mul_add_words +.align 4 +_bn_mul_add_words: +L_bn_mul_add_words_begin: + call L000PIC_me_up +L000PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L001maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp L002maw_sse2_entry +.align 4,0x90 +L003maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz L004maw_sse2_exit +L002maw_sse2_entry: + testl $4294967288,%ecx + jnz L003maw_sse2_unrolled +.align 2,0x90 +L005maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L005maw_sse2_loop +L004maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 4,0x90 +L001maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz L006maw_finish +.align 4,0x90 +L007maw_loop: + # Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz L007maw_loop +L006maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz L008maw_finish2 + jmp L009maw_end +L008maw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L009maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_mul_words +.private_extern _bn_mul_words +.align 4 +_bn_mul_words: +L_bn_mul_words_begin: + call L010PIC_me_up +L010PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L011mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 4,0x90 +L012mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L012mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 4,0x90 +L011mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz L013mw_finish +L014mw_loop: + # Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz L013mw_finish + jmp L014mw_loop +L013mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz L015mw_finish2 + jmp L016mw_end +L015mw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L016mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sqr_words +.private_extern _bn_sqr_words +.align 4 +_bn_sqr_words: +L_bn_sqr_words_begin: + call L017PIC_me_up +L017PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L017PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L018sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 4,0x90 +L019sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz L019sqr_sse2_loop + emms + ret +.align 4,0x90 +L018sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz L020sw_finish +L021sw_loop: + # Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + # Round 4 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + # Round 8 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + # Round 12 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + # Round 16 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + # Round 20 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + # Round 24 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + # Round 28 + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz L021sw_loop +L020sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz L022sw_end + # Tail Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz L022sw_end + # Tail Round 1 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz L022sw_end + # Tail Round 2 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz L022sw_end + # Tail Round 3 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz L022sw_end + # Tail Round 4 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz L022sw_end + # Tail Round 5 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz L022sw_end + # Tail Round 6 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +L022sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_div_words +.private_extern _bn_div_words +.align 4 +_bn_div_words: +L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.globl _bn_add_words +.private_extern _bn_add_words +.align 4 +_bn_add_words: +L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L023aw_finish +L024aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L024aw_loop +L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L025aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L025aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L025aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L025aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L025aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L025aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L025aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_words +.private_extern _bn_sub_words +.align 4 +_bn_sub_words: +L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L026aw_finish +L027aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L027aw_loop +L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L028aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L028aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L028aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L028aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L028aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L028aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L028aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L028aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/bn-586-linux.S b/yass/third_party/boringssl/src/gen/bcm/bn-586-linux.S new file mode 100644 index 0000000000..311f22c04a --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/bn-586-linux.S @@ -0,0 +1,995 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl bn_mul_add_words +.hidden bn_mul_add_words +.type bn_mul_add_words,@function +.align 16 +bn_mul_add_words: +.L_bn_mul_add_words_begin: + call .L000PIC_me_up +.L000PIC_me_up: + popl %eax + leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc .L001maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp .L002maw_sse2_entry +.align 16 +.L003maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz .L004maw_sse2_exit +.L002maw_sse2_entry: + testl $4294967288,%ecx + jnz .L003maw_sse2_unrolled +.align 4 +.L005maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L005maw_sse2_loop +.L004maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 16 +.L001maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz .L006maw_finish +.align 16 +.L007maw_loop: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz .L007maw_loop +.L006maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz .L008maw_finish2 + jmp .L009maw_end +.L008maw_finish2: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L009maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_add_words,.-.L_bn_mul_add_words_begin +.globl bn_mul_words +.hidden bn_mul_words +.type bn_mul_words,@function +.align 16 +bn_mul_words: +.L_bn_mul_words_begin: + call .L010PIC_me_up +.L010PIC_me_up: + popl %eax + leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc .L011mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 16 +.L012mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L012mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 16 +.L011mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz .L013mw_finish +.L014mw_loop: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz .L013mw_finish + jmp .L014mw_loop +.L013mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz .L015mw_finish2 + jmp .L016mw_end +.L015mw_finish2: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L016mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_words,.-.L_bn_mul_words_begin +.globl bn_sqr_words +.hidden bn_sqr_words +.type bn_sqr_words,@function +.align 16 +bn_sqr_words: +.L_bn_sqr_words_begin: + call .L017PIC_me_up +.L017PIC_me_up: + popl %eax + leal OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc .L018sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 16 +.L019sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz .L019sqr_sse2_loop + emms + ret +.align 16 +.L018sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz .L020sw_finish +.L021sw_loop: + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz .L021sw_loop +.L020sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz .L022sw_end + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz .L022sw_end + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz .L022sw_end + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz .L022sw_end + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz .L022sw_end + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz .L022sw_end + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz .L022sw_end + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +.L022sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sqr_words,.-.L_bn_sqr_words_begin +.globl bn_div_words +.hidden bn_div_words +.type bn_div_words,@function +.align 16 +bn_div_words: +.L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.size bn_div_words,.-.L_bn_div_words_begin +.globl bn_add_words +.hidden bn_add_words +.type bn_add_words,@function +.align 16 +bn_add_words: +.L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L023aw_finish +.L024aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L024aw_loop +.L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L025aw_end + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L025aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L025aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L025aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L025aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L025aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L025aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_add_words,.-.L_bn_add_words_begin +.globl bn_sub_words +.hidden bn_sub_words +.type bn_sub_words,@function +.align 16 +bn_sub_words: +.L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L026aw_finish +.L027aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L027aw_loop +.L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L028aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L028aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L028aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L028aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L028aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L028aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L028aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_words,.-.L_bn_sub_words_begin +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/bn-586-win.asm b/yass/third_party/boringssl/src/gen/bcm/bn-586-win.asm new file mode 100644 index 0000000000..f7ddfa8413 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/bn-586-win.asm @@ -0,0 +1,982 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _bn_mul_add_words +align 16 +_bn_mul_add_words: +L$_bn_mul_add_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$000maw_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] + movd mm0,DWORD [16+esp] + pxor mm1,mm1 + jmp NEAR L$001maw_sse2_entry +align 16 +L$002maw_sse2_unrolled: + movd mm3,DWORD [eax] + paddq mm1,mm3 + movd mm2,DWORD [edx] + pmuludq mm2,mm0 + movd mm4,DWORD [4+edx] + pmuludq mm4,mm0 + movd mm6,DWORD [8+edx] + pmuludq mm6,mm0 + movd mm7,DWORD [12+edx] + pmuludq mm7,mm0 + paddq mm1,mm2 + movd mm3,DWORD [4+eax] + paddq mm3,mm4 + movd mm5,DWORD [8+eax] + paddq mm5,mm6 + movd mm4,DWORD [12+eax] + paddq mm7,mm4 + movd DWORD [eax],mm1 + movd mm2,DWORD [16+edx] + pmuludq mm2,mm0 + psrlq mm1,32 + movd mm4,DWORD [20+edx] + pmuludq mm4,mm0 + paddq mm1,mm3 + movd mm6,DWORD [24+edx] + pmuludq mm6,mm0 + movd DWORD [4+eax],mm1 + psrlq mm1,32 + movd mm3,DWORD [28+edx] + add edx,32 + pmuludq mm3,mm0 + paddq mm1,mm5 + movd mm5,DWORD [16+eax] + paddq mm2,mm5 + movd DWORD [8+eax],mm1 + psrlq mm1,32 + paddq mm1,mm7 + movd mm5,DWORD [20+eax] + paddq mm4,mm5 + movd DWORD [12+eax],mm1 + psrlq mm1,32 + paddq mm1,mm2 + movd mm5,DWORD [24+eax] + paddq mm6,mm5 + movd DWORD [16+eax],mm1 + psrlq mm1,32 + paddq mm1,mm4 + movd mm5,DWORD [28+eax] + paddq mm3,mm5 + movd DWORD [20+eax],mm1 + psrlq mm1,32 + paddq mm1,mm6 + movd DWORD [24+eax],mm1 + psrlq mm1,32 + paddq mm1,mm3 + movd DWORD [28+eax],mm1 + lea eax,[32+eax] + psrlq mm1,32 + sub ecx,8 + jz NEAR L$003maw_sse2_exit +L$001maw_sse2_entry: + test ecx,4294967288 + jnz NEAR L$002maw_sse2_unrolled +align 4 +L$004maw_sse2_loop: + movd mm2,DWORD [edx] + movd mm3,DWORD [eax] + pmuludq mm2,mm0 + lea edx,[4+edx] + paddq mm1,mm3 + paddq mm1,mm2 + movd DWORD [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,[4+eax] + jnz NEAR L$004maw_sse2_loop +L$003maw_sse2_exit: + movd eax,mm1 + emms + ret +align 16 +L$000maw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD [20+esp] + mov ecx,DWORD [28+esp] + mov ebx,DWORD [24+esp] + and ecx,4294967288 + mov ebp,DWORD [32+esp] + push ecx + jz NEAR L$005maw_finish +align 16 +L$006maw_loop: + ; Round 0 + mov eax,DWORD [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [edi] + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD [4+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [4+edi] + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD [8+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [8+edi] + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD [12+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [12+edi] + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD [16+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [16+edi] + adc edx,0 + mov DWORD [16+edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD [20+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [20+edi] + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD [24+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [24+edi] + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD [28+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [28+edi] + adc edx,0 + mov DWORD [28+edi],eax + mov esi,edx + ; + sub ecx,8 + lea ebx,[32+ebx] + lea edi,[32+edi] + jnz NEAR L$006maw_loop +L$005maw_finish: + mov ecx,DWORD [32+esp] + and ecx,7 + jnz NEAR L$007maw_finish2 + jmp NEAR L$008maw_end +L$007maw_finish2: + ; Tail Round 0 + mov eax,DWORD [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [edi] + adc edx,0 + dec ecx + mov DWORD [edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 1 + mov eax,DWORD [4+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [4+edi] + adc edx,0 + dec ecx + mov DWORD [4+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 2 + mov eax,DWORD [8+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [8+edi] + adc edx,0 + dec ecx + mov DWORD [8+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 3 + mov eax,DWORD [12+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [12+edi] + adc edx,0 + dec ecx + mov DWORD [12+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 4 + mov eax,DWORD [16+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [16+edi] + adc edx,0 + dec ecx + mov DWORD [16+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 5 + mov eax,DWORD [20+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [20+edi] + adc edx,0 + dec ecx + mov DWORD [20+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 6 + mov eax,DWORD [24+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [24+edi] + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx +L$008maw_end: + mov eax,esi + pop ecx + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_mul_words +align 16 +_bn_mul_words: +L$_bn_mul_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$009mw_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] + movd mm0,DWORD [16+esp] + pxor mm1,mm1 +align 16 +L$010mw_sse2_loop: + movd mm2,DWORD [edx] + pmuludq mm2,mm0 + lea edx,[4+edx] + paddq mm1,mm2 + movd DWORD [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,[4+eax] + jnz NEAR L$010mw_sse2_loop + movd eax,mm1 + emms + ret +align 16 +L$009mw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD [20+esp] + mov ebx,DWORD [24+esp] + mov ebp,DWORD [28+esp] + mov ecx,DWORD [32+esp] + and ebp,4294967288 + jz NEAR L$011mw_finish +L$012mw_loop: + ; Round 0 + mov eax,DWORD [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD [4+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD [8+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD [12+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD [16+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [16+edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD [20+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD [24+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD [28+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [28+edi],eax + mov esi,edx + ; + add ebx,32 + add edi,32 + sub ebp,8 + jz NEAR L$011mw_finish + jmp NEAR L$012mw_loop +L$011mw_finish: + mov ebp,DWORD [28+esp] + and ebp,7 + jnz NEAR L$013mw_finish2 + jmp NEAR L$014mw_end +L$013mw_finish2: + ; Tail Round 0 + mov eax,DWORD [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 1 + mov eax,DWORD [4+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 2 + mov eax,DWORD [8+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 3 + mov eax,DWORD [12+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 4 + mov eax,DWORD [16+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [16+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 5 + mov eax,DWORD [20+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 6 + mov eax,DWORD [24+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx +L$014mw_end: + mov eax,esi + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_sqr_words +align 16 +_bn_sqr_words: +L$_bn_sqr_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$015sqr_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] +align 16 +L$016sqr_sse2_loop: + movd mm0,DWORD [edx] + pmuludq mm0,mm0 + lea edx,[4+edx] + movq [eax],mm0 + sub ecx,1 + lea eax,[8+eax] + jnz NEAR L$016sqr_sse2_loop + emms + ret +align 16 +L$015sqr_non_sse2: + push ebp + push ebx + push esi + push edi + ; + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov ebx,DWORD [28+esp] + and ebx,4294967288 + jz NEAR L$017sw_finish +L$018sw_loop: + ; Round 0 + mov eax,DWORD [edi] + mul eax + mov DWORD [esi],eax + mov DWORD [4+esi],edx + ; Round 4 + mov eax,DWORD [4+edi] + mul eax + mov DWORD [8+esi],eax + mov DWORD [12+esi],edx + ; Round 8 + mov eax,DWORD [8+edi] + mul eax + mov DWORD [16+esi],eax + mov DWORD [20+esi],edx + ; Round 12 + mov eax,DWORD [12+edi] + mul eax + mov DWORD [24+esi],eax + mov DWORD [28+esi],edx + ; Round 16 + mov eax,DWORD [16+edi] + mul eax + mov DWORD [32+esi],eax + mov DWORD [36+esi],edx + ; Round 20 + mov eax,DWORD [20+edi] + mul eax + mov DWORD [40+esi],eax + mov DWORD [44+esi],edx + ; Round 24 + mov eax,DWORD [24+edi] + mul eax + mov DWORD [48+esi],eax + mov DWORD [52+esi],edx + ; Round 28 + mov eax,DWORD [28+edi] + mul eax + mov DWORD [56+esi],eax + mov DWORD [60+esi],edx + ; + add edi,32 + add esi,64 + sub ebx,8 + jnz NEAR L$018sw_loop +L$017sw_finish: + mov ebx,DWORD [28+esp] + and ebx,7 + jz NEAR L$019sw_end + ; Tail Round 0 + mov eax,DWORD [edi] + mul eax + mov DWORD [esi],eax + dec ebx + mov DWORD [4+esi],edx + jz NEAR L$019sw_end + ; Tail Round 1 + mov eax,DWORD [4+edi] + mul eax + mov DWORD [8+esi],eax + dec ebx + mov DWORD [12+esi],edx + jz NEAR L$019sw_end + ; Tail Round 2 + mov eax,DWORD [8+edi] + mul eax + mov DWORD [16+esi],eax + dec ebx + mov DWORD [20+esi],edx + jz NEAR L$019sw_end + ; Tail Round 3 + mov eax,DWORD [12+edi] + mul eax + mov DWORD [24+esi],eax + dec ebx + mov DWORD [28+esi],edx + jz NEAR L$019sw_end + ; Tail Round 4 + mov eax,DWORD [16+edi] + mul eax + mov DWORD [32+esi],eax + dec ebx + mov DWORD [36+esi],edx + jz NEAR L$019sw_end + ; Tail Round 5 + mov eax,DWORD [20+edi] + mul eax + mov DWORD [40+esi],eax + dec ebx + mov DWORD [44+esi],edx + jz NEAR L$019sw_end + ; Tail Round 6 + mov eax,DWORD [24+edi] + mul eax + mov DWORD [48+esi],eax + mov DWORD [52+esi],edx +L$019sw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_div_words +align 16 +_bn_div_words: +L$_bn_div_words_begin: + mov edx,DWORD [4+esp] + mov eax,DWORD [8+esp] + mov ecx,DWORD [12+esp] + div ecx + ret +global _bn_add_words +align 16 +_bn_add_words: +L$_bn_add_words_begin: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + and ebp,4294967288 + jz NEAR L$020aw_finish +L$021aw_loop: + ; Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; Round 7 + mov ecx,DWORD [28+esi] + mov edx,DWORD [28+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$021aw_loop +L$020aw_finish: + mov ebp,DWORD [32+esp] + and ebp,7 + jz NEAR L$022aw_end + ; Tail Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [4+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [8+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [12+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [16+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [20+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx +L$022aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_sub_words +align 16 +_bn_sub_words: +L$_bn_sub_words_begin: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + and ebp,4294967288 + jz NEAR L$023aw_finish +L$024aw_loop: + ; Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; Round 7 + mov ecx,DWORD [28+esi] + mov edx,DWORD [28+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$024aw_loop +L$023aw_finish: + mov ebp,DWORD [32+esp] + and ebp,7 + jz NEAR L$025aw_end + ; Tail Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [4+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [8+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [12+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [16+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [20+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx +L$025aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +segment .bss +common _OPENSSL_ia32cap_P 16 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/bn-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/bn-armv8-apple.S new file mode 100644 index 0000000000..5e3471a04e --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/bn-armv8-apple.S @@ -0,0 +1,89 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +.text + +// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +// size_t num); + +.globl _bn_add_words +.private_extern _bn_add_words +.align 4 +_bn_add_words: + AARCH64_VALID_CALL_TARGET + # Clear the carry flag. + cmn xzr, xzr + + # aarch64 can load two registers at a time, so we do two loop iterations at + # at a time. Split x3 = 2 * x8 + x3. This allows loop + # operations to use CBNZ without clobbering the carry flag. + lsr x8, x3, #1 + and x3, x3, #1 + + cbz x8, Ladd_tail +Ladd_loop: + ldp x4, x5, [x1], #16 + ldp x6, x7, [x2], #16 + sub x8, x8, #1 + adcs x4, x4, x6 + adcs x5, x5, x7 + stp x4, x5, [x0], #16 + cbnz x8, Ladd_loop + +Ladd_tail: + cbz x3, Ladd_exit + ldr x4, [x1], #8 + ldr x6, [x2], #8 + adcs x4, x4, x6 + str x4, [x0], #8 + +Ladd_exit: + cset x0, cs + ret + + +// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +// size_t num); + +.globl _bn_sub_words +.private_extern _bn_sub_words +.align 4 +_bn_sub_words: + AARCH64_VALID_CALL_TARGET + # Set the carry flag. Arm's borrow bit is flipped from the carry flag, + # so we want C = 1 here. + cmp xzr, xzr + + # aarch64 can load two registers at a time, so we do two loop iterations at + # at a time. Split x3 = 2 * x8 + x3. This allows loop + # operations to use CBNZ without clobbering the carry flag. + lsr x8, x3, #1 + and x3, x3, #1 + + cbz x8, Lsub_tail +Lsub_loop: + ldp x4, x5, [x1], #16 + ldp x6, x7, [x2], #16 + sub x8, x8, #1 + sbcs x4, x4, x6 + sbcs x5, x5, x7 + stp x4, x5, [x0], #16 + cbnz x8, Lsub_loop + +Lsub_tail: + cbz x3, Lsub_exit + ldr x4, [x1], #8 + ldr x6, [x2], #8 + sbcs x4, x4, x6 + str x4, [x0], #8 + +Lsub_exit: + cset x0, cc + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/bn-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/bn-armv8-linux.S new file mode 100644 index 0000000000..2b8823ab36 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/bn-armv8-linux.S @@ -0,0 +1,89 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +.text + +// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +// size_t num); +.type bn_add_words, %function +.globl bn_add_words +.hidden bn_add_words +.align 4 +bn_add_words: + AARCH64_VALID_CALL_TARGET + # Clear the carry flag. + cmn xzr, xzr + + # aarch64 can load two registers at a time, so we do two loop iterations at + # at a time. Split x3 = 2 * x8 + x3. This allows loop + # operations to use CBNZ without clobbering the carry flag. + lsr x8, x3, #1 + and x3, x3, #1 + + cbz x8, .Ladd_tail +.Ladd_loop: + ldp x4, x5, [x1], #16 + ldp x6, x7, [x2], #16 + sub x8, x8, #1 + adcs x4, x4, x6 + adcs x5, x5, x7 + stp x4, x5, [x0], #16 + cbnz x8, .Ladd_loop + +.Ladd_tail: + cbz x3, .Ladd_exit + ldr x4, [x1], #8 + ldr x6, [x2], #8 + adcs x4, x4, x6 + str x4, [x0], #8 + +.Ladd_exit: + cset x0, cs + ret +.size bn_add_words,.-bn_add_words + +// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +// size_t num); +.type bn_sub_words, %function +.globl bn_sub_words +.hidden bn_sub_words +.align 4 +bn_sub_words: + AARCH64_VALID_CALL_TARGET + # Set the carry flag. Arm's borrow bit is flipped from the carry flag, + # so we want C = 1 here. + cmp xzr, xzr + + # aarch64 can load two registers at a time, so we do two loop iterations at + # at a time. Split x3 = 2 * x8 + x3. This allows loop + # operations to use CBNZ without clobbering the carry flag. + lsr x8, x3, #1 + and x3, x3, #1 + + cbz x8, .Lsub_tail +.Lsub_loop: + ldp x4, x5, [x1], #16 + ldp x6, x7, [x2], #16 + sub x8, x8, #1 + sbcs x4, x4, x6 + sbcs x5, x5, x7 + stp x4, x5, [x0], #16 + cbnz x8, .Lsub_loop + +.Lsub_tail: + cbz x3, .Lsub_exit + ldr x4, [x1], #8 + ldr x6, [x2], #8 + sbcs x4, x4, x6 + str x4, [x0], #8 + +.Lsub_exit: + cset x0, cc + ret +.size bn_sub_words,.-bn_sub_words +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/bn-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/bn-armv8-win.S new file mode 100644 index 0000000000..af97080007 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/bn-armv8-win.S @@ -0,0 +1,89 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +.text + +// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +// size_t num); + +.globl bn_add_words + +.align 4 +bn_add_words: + AARCH64_VALID_CALL_TARGET + # Clear the carry flag. + cmn xzr, xzr + + # aarch64 can load two registers at a time, so we do two loop iterations at + # at a time. Split x3 = 2 * x8 + x3. This allows loop + # operations to use CBNZ without clobbering the carry flag. + lsr x8, x3, #1 + and x3, x3, #1 + + cbz x8, Ladd_tail +Ladd_loop: + ldp x4, x5, [x1], #16 + ldp x6, x7, [x2], #16 + sub x8, x8, #1 + adcs x4, x4, x6 + adcs x5, x5, x7 + stp x4, x5, [x0], #16 + cbnz x8, Ladd_loop + +Ladd_tail: + cbz x3, Ladd_exit + ldr x4, [x1], #8 + ldr x6, [x2], #8 + adcs x4, x4, x6 + str x4, [x0], #8 + +Ladd_exit: + cset x0, cs + ret + + +// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +// size_t num); + +.globl bn_sub_words + +.align 4 +bn_sub_words: + AARCH64_VALID_CALL_TARGET + # Set the carry flag. Arm's borrow bit is flipped from the carry flag, + # so we want C = 1 here. + cmp xzr, xzr + + # aarch64 can load two registers at a time, so we do two loop iterations at + # at a time. Split x3 = 2 * x8 + x3. This allows loop + # operations to use CBNZ without clobbering the carry flag. + lsr x8, x3, #1 + and x3, x3, #1 + + cbz x8, Lsub_tail +Lsub_loop: + ldp x4, x5, [x1], #16 + ldp x6, x7, [x2], #16 + sub x8, x8, #1 + sbcs x4, x4, x6 + sbcs x5, x5, x7 + stp x4, x5, [x0], #16 + cbnz x8, Lsub_loop + +Lsub_tail: + cbz x3, Lsub_exit + ldr x4, [x1], #8 + ldr x6, [x2], #8 + sbcs x4, x4, x6 + str x4, [x0], #8 + +Lsub_exit: + cset x0, cc + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/bsaes-armv7-linux.S b/yass/third_party/boringssl/src/gen/bcm/bsaes-armv7-linux.S new file mode 100644 index 0000000000..01a9ead28a --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/bsaes-armv7-linux.S @@ -0,0 +1,1517 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +@ Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +@ +@ Licensed under the OpenSSL license (the "License"). You may not use +@ this file except in compliance with the License. You can obtain a copy +@ in the file LICENSE in the source distribution or at +@ https://www.openssl.org/source/license.html + + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel +@ of Linaro. Permission to use under GPL terms is granted. +@ ==================================================================== + +@ Bit-sliced AES for ARM NEON +@ +@ February 2012. +@ +@ This implementation is direct adaptation of bsaes-x86_64 module for +@ ARM NEON. Except that this module is endian-neutral [in sense that +@ it can be compiled for either endianness] by courtesy of vld1.8's +@ neutrality. Initial version doesn't implement interface to OpenSSL, +@ only low-level primitives and unsupported entry points, just enough +@ to collect performance results, which for Cortex-A8 core are: +@ +@ encrypt 19.5 cycles per byte processed with 128-bit key +@ decrypt 22.1 cycles per byte processed with 128-bit key +@ key conv. 440 cycles per 128-bit key/0.18 of 8x block +@ +@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +@ which is [much] worse than anticipated (for further details see +@ http://www.openssl.org/~appro/Snapdragon-S4.html). +@ +@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +@ manages in 20.0 cycles]. +@ +@ When comparing to x86_64 results keep in mind that NEON unit is +@ [mostly] single-issue and thus can't [fully] benefit from +@ instruction-level parallelism. And when comparing to aes-armv4 +@ results keep in mind key schedule conversion overhead (see +@ bsaes-x86_64.pl for further details)... +@ +@ + +@ April-August 2013 +@ Add CBC, CTR and XTS subroutines and adapt for kernel use; courtesy of Ard. + +#ifndef __KERNEL__ +# include + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_MAX_ARCH__ 7 +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#if defined(__thumb2__) && !defined(__APPLE__) +.thumb +#else +.code 32 +# undef __thumb2__ +#endif + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr r6,. + vldmia r4!, {q9} @ round 0 key +#if defined(__thumb2__) || defined(__APPLE__) + adr r6,.LM0ISR +#else + add r6,r6,#.LM0ISR-_bsaes_decrypt8 +#endif + + vldmia r6!, {q8} @ .LM0ISR + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: + vldmia r4!, {q8,q9,q10,q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Ldec_sbox: + veor q1, q1, q4 + veor q3, q3, q4 + + veor q4, q4, q7 + veor q1, q1, q6 + veor q2, q2, q7 + veor q6, q6, q4 + + veor q0, q0, q1 + veor q2, q2, q5 + veor q7, q7, q6 + veor q3, q3, q0 + veor q5, q5, q0 + veor q1, q1, q3 + veor q11, q3, q0 + veor q10, q7, q4 + veor q9, q1, q6 + veor q13, q4, q0 + vmov q8, q10 + veor q12, q5, q2 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q6, q2 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q3, q7 + veor q12, q1, q5 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q4, q6 + veor q9, q9, q14 + vand q13, q0, q2 + vand q14, q7, q1 + vorr q15, q3, q5 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q5, q2 + veor q8, q1, q6 + veor q10, q15, q14 + vand q10, q10, q5 + veor q5, q5, q1 + vand q11, q1, q15 + vand q5, q5, q14 + veor q1, q11, q10 + veor q5, q5, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q2 + veor q12, q12, q8 + veor q2, q2, q6 + vand q8, q8, q15 + vand q6, q6, q13 + vand q12, q12, q14 + vand q2, q2, q9 + veor q8, q8, q12 + veor q2, q2, q6 + veor q12, q12, q11 + veor q6, q6, q10 + veor q5, q5, q12 + veor q2, q2, q12 + veor q1, q1, q8 + veor q6, q6, q8 + + veor q12, q3, q0 + veor q8, q7, q4 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q4 + vand q8, q8, q15 + vand q4, q4, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q4 + veor q12, q12, q11 + veor q4, q4, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q3 + veor q3, q3, q7 + vand q11, q7, q15 + vand q3, q3, q14 + veor q7, q11, q10 + veor q3, q3, q11 + veor q3, q3, q12 + veor q0, q0, q12 + veor q7, q7, q8 + veor q4, q4, q8 + veor q1, q1, q7 + veor q6, q6, q5 + + veor q4, q4, q1 + veor q2, q2, q7 + veor q5, q5, q7 + veor q4, q4, q2 + veor q7, q7, q0 + veor q4, q4, q5 + veor q3, q3, q6 + veor q6, q6, q1 + veor q3, q3, q4 + + veor q4, q4, q0 + veor q7, q7, q3 + subs r5,r5,#1 + bcc .Ldec_done + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 q8, q0, q0, #8 + vext.8 q14, q3, q3, #8 + vext.8 q15, q5, q5, #8 + veor q8, q8, q0 + vext.8 q9, q1, q1, #8 + veor q14, q14, q3 + vext.8 q10, q6, q6, #8 + veor q15, q15, q5 + vext.8 q11, q4, q4, #8 + veor q9, q9, q1 + vext.8 q12, q2, q2, #8 + veor q10, q10, q6 + vext.8 q13, q7, q7, #8 + veor q11, q11, q4 + veor q12, q12, q2 + veor q13, q13, q7 + + veor q0, q0, q14 + veor q1, q1, q14 + veor q6, q6, q8 + veor q2, q2, q10 + veor q4, q4, q9 + veor q1, q1, q15 + veor q6, q6, q15 + veor q2, q2, q14 + veor q7, q7, q11 + veor q4, q4, q14 + veor q3, q3, q12 + veor q2, q2, q15 + veor q7, q7, q15 + veor q5, q5, q13 + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q6, q6, #12 + veor q1, q1, q9 + vext.8 q11, q4, q4, #12 + veor q6, q6, q10 + vext.8 q12, q2, q2, #12 + veor q4, q4, q11 + vext.8 q13, q7, q7, #12 + veor q2, q2, q12 + vext.8 q14, q3, q3, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q3, q3, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q2 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q2, q2, #8 + veor q12, q12, q4 + vext.8 q9, q7, q7, #8 + veor q15, q15, q3 + vext.8 q2, q4, q4, #8 + veor q11, q11, q6 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q4, q3, q3, #8 + veor q11, q11, q5 + vext.8 q3, q6, q6, #8 + veor q5, q9, q13 + veor q11, q11, q2 + veor q7, q7, q15 + veor q6, q4, q14 + veor q4, q8, q12 + veor q2, q3, q10 + vmov q3, q11 + @ vmov q5, q9 + vldmia r6, {q12} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq r6,r6,#0x10 + bne .Ldec_loop + vldmia r6, {q12} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q3, #1 + vshr.u64 q11, q2, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q4 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q4, q4, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q2, #2 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q3, q3, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q4 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q4, q4, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q4, #4 + vshr.u64 q11, q6, #4 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q4, q4, q10 + veor q6, q6, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q6, q6, q8 + veor q4, q4, q8 + veor q2, q2, q8 + veor q7, q7, q8 + veor q3, q3, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR:@ InvShiftRows constants +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR:@ ShiftRows constants +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: +.quad 0x090d01050c000408, 0x03070b0f060a0e02 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr r6,. + vldmia r4!, {q9} @ round 0 key +#if defined(__thumb2__) || defined(__APPLE__) + adr r6,.LM0SR +#else + sub r6,r6,#_bsaes_encrypt8-.LM0SR +#endif + + vldmia r6!, {q8} @ .LM0SR +_bsaes_encrypt8_alt: + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 +_bsaes_encrypt8_bitslice: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: + vldmia r4!, {q8,q9,q10,q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Lenc_sbox: + veor q2, q2, q1 + veor q5, q5, q6 + veor q3, q3, q0 + veor q6, q6, q2 + veor q5, q5, q0 + + veor q6, q6, q3 + veor q3, q3, q7 + veor q7, q7, q5 + veor q3, q3, q4 + veor q4, q4, q5 + + veor q2, q2, q7 + veor q3, q3, q1 + veor q1, q1, q5 + veor q11, q7, q4 + veor q10, q1, q2 + veor q9, q5, q3 + veor q13, q2, q4 + vmov q8, q10 + veor q12, q6, q0 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q3, q0 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q7, q1 + veor q12, q5, q6 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q2, q3 + veor q9, q9, q14 + vand q13, q4, q0 + vand q14, q1, q5 + vorr q15, q7, q6 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q6, q0 + veor q8, q5, q3 + veor q10, q15, q14 + vand q10, q10, q6 + veor q6, q6, q5 + vand q11, q5, q15 + vand q6, q6, q14 + veor q5, q11, q10 + veor q6, q6, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q3 + vand q8, q8, q15 + vand q3, q3, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q3 + veor q12, q12, q11 + veor q3, q3, q10 + veor q6, q6, q12 + veor q0, q0, q12 + veor q5, q5, q8 + veor q3, q3, q8 + + veor q12, q7, q4 + veor q8, q1, q2 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q4 + veor q12, q12, q8 + veor q4, q4, q2 + vand q8, q8, q15 + vand q2, q2, q13 + vand q12, q12, q14 + vand q4, q4, q9 + veor q8, q8, q12 + veor q4, q4, q2 + veor q12, q12, q11 + veor q2, q2, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q7 + veor q7, q7, q1 + vand q11, q1, q15 + vand q7, q7, q14 + veor q1, q11, q10 + veor q7, q7, q11 + veor q7, q7, q12 + veor q4, q4, q12 + veor q1, q1, q8 + veor q2, q2, q8 + veor q7, q7, q0 + veor q1, q1, q6 + veor q6, q6, q0 + veor q4, q4, q7 + veor q0, q0, q1 + + veor q1, q1, q5 + veor q5, q5, q2 + veor q2, q2, q3 + veor q3, q3, q5 + veor q4, q4, q5 + + veor q6, q6, q3 + subs r5,r5,#1 + bcc .Lenc_done + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q4, q4, #12 + veor q1, q1, q9 + vext.8 q11, q6, q6, #12 + veor q4, q4, q10 + vext.8 q12, q3, q3, #12 + veor q6, q6, q11 + vext.8 q13, q7, q7, #12 + veor q3, q3, q12 + vext.8 q14, q2, q2, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q2, q2, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q3 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q3, q3, #8 + veor q12, q12, q6 + vext.8 q9, q7, q7, #8 + veor q15, q15, q2 + vext.8 q3, q6, q6, #8 + veor q11, q11, q4 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q6, q2, q2, #8 + veor q11, q11, q5 + vext.8 q2, q4, q4, #8 + veor q5, q9, q13 + veor q4, q8, q12 + veor q3, q3, q11 + veor q7, q7, q15 + veor q6, q6, q14 + @ vmov q4, q8 + veor q2, q2, q10 + @ vmov q5, q9 + vldmia r6, {q12} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq r6,r6,#0x10 + bne .Lenc_loop + vldmia r6, {q12} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q3, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q3, q3, q11 + vshr.u64 q10, q4, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q6 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q6, q6, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q4, q4, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q3, #2 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q3, q3, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q6 + veor q11, q11, q4 + vand q10, q10, q9 + vand q11, q11, q9 + veor q6, q6, q10 + vshl.u64 q10, q10, #2 + veor q4, q4, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q6, #4 + vshr.u64 q11, q4, #4 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q4, q4, q8 + veor q6, q6, q8 + veor q3, q3, q8 + veor q7, q7, q8 + veor q2, q2, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr r6,. + vld1.8 {q7}, [r4]! @ load round 0 key +#if defined(__thumb2__) || defined(__APPLE__) + adr r6,.LM0 +#else + sub r6,r6,#_bsaes_key_convert-.LM0 +#endif + vld1.8 {q15}, [r4]! @ load round 1 key + + vmov.i8 q8, #0x01 @ bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + vldmia r6, {q14} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 q7, q7 + vrev32.8 q15, q15 +#endif + sub r5,r5,#1 + vstmia r12!, {q7} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 d14,{q15},d28 + vtbl.8 d15,{q15},d29 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.8 {q15}, [r4]! @ load next round key + vmvn q0, q0 @ "pnot" + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 +#ifdef __ARMEL__ + vrev32.8 q15, q15 +#endif + subs r5,r5,#1 + vstmia r12!,{q0,q1,q2,q3,q4,q5,q6,q7} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 q7,#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +.globl bsaes_cbc_encrypt +.hidden bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: + @ In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for + @ short inputs. We patch this out, using bsaes for all input sizes. + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ IV is 1st arg on the stack + mov r2, r2, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + vldmia sp, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia sp, {q7} +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r3, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 + +#endif + + vld1.8 {q15}, [r8] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs r2, r2, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {q0,q1}, [r0]! @ load input + vld1.8 {q2,q3}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + vld1.8 {q4,q5}, [r0]! + mov r5, r10 + vld1.8 {q6,q7}, [r0] + sub r0, r0, #0x60 + vstmia r9, {q15} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10,q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12,q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q14,q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0,q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + veor q5, q5, q14 + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + vst1.8 {q5}, [r1]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds r2, r2, #8 + beq .Lcbc_dec_done + + @ Set up most parameters for the _bsaes_decrypt8 call. +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + mov r5, r10 + vstmia r9, {q15} @ put aside IV + + vld1.8 {q0}, [r0]! @ load input + cmp r2, #2 + blo .Lcbc_dec_one + vld1.8 {q1}, [r0]! + beq .Lcbc_dec_two + vld1.8 {q2}, [r0]! + cmp r2, #4 + blo .Lcbc_dec_three + vld1.8 {q3}, [r0]! + beq .Lcbc_dec_four + vld1.8 {q4}, [r0]! + cmp r2, #6 + blo .Lcbc_dec_five + vld1.8 {q5}, [r0]! + beq .Lcbc_dec_six + vld1.8 {q6}, [r0]! + sub r0, r0, #0x70 + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10,q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12,q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0,q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub r0, r0, #0x60 + bl _bsaes_decrypt8 + vldmia r9,{q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10,q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0,q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub r0, r0, #0x50 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10,q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0,q1}, [r1]! @ write output + veor q2, q2, q11 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub r0, r0, #0x40 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0,q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub r0, r0, #0x30 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vst1.8 {q0,q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub r0, r0, #0x20 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! @ reload input + veor q1, q1, q8 + vst1.8 {q0,q1}, [r1]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub r0, r0, #0x10 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q15}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vst1.8 {q0}, [r1]! @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero:@ wipe key schedule [if any] + vstmia sp!, {q0,q1} + cmp sp, r9 + bne .Lcbc_dec_bzero +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + vst1.8 {q15}, [r8] @ return IV + VFP_ABI_POP + ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +.globl bsaes_ctr32_encrypt_blocks +.hidden bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + @ In OpenSSL, short inputs fall back to aes_nohw_* here. We patch this + @ out to retain a constant-time implementation. + mov ip, sp + stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + + vld1.8 {q0}, [r8] @ load counter +#ifdef __APPLE__ + mov r8, #:lower16:(.LREVM0SR-.LM0) + add r8, r6, r8 +#else + add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 +#endif + vldmia sp, {q4} @ load round0 key +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + +.align 2 + add r12, r3, #248 + vld1.8 {q0}, [r8] @ load counter + adrl r8, .LREVM0SR @ borrow r8 + vldmia r12, {q4} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 q8,#1 @ compose 1<<96 + veor q9,q9,q9 + vrev32.8 q0,q0 + vext.8 q8,q9,q8,#4 + vrev32.8 q4,q4 + vadd.u32 q9,q8,q8 @ compose 2<<96 + vstmia sp, {q4} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 q10, q8, q9 @ compose 3<<96 + vadd.u32 q1, q0, q8 @ +1 + vadd.u32 q2, q0, q9 @ +2 + vadd.u32 q3, q0, q10 @ +3 + vadd.u32 q4, q1, q10 + vadd.u32 q5, q2, q10 + vadd.u32 q6, q3, q10 + vadd.u32 q7, q4, q10 + vadd.u32 q10, q5, q10 @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia sp, {q9} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x10 @ pass next round key +#else + add r4, r3, #264 +#endif + vldmia r8, {q8} @ .LREVM0SR + mov r5, r10 @ pass rounds + vstmia r9, {q10} @ save next counter +#ifdef __APPLE__ + mov r6, #:lower16:(.LREVM0SR-.LSR) + sub r6, r8, r6 +#else + sub r6, r8, #.LREVM0SR-.LSR @ pass constants +#endif + + bl _bsaes_encrypt8_alt + + subs r2, r2, #8 + blo .Lctr_enc_loop_done + + vld1.8 {q8,q9}, [r0]! @ load input + vld1.8 {q10,q11}, [r0]! + veor q0, q8 + veor q1, q9 + vld1.8 {q12,q13}, [r0]! + veor q4, q10 + veor q6, q11 + vld1.8 {q14,q15}, [r0]! + veor q3, q12 + vst1.8 {q0,q1}, [r1]! @ write output + veor q7, q13 + veor q2, q14 + vst1.8 {q4}, [r1]! + veor q5, q15 + vst1.8 {q6}, [r1]! + vmov.i32 q8, #1 @ compose 1<<96 + vst1.8 {q3}, [r1]! + veor q9, q9, q9 + vst1.8 {q7}, [r1]! + vext.8 q8, q9, q8, #4 + vst1.8 {q2}, [r1]! + vadd.u32 q9,q8,q8 @ compose 2<<96 + vst1.8 {q5}, [r1]! + vldmia r9, {q0} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add r2, r2, #8 + vld1.8 {q8}, [r0]! @ load input + veor q0, q8 + vst1.8 {q0}, [r1]! @ write output + cmp r2, #2 + blo .Lctr_enc_done + vld1.8 {q9}, [r0]! + veor q1, q9 + vst1.8 {q1}, [r1]! + beq .Lctr_enc_done + vld1.8 {q10}, [r0]! + veor q4, q10 + vst1.8 {q4}, [r1]! + cmp r2, #4 + blo .Lctr_enc_done + vld1.8 {q11}, [r0]! + veor q6, q11 + vst1.8 {q6}, [r1]! + beq .Lctr_enc_done + vld1.8 {q12}, [r0]! + veor q3, q12 + vst1.8 {q3}, [r1]! + cmp r2, #6 + blo .Lctr_enc_done + vld1.8 {q13}, [r0]! + veor q7, q13 + vst1.8 {q7}, [r1]! + beq .Lctr_enc_done + vld1.8 {q14}, [r0] + veor q2, q14 + vst1.8 {q2}, [r1]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero:@ wipe key schedule [if any] + vstmia sp!, {q0,q1} + cmp sp, r9 + bne .Lctr_enc_bzero +#else + vstmia sp, {q0,q1} +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return + + @ OpenSSL contains aes_nohw_* fallback code here. We patch this + @ out to retain a constant-time implementation. +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/co-586-apple.S b/yass/third_party/boringssl/src/gen/bcm/co-586-apple.S new file mode 100644 index 0000000000..ab985eec29 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/co-586-apple.S @@ -0,0 +1,1256 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _bn_mul_comba8 +.private_extern _bn_mul_comba8 +.align 4 +_bn_mul_comba8: +L_bn_mul_comba8_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + # ################## Calculate word 0 + xorl %ebp,%ebp + # mul a[0]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + # saved r[0] + # ################## Calculate word 1 + xorl %ebx,%ebx + # mul a[1]*b[0] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + # saved r[1] + # ################## Calculate word 2 + xorl %ecx,%ecx + # mul a[2]*b[0] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[1] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + # saved r[2] + # ################## Calculate word 3 + xorl %ebp,%ebp + # mul a[3]*b[0] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[1] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[2] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 16(%esi),%eax + # saved r[3] + # ################## Calculate word 4 + xorl %ebx,%ebx + # mul a[4]*b[0] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[1] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[2] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[3] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[4] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 20(%esi),%eax + # saved r[4] + # ################## Calculate word 5 + xorl %ecx,%ecx + # mul a[5]*b[0] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[1] + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[3]*b[2] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[3] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[4] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[5] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 24(%esi),%eax + # saved r[5] + # ################## Calculate word 6 + xorl %ebp,%ebp + # mul a[6]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[1] + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[4]*b[2] + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[3]*b[3] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[4] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[5] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[6] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,24(%eax) + movl 28(%esi),%eax + # saved r[6] + # ################## Calculate word 7 + xorl %ebx,%ebx + # mul a[7]*b[0] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[5]*b[2] + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[4]*b[3] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[4] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[5] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[6] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + movl %ecx,28(%eax) + movl 28(%esi),%eax + # saved r[7] + # ################## Calculate word 8 + xorl %ecx,%ecx + # mul a[7]*b[1] + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[6]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[5]*b[3] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[4] + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[3]*b[5] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[6] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + movl %ebp,32(%eax) + movl 28(%esi),%eax + # saved r[8] + # ################## Calculate word 9 + xorl %ebp,%ebp + # mul a[7]*b[2] + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[6]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[4] + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + # mul a[4]*b[5] + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[3]*b[6] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[7] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + movl %ebx,36(%eax) + movl 28(%esi),%eax + # saved r[9] + # ################## Calculate word 10 + xorl %ebx,%ebx + # mul a[7]*b[3] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[4] + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + # mul a[5]*b[5] + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + # mul a[4]*b[6] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + movl %ecx,40(%eax) + movl 28(%esi),%eax + # saved r[10] + # ################## Calculate word 11 + xorl %ecx,%ecx + # mul a[7]*b[4] + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[6]*b[5] + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + # mul a[5]*b[6] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + movl %ebp,44(%eax) + movl 28(%esi),%eax + # saved r[11] + # ################## Calculate word 12 + xorl %ebp,%ebp + # mul a[7]*b[5] + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[6]*b[6] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[7] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + movl %ebx,48(%eax) + movl 28(%esi),%eax + # saved r[12] + # ################## Calculate word 13 + xorl %ebx,%ebx + # mul a[7]*b[6] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + movl %ecx,52(%eax) + movl 28(%esi),%eax + # saved r[13] + # ################## Calculate word 14 + xorl %ecx,%ecx + # mul a[7]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%eax) + # saved r[14] + # save r[15] + movl %ebx,60(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_mul_comba4 +.private_extern _bn_mul_comba4 +.align 4 +_bn_mul_comba4: +L_bn_mul_comba4_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + # ################## Calculate word 0 + xorl %ebp,%ebp + # mul a[0]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + # saved r[0] + # ################## Calculate word 1 + xorl %ebx,%ebx + # mul a[1]*b[0] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + # saved r[1] + # ################## Calculate word 2 + xorl %ecx,%ecx + # mul a[2]*b[0] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[1] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + # saved r[2] + # ################## Calculate word 3 + xorl %ebp,%ebp + # mul a[3]*b[0] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[1] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[2] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 12(%esi),%eax + # saved r[3] + # ################## Calculate word 4 + xorl %ebx,%ebx + # mul a[3]*b[1] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[2] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[3] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 12(%esi),%eax + # saved r[4] + # ################## Calculate word 5 + xorl %ecx,%ecx + # mul a[3]*b[2] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[3] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 12(%esi),%eax + # saved r[5] + # ################## Calculate word 6 + xorl %ebp,%ebp + # mul a[3]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%eax) + # saved r[6] + # save r[7] + movl %ecx,28(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_sqr_comba8 +.private_extern _bn_sqr_comba8 +.align 4 +_bn_sqr_comba8: +L_bn_sqr_comba8_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + # ############### Calculate word 0 + xorl %ebp,%ebp + # sqr a[0]*a[0] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + # saved r[0] + # ############### Calculate word 1 + xorl %ebx,%ebx + # sqr a[1]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + # saved r[1] + # ############### Calculate word 2 + xorl %ecx,%ecx + # sqr a[2]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + # sqr a[1]*a[1] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + # saved r[2] + # ############### Calculate word 3 + xorl %ebp,%ebp + # sqr a[3]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[2]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl (%esi),%edx + # saved r[3] + # ############### Calculate word 4 + xorl %ebx,%ebx + # sqr a[4]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 12(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + # sqr a[3]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + # sqr a[2]*a[2] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl (%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 20(%esi),%eax + # saved r[4] + # ############### Calculate word 5 + xorl %ecx,%ecx + # sqr a[5]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + movl 4(%esi),%edx + # sqr a[4]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + # sqr a[3]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + movl (%esi),%edx + # saved r[5] + # ############### Calculate word 6 + xorl %ebp,%ebp + # sqr a[6]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[5]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl 8(%esi),%edx + # sqr a[4]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + # sqr a[3]*a[3] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,24(%edi) + movl 28(%esi),%eax + # saved r[6] + # ############### Calculate word 7 + xorl %ebx,%ebx + # sqr a[7]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + # sqr a[6]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + movl 8(%esi),%edx + # sqr a[5]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%eax + adcl $0,%ebx + movl 12(%esi),%edx + # sqr a[4]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,28(%edi) + movl 4(%esi),%edx + # saved r[7] + # ############### Calculate word 8 + xorl %ecx,%ecx + # sqr a[7]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + # sqr a[6]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 20(%esi),%eax + adcl $0,%ecx + movl 12(%esi),%edx + # sqr a[5]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + # sqr a[4]*a[4] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl 8(%esi),%edx + adcl $0,%ecx + movl %ebp,32(%edi) + movl 28(%esi),%eax + # saved r[8] + # ############### Calculate word 9 + xorl %ebp,%ebp + # sqr a[7]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + movl 12(%esi),%edx + # sqr a[6]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 16(%esi),%edx + # sqr a[5]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 28(%esi),%eax + adcl $0,%ebp + movl %ebx,36(%edi) + movl 12(%esi),%edx + # saved r[9] + # ############### Calculate word 10 + xorl %ebx,%ebx + # sqr a[7]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 16(%esi),%edx + # sqr a[6]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + # sqr a[5]*a[5] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%edx + adcl $0,%ebx + movl %ecx,40(%edi) + movl 28(%esi),%eax + # saved r[10] + # ############### Calculate word 11 + xorl %ecx,%ecx + # sqr a[7]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 20(%esi),%edx + # sqr a[6]*a[5] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 28(%esi),%eax + adcl $0,%ecx + movl %ebp,44(%edi) + movl 20(%esi),%edx + # saved r[11] + # ############### Calculate word 12 + xorl %ebp,%ebp + # sqr a[7]*a[5] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + # sqr a[6]*a[6] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%edx + adcl $0,%ebp + movl %ebx,48(%edi) + movl 28(%esi),%eax + # saved r[12] + # ############### Calculate word 13 + xorl %ebx,%ebx + # sqr a[7]*a[6] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,52(%edi) + # saved r[13] + # ############### Calculate word 14 + xorl %ecx,%ecx + # sqr a[7]*a[7] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%edi) + # saved r[14] + movl %ebx,60(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_sqr_comba4 +.private_extern _bn_sqr_comba4 +.align 4 +_bn_sqr_comba4: +L_bn_sqr_comba4_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + # ############### Calculate word 0 + xorl %ebp,%ebp + # sqr a[0]*a[0] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + # saved r[0] + # ############### Calculate word 1 + xorl %ebx,%ebx + # sqr a[1]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + # saved r[1] + # ############### Calculate word 2 + xorl %ecx,%ecx + # sqr a[2]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + # sqr a[1]*a[1] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + # saved r[2] + # ############### Calculate word 3 + xorl %ebp,%ebp + # sqr a[3]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[2]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl 4(%esi),%edx + # saved r[3] + # ############### Calculate word 4 + xorl %ebx,%ebx + # sqr a[3]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + # sqr a[2]*a[2] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 12(%esi),%eax + # saved r[4] + # ############### Calculate word 5 + xorl %ecx,%ecx + # sqr a[3]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + # saved r[5] + # ############### Calculate word 6 + xorl %ebp,%ebp + # sqr a[3]*a[3] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%edi) + # saved r[6] + movl %ecx,28(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/co-586-linux.S b/yass/third_party/boringssl/src/gen/bcm/co-586-linux.S new file mode 100644 index 0000000000..b4812e31cc --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/co-586-linux.S @@ -0,0 +1,1264 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl bn_mul_comba8 +.hidden bn_mul_comba8 +.type bn_mul_comba8,@function +.align 16 +bn_mul_comba8: +.L_bn_mul_comba8_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 16(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 20(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 24(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,24(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + movl %ecx,28(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + movl %ebp,32(%eax) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + movl %ebx,36(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + movl %ecx,40(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + movl %ebp,44(%eax) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + movl %ebx,48(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + movl %ecx,52(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%eax) + + + movl %ebx,60(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_mul_comba8,.-.L_bn_mul_comba8_begin +.globl bn_mul_comba4 +.hidden bn_mul_comba4 +.type bn_mul_comba4,@function +.align 16 +bn_mul_comba4: +.L_bn_mul_comba4_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 12(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 12(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%eax) + + + movl %ecx,28(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_mul_comba4,.-.L_bn_mul_comba4_begin +.globl bn_sqr_comba8 +.hidden bn_sqr_comba8 +.type bn_sqr_comba8,@function +.align 16 +bn_sqr_comba8: +.L_bn_sqr_comba8_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl (%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 12(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl (%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 20(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + movl (%esi),%edx + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,24(%edi) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%eax + adcl $0,%ebx + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,28(%edi) + movl 4(%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 20(%esi),%eax + adcl $0,%ecx + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl 8(%esi),%edx + adcl $0,%ecx + movl %ebp,32(%edi) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 16(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 28(%esi),%eax + adcl $0,%ebp + movl %ebx,36(%edi) + movl 12(%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 16(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%edx + adcl $0,%ebx + movl %ecx,40(%edi) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 20(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 28(%esi),%eax + adcl $0,%ecx + movl %ebp,44(%edi) + movl 20(%esi),%edx + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%edx + adcl $0,%ebp + movl %ebx,48(%edi) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,52(%edi) + + + xorl %ecx,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%edi) + + movl %ebx,60(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_sqr_comba8,.-.L_bn_sqr_comba8_begin +.globl bn_sqr_comba4 +.hidden bn_sqr_comba4 +.type bn_sqr_comba4,@function +.align 16 +bn_sqr_comba4: +.L_bn_sqr_comba4_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl 4(%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 12(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%edi) + + movl %ecx,28(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/co-586-win.asm b/yass/third_party/boringssl/src/gen/bcm/co-586-win.asm new file mode 100644 index 0000000000..6ad46962c9 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/co-586-win.asm @@ -0,0 +1,1263 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _bn_mul_comba8 +align 16 +_bn_mul_comba8: +L$_bn_mul_comba8_begin: + push esi + mov esi,DWORD [12+esp] + push edi + mov edi,DWORD [20+esp] + push ebp + push ebx + xor ebx,ebx + mov eax,DWORD [esi] + xor ecx,ecx + mov edx,DWORD [edi] + ; ################## Calculate word 0 + xor ebp,ebp + ; mul a[0]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [eax],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ################## Calculate word 1 + xor ebx,ebx + ; mul a[1]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[0]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [4+eax],ecx + mov eax,DWORD [8+esi] + ; saved r[1] + ; ################## Calculate word 2 + xor ecx,ecx + ; mul a[2]*b[0] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[1]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[0]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [8+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ################## Calculate word 3 + xor ebp,ebp + ; mul a[3]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[2]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[1]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[0]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [12+eax],ebx + mov eax,DWORD [16+esi] + ; saved r[3] + ; ################## Calculate word 4 + xor ebx,ebx + ; mul a[4]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[3]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[2]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[1]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[0]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [16+eax],ecx + mov eax,DWORD [20+esi] + ; saved r[4] + ; ################## Calculate word 5 + xor ecx,ecx + ; mul a[5]*b[0] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[4]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [12+esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[3]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[2]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [16+edi] + adc ecx,0 + ; mul a[1]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[0]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [20+eax],ebp + mov eax,DWORD [24+esi] + ; saved r[5] + ; ################## Calculate word 6 + xor ebp,ebp + ; mul a[6]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[5]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [16+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[4]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [12+esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[3]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [16+edi] + adc ebp,0 + ; mul a[2]*b[4] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [20+edi] + adc ebp,0 + ; mul a[1]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[0]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [24+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[6] + ; ################## Calculate word 7 + xor ebx,ebx + ; mul a[7]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[6]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[5]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [16+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[4]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[3]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov edx,DWORD [20+edi] + adc ebx,0 + ; mul a[2]*b[5] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [24+edi] + adc ebx,0 + ; mul a[1]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[0]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + mov DWORD [28+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[7] + ; ################## Calculate word 8 + xor ecx,ecx + ; mul a[7]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [24+esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[6]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esi] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[5]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [16+edi] + adc ecx,0 + ; mul a[4]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [12+esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[3]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx + mov edx,DWORD [24+edi] + adc ecx,0 + ; mul a[2]*b[6] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [28+edi] + adc ecx,0 + ; mul a[1]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + mov DWORD [32+eax],ebp + mov eax,DWORD [28+esi] + ; saved r[8] + ; ################## Calculate word 9 + xor ebp,ebp + ; mul a[7]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [24+esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[6]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [16+edi] + adc ebp,0 + ; mul a[5]*b[4] + mul edx + add ebx,eax + mov eax,DWORD [16+esi] + adc ecx,edx + mov edx,DWORD [20+edi] + adc ebp,0 + ; mul a[4]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [12+esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[3]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [28+edi] + adc ebp,0 + ; mul a[2]*b[7] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + mov DWORD [36+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[9] + ; ################## Calculate word 10 + xor ebx,ebx + ; mul a[7]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[6]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [20+esi] + adc ebp,edx + mov edx,DWORD [20+edi] + adc ebx,0 + ; mul a[5]*b[5] + mul edx + add ecx,eax + mov eax,DWORD [16+esi] + adc ebp,edx + mov edx,DWORD [24+edi] + adc ebx,0 + ; mul a[4]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[3]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + mov DWORD [40+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[10] + ; ################## Calculate word 11 + xor ecx,ecx + ; mul a[7]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [24+esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[6]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [20+esi] + adc ebx,edx + mov edx,DWORD [24+edi] + adc ecx,0 + ; mul a[5]*b[6] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [28+edi] + adc ecx,0 + ; mul a[4]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + mov DWORD [44+eax],ebp + mov eax,DWORD [28+esi] + ; saved r[11] + ; ################## Calculate word 12 + xor ebp,ebp + ; mul a[7]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [24+esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[6]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [28+edi] + adc ebp,0 + ; mul a[5]*b[7] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + mov DWORD [48+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[12] + ; ################## Calculate word 13 + xor ebx,ebx + ; mul a[7]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[6]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + mov DWORD [52+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[13] + ; ################## Calculate word 14 + xor ecx,ecx + ; mul a[7]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + adc ecx,0 + mov DWORD [56+eax],ebp + ; saved r[14] + ; save r[15] + mov DWORD [60+eax],ebx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_mul_comba4 +align 16 +_bn_mul_comba4: +L$_bn_mul_comba4_begin: + push esi + mov esi,DWORD [12+esp] + push edi + mov edi,DWORD [20+esp] + push ebp + push ebx + xor ebx,ebx + mov eax,DWORD [esi] + xor ecx,ecx + mov edx,DWORD [edi] + ; ################## Calculate word 0 + xor ebp,ebp + ; mul a[0]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [eax],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ################## Calculate word 1 + xor ebx,ebx + ; mul a[1]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[0]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [4+eax],ecx + mov eax,DWORD [8+esi] + ; saved r[1] + ; ################## Calculate word 2 + xor ecx,ecx + ; mul a[2]*b[0] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[1]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[0]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [8+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ################## Calculate word 3 + xor ebp,ebp + ; mul a[3]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[2]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[1]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[0]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + mov DWORD [12+eax],ebx + mov eax,DWORD [12+esi] + ; saved r[3] + ; ################## Calculate word 4 + xor ebx,ebx + ; mul a[3]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[2]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[1]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + mov DWORD [16+eax],ecx + mov eax,DWORD [12+esi] + ; saved r[4] + ; ################## Calculate word 5 + xor ecx,ecx + ; mul a[3]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[2]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + mov DWORD [20+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[5] + ; ################## Calculate word 6 + xor ebp,ebp + ; mul a[3]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + adc ebp,0 + mov DWORD [24+eax],ebx + ; saved r[6] + ; save r[7] + mov DWORD [28+eax],ecx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_sqr_comba8 +align 16 +_bn_sqr_comba8: +L$_bn_sqr_comba8_begin: + push esi + push edi + push ebp + push ebx + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + xor ebx,ebx + xor ecx,ecx + mov eax,DWORD [esi] + ; ############### Calculate word 0 + xor ebp,ebp + ; sqr a[0]*a[0] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [edi],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ############### Calculate word 1 + xor ebx,ebx + ; sqr a[1]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + mov DWORD [4+edi],ecx + mov edx,DWORD [esi] + ; saved r[1] + ; ############### Calculate word 2 + xor ecx,ecx + ; sqr a[2]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [4+esi] + adc ecx,0 + ; sqr a[1]*a[1] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD [esi] + adc ecx,0 + mov DWORD [8+edi],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ############### Calculate word 3 + xor ebp,ebp + ; sqr a[3]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [8+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[2]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [16+esi] + adc ebp,0 + mov DWORD [12+edi],ebx + mov edx,DWORD [esi] + ; saved r[3] + ; ############### Calculate word 4 + xor ebx,ebx + ; sqr a[4]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [12+esi] + adc ebx,0 + mov edx,DWORD [4+esi] + ; sqr a[3]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + ; sqr a[2]*a[2] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD [esi] + adc ebx,0 + mov DWORD [16+edi],ecx + mov eax,DWORD [20+esi] + ; saved r[4] + ; ############### Calculate word 5 + xor ecx,ecx + ; sqr a[5]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [16+esi] + adc ecx,0 + mov edx,DWORD [4+esi] + ; sqr a[4]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [12+esi] + adc ecx,0 + mov edx,DWORD [8+esi] + ; sqr a[3]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov DWORD [20+edi],ebp + mov edx,DWORD [esi] + ; saved r[5] + ; ############### Calculate word 6 + xor ebp,ebp + ; sqr a[6]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [20+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[5]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [16+esi] + adc ebp,0 + mov edx,DWORD [8+esi] + ; sqr a[4]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [12+esi] + adc ebp,0 + ; sqr a[3]*a[3] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [24+edi],ebx + mov eax,DWORD [28+esi] + ; saved r[6] + ; ############### Calculate word 7 + xor ebx,ebx + ; sqr a[7]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [24+esi] + adc ebx,0 + mov edx,DWORD [4+esi] + ; sqr a[6]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [20+esi] + adc ebx,0 + mov edx,DWORD [8+esi] + ; sqr a[5]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [16+esi] + adc ebx,0 + mov edx,DWORD [12+esi] + ; sqr a[4]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [28+esi] + adc ebx,0 + mov DWORD [28+edi],ecx + mov edx,DWORD [4+esi] + ; saved r[7] + ; ############### Calculate word 8 + xor ecx,ecx + ; sqr a[7]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov edx,DWORD [8+esi] + ; sqr a[6]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [20+esi] + adc ecx,0 + mov edx,DWORD [12+esi] + ; sqr a[5]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [16+esi] + adc ecx,0 + ; sqr a[4]*a[4] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD [8+esi] + adc ecx,0 + mov DWORD [32+edi],ebp + mov eax,DWORD [28+esi] + ; saved r[8] + ; ############### Calculate word 9 + xor ebp,ebp + ; sqr a[7]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [24+esi] + adc ebp,0 + mov edx,DWORD [12+esi] + ; sqr a[6]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [20+esi] + adc ebp,0 + mov edx,DWORD [16+esi] + ; sqr a[5]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [28+esi] + adc ebp,0 + mov DWORD [36+edi],ebx + mov edx,DWORD [12+esi] + ; saved r[9] + ; ############### Calculate word 10 + xor ebx,ebx + ; sqr a[7]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [24+esi] + adc ebx,0 + mov edx,DWORD [16+esi] + ; sqr a[6]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [20+esi] + adc ebx,0 + ; sqr a[5]*a[5] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD [16+esi] + adc ebx,0 + mov DWORD [40+edi],ecx + mov eax,DWORD [28+esi] + ; saved r[10] + ; ############### Calculate word 11 + xor ecx,ecx + ; sqr a[7]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov edx,DWORD [20+esi] + ; sqr a[6]*a[5] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [28+esi] + adc ecx,0 + mov DWORD [44+edi],ebp + mov edx,DWORD [20+esi] + ; saved r[11] + ; ############### Calculate word 12 + xor ebp,ebp + ; sqr a[7]*a[5] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [24+esi] + adc ebp,0 + ; sqr a[6]*a[6] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [24+esi] + adc ebp,0 + mov DWORD [48+edi],ebx + mov eax,DWORD [28+esi] + ; saved r[12] + ; ############### Calculate word 13 + xor ebx,ebx + ; sqr a[7]*a[6] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [28+esi] + adc ebx,0 + mov DWORD [52+edi],ecx + ; saved r[13] + ; ############### Calculate word 14 + xor ecx,ecx + ; sqr a[7]*a[7] + mul eax + add ebp,eax + adc ebx,edx + adc ecx,0 + mov DWORD [56+edi],ebp + ; saved r[14] + mov DWORD [60+edi],ebx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_sqr_comba4 +align 16 +_bn_sqr_comba4: +L$_bn_sqr_comba4_begin: + push esi + push edi + push ebp + push ebx + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + xor ebx,ebx + xor ecx,ecx + mov eax,DWORD [esi] + ; ############### Calculate word 0 + xor ebp,ebp + ; sqr a[0]*a[0] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [edi],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ############### Calculate word 1 + xor ebx,ebx + ; sqr a[1]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + mov DWORD [4+edi],ecx + mov edx,DWORD [esi] + ; saved r[1] + ; ############### Calculate word 2 + xor ecx,ecx + ; sqr a[2]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [4+esi] + adc ecx,0 + ; sqr a[1]*a[1] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD [esi] + adc ecx,0 + mov DWORD [8+edi],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ############### Calculate word 3 + xor ebp,ebp + ; sqr a[3]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [8+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[2]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [12+esi] + adc ebp,0 + mov DWORD [12+edi],ebx + mov edx,DWORD [4+esi] + ; saved r[3] + ; ############### Calculate word 4 + xor ebx,ebx + ; sqr a[3]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + ; sqr a[2]*a[2] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD [8+esi] + adc ebx,0 + mov DWORD [16+edi],ecx + mov eax,DWORD [12+esi] + ; saved r[4] + ; ############### Calculate word 5 + xor ecx,ecx + ; sqr a[3]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [12+esi] + adc ecx,0 + mov DWORD [20+edi],ebp + ; saved r[5] + ; ############### Calculate word 6 + xor ebp,ebp + ; sqr a[3]*a[3] + mul eax + add ebx,eax + adc ecx,edx + adc ebp,0 + mov DWORD [24+edi],ebx + ; saved r[6] + mov DWORD [28+edi],ecx + pop ebx + pop ebp + pop edi + pop esi + ret +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-armv4-linux.S b/yass/third_party/boringssl/src/gen/bcm/ghash-armv4-linux.S new file mode 100644 index 0000000000..7c04f89356 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-armv4-linux.S @@ -0,0 +1,244 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +#include + +@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both +@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL +@ instructions are in aesv8-armx.pl.) +.arch armv7-a + +.text +#if defined(__thumb2__) || defined(__clang__) +.syntax unified +#define ldrplb ldrbpl +#define ldrneb ldrbne +#endif +#if defined(__thumb2__) +.thumb +#else +.code 32 +#endif +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.globl gcm_init_neon +.hidden gcm_init_neon +.type gcm_init_neon,%function +.align 4 +gcm_init_neon: + vld1.64 d7,[r1]! @ load H + vmov.i8 q8,#0xe1 + vld1.64 d6,[r1] + vshl.i64 d17,#57 + vshr.u64 d16,#63 @ t0=0xc2....01 + vdup.8 q9,d7[7] + vshr.u64 d26,d6,#63 + vshr.s8 q9,#7 @ broadcast carry bit + vshl.i64 q3,q3,#1 + vand q8,q8,q9 + vorr d7,d26 @ H<<<=1 + veor q3,q3,q8 @ twisted H + vstmia r0,{q3} + + bx lr @ bx lr +.size gcm_init_neon,.-gcm_init_neon + +.globl gcm_gmult_neon +.hidden gcm_gmult_neon +.type gcm_gmult_neon,%function +.align 4 +gcm_gmult_neon: + vld1.64 d7,[r0]! @ load Xi + vld1.64 d6,[r0]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26,d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff +#ifdef __ARMEL__ + vrev64.8 q3,q3 +#endif + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing + mov r3,#16 + b .Lgmult_neon +.size gcm_gmult_neon,.-gcm_gmult_neon + +.globl gcm_ghash_neon +.hidden gcm_ghash_neon +.type gcm_ghash_neon,%function +.align 4 +gcm_ghash_neon: + vld1.64 d1,[r0]! @ load Xi + vld1.64 d0,[r0]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26,d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff +#ifdef __ARMEL__ + vrev64.8 q0,q0 +#endif + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing + +.Loop_neon: + vld1.64 d7,[r2]! @ load inp + vld1.64 d6,[r2]! +#ifdef __ARMEL__ + vrev64.8 q3,q3 +#endif + veor q3,q0 @ inp^=Xi +.Lgmult_neon: + vext.8 d16, d26, d26, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d0, d6, d6, #1 @ B1 + vmull.p8 q0, d26, d0 @ E = A*B1 + vext.8 d18, d26, d26, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d26, d22 @ G = A*B2 + vext.8 d20, d26, d26, #3 @ A3 + veor q8, q8, q0 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d0, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q0, d26, d0 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d26, d22 @ K = A*B4 + veor q10, q10, q0 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q0, d26, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q0, q0, q8 + veor q0, q0, q10 + veor d6,d6,d7 @ Karatsuba pre-processing + vext.8 d16, d28, d28, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d2, d6, d6, #1 @ B1 + vmull.p8 q1, d28, d2 @ E = A*B1 + vext.8 d18, d28, d28, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d28, d22 @ G = A*B2 + vext.8 d20, d28, d28, #3 @ A3 + veor q8, q8, q1 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d2, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q1, d28, d2 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d28, d22 @ K = A*B4 + veor q10, q10, q1 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q1, d28, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q1, q1, q8 + veor q1, q1, q10 + vext.8 d16, d27, d27, #1 @ A1 + vmull.p8 q8, d16, d7 @ F = A1*B + vext.8 d4, d7, d7, #1 @ B1 + vmull.p8 q2, d27, d4 @ E = A*B1 + vext.8 d18, d27, d27, #2 @ A2 + vmull.p8 q9, d18, d7 @ H = A2*B + vext.8 d22, d7, d7, #2 @ B2 + vmull.p8 q11, d27, d22 @ G = A*B2 + vext.8 d20, d27, d27, #3 @ A3 + veor q8, q8, q2 @ L = E + F + vmull.p8 q10, d20, d7 @ J = A3*B + vext.8 d4, d7, d7, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q2, d27, d4 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d7, d7, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d27, d22 @ K = A*B4 + veor q10, q10, q2 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q2, d27, d7 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q2, q2, q8 + veor q2, q2, q10 + veor q1,q1,q0 @ Karatsuba post-processing + veor q1,q1,q2 + veor d1,d1,d2 + veor d4,d4,d3 @ Xh|Xl - 256-bit result + + @ equivalent of reduction_avx from ghash-x86_64.pl + vshl.i64 q9,q0,#57 @ 1st phase + vshl.i64 q10,q0,#62 + veor q10,q10,q9 @ + vshl.i64 q9,q0,#63 + veor q10, q10, q9 @ + veor d1,d1,d20 @ + veor d4,d4,d21 + + vshr.u64 q10,q0,#1 @ 2nd phase + veor q2,q2,q0 + veor q0,q0,q10 @ + vshr.u64 q10,q10,#6 + vshr.u64 q0,q0,#1 @ + veor q0,q0,q2 @ + veor q0,q0,q10 @ + + subs r3,#16 + bne .Loop_neon + +#ifdef __ARMEL__ + vrev64.8 q0,q0 +#endif + sub r0,#16 + vst1.64 d1,[r0]! @ write out Xi + vst1.64 d0,[r0] + + bx lr @ bx lr +.size gcm_ghash_neon,.-gcm_ghash_neon +#endif +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-apple.S new file mode 100644 index 0000000000..a76b8d1747 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-apple.S @@ -0,0 +1,335 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +.text + +.globl _gcm_init_neon +.private_extern _gcm_init_neon + +.align 4 +_gcm_init_neon: + AARCH64_VALID_CALL_TARGET + // This function is adapted from gcm_init_v8. xC2 is t3. + ld1 {v17.2d}, [x1] // load H + movi v19.16b, #0xe1 + shl v19.2d, v19.2d, #57 // 0xc2.0 + ext v3.16b, v17.16b, v17.16b, #8 + ushr v18.2d, v19.2d, #63 + dup v17.4s, v17.s[1] + ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01 + ushr v18.2d, v3.2d, #63 + sshr v17.4s, v17.4s, #31 // broadcast carry bit + and v18.16b, v18.16b, v16.16b + shl v3.2d, v3.2d, #1 + ext v18.16b, v18.16b, v18.16b, #8 + and v16.16b, v16.16b, v17.16b + orr v3.16b, v3.16b, v18.16b // H<<<=1 + eor v5.16b, v3.16b, v16.16b // twisted H + st1 {v5.2d}, [x0] // store Htable[0] + ret + + +.globl _gcm_gmult_neon +.private_extern _gcm_gmult_neon + +.align 4 +_gcm_gmult_neon: + AARCH64_VALID_CALL_TARGET + ld1 {v3.16b}, [x0] // load Xi + ld1 {v5.1d}, [x1], #8 // load twisted H + ld1 {v6.1d}, [x1] + adrp x9, Lmasks@PAGE // load constants + add x9, x9, Lmasks@PAGEOFF + ld1 {v24.2d, v25.2d}, [x9] + rev64 v3.16b, v3.16b // byteswap Xi + ext v3.16b, v3.16b, v3.16b, #8 + eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing + + mov x3, #16 + b Lgmult_neon + + +.globl _gcm_ghash_neon +.private_extern _gcm_ghash_neon + +.align 4 +_gcm_ghash_neon: + AARCH64_VALID_CALL_TARGET + ld1 {v0.16b}, [x0] // load Xi + ld1 {v5.1d}, [x1], #8 // load twisted H + ld1 {v6.1d}, [x1] + adrp x9, Lmasks@PAGE // load constants + add x9, x9, Lmasks@PAGEOFF + ld1 {v24.2d, v25.2d}, [x9] + rev64 v0.16b, v0.16b // byteswap Xi + ext v0.16b, v0.16b, v0.16b, #8 + eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing + +Loop_neon: + ld1 {v3.16b}, [x2], #16 // load inp + rev64 v3.16b, v3.16b // byteswap inp + ext v3.16b, v3.16b, v3.16b, #8 + eor v3.16b, v3.16b, v0.16b // inp ^= Xi + +Lgmult_neon: + // Split the input into v3 and v4. (The upper halves are unused, + // so it is okay to leave them alone.) + ins v4.d[0], v3.d[1] + ext v16.8b, v5.8b, v5.8b, #1 // A1 + pmull v16.8h, v16.8b, v3.8b // F = A1*B + ext v0.8b, v3.8b, v3.8b, #1 // B1 + pmull v0.8h, v5.8b, v0.8b // E = A*B1 + ext v17.8b, v5.8b, v5.8b, #2 // A2 + pmull v17.8h, v17.8b, v3.8b // H = A2*B + ext v19.8b, v3.8b, v3.8b, #2 // B2 + pmull v19.8h, v5.8b, v19.8b // G = A*B2 + ext v18.8b, v5.8b, v5.8b, #3 // A3 + eor v16.16b, v16.16b, v0.16b // L = E + F + pmull v18.8h, v18.8b, v3.8b // J = A3*B + ext v0.8b, v3.8b, v3.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v0.8h, v5.8b, v0.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v3.8b, v3.8b, #4 // B4 + eor v18.16b, v18.16b, v0.16b // N = I + J + pmull v19.8h, v5.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v0.8h, v5.8b, v3.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v0.16b, v0.16b, v16.16b + eor v0.16b, v0.16b, v18.16b + eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing + ext v16.8b, v7.8b, v7.8b, #1 // A1 + pmull v16.8h, v16.8b, v3.8b // F = A1*B + ext v1.8b, v3.8b, v3.8b, #1 // B1 + pmull v1.8h, v7.8b, v1.8b // E = A*B1 + ext v17.8b, v7.8b, v7.8b, #2 // A2 + pmull v17.8h, v17.8b, v3.8b // H = A2*B + ext v19.8b, v3.8b, v3.8b, #2 // B2 + pmull v19.8h, v7.8b, v19.8b // G = A*B2 + ext v18.8b, v7.8b, v7.8b, #3 // A3 + eor v16.16b, v16.16b, v1.16b // L = E + F + pmull v18.8h, v18.8b, v3.8b // J = A3*B + ext v1.8b, v3.8b, v3.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v1.8h, v7.8b, v1.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v3.8b, v3.8b, #4 // B4 + eor v18.16b, v18.16b, v1.16b // N = I + J + pmull v19.8h, v7.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v1.8h, v7.8b, v3.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v1.16b, v1.16b, v16.16b + eor v1.16b, v1.16b, v18.16b + ext v16.8b, v6.8b, v6.8b, #1 // A1 + pmull v16.8h, v16.8b, v4.8b // F = A1*B + ext v2.8b, v4.8b, v4.8b, #1 // B1 + pmull v2.8h, v6.8b, v2.8b // E = A*B1 + ext v17.8b, v6.8b, v6.8b, #2 // A2 + pmull v17.8h, v17.8b, v4.8b // H = A2*B + ext v19.8b, v4.8b, v4.8b, #2 // B2 + pmull v19.8h, v6.8b, v19.8b // G = A*B2 + ext v18.8b, v6.8b, v6.8b, #3 // A3 + eor v16.16b, v16.16b, v2.16b // L = E + F + pmull v18.8h, v18.8b, v4.8b // J = A3*B + ext v2.8b, v4.8b, v4.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v2.8h, v6.8b, v2.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v4.8b, v4.8b, #4 // B4 + eor v18.16b, v18.16b, v2.16b // N = I + J + pmull v19.8h, v6.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v2.8h, v6.8b, v4.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v2.16b, v2.16b, v16.16b + eor v2.16b, v2.16b, v18.16b + ext v16.16b, v0.16b, v2.16b, #8 + eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing + eor v1.16b, v1.16b, v2.16b + eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi + ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result + // This is a no-op due to the ins instruction below. + // ins v2.d[0], v1.d[1] + + // equivalent of reduction_avx from ghash-x86_64.pl + shl v17.2d, v0.2d, #57 // 1st phase + shl v18.2d, v0.2d, #62 + eor v18.16b, v18.16b, v17.16b // + shl v17.2d, v0.2d, #63 + eor v18.16b, v18.16b, v17.16b // + // Note Xm contains {Xl.d[1], Xh.d[0]}. + eor v18.16b, v18.16b, v1.16b + ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0] + ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1] + + ushr v18.2d, v0.2d, #1 // 2nd phase + eor v2.16b, v2.16b,v0.16b + eor v0.16b, v0.16b,v18.16b // + ushr v18.2d, v18.2d, #6 + ushr v0.2d, v0.2d, #1 // + eor v0.16b, v0.16b, v2.16b // + eor v0.16b, v0.16b, v18.16b // + + subs x3, x3, #16 + bne Loop_neon + + rev64 v0.16b, v0.16b // byteswap Xi and write + ext v0.16b, v0.16b, v0.16b, #8 + st1 {v0.16b}, [x0] + + ret + + +.section __TEXT,__const +.align 4 +Lmasks: +.quad 0x0000ffffffffffff // k48 +.quad 0x00000000ffffffff // k32 +.quad 0x000000000000ffff // k16 +.quad 0x0000000000000000 // k0 +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-linux.S new file mode 100644 index 0000000000..6203bc6f54 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-linux.S @@ -0,0 +1,335 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +.text + +.globl gcm_init_neon +.hidden gcm_init_neon +.type gcm_init_neon,%function +.align 4 +gcm_init_neon: + AARCH64_VALID_CALL_TARGET + // This function is adapted from gcm_init_v8. xC2 is t3. + ld1 {v17.2d}, [x1] // load H + movi v19.16b, #0xe1 + shl v19.2d, v19.2d, #57 // 0xc2.0 + ext v3.16b, v17.16b, v17.16b, #8 + ushr v18.2d, v19.2d, #63 + dup v17.4s, v17.s[1] + ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01 + ushr v18.2d, v3.2d, #63 + sshr v17.4s, v17.4s, #31 // broadcast carry bit + and v18.16b, v18.16b, v16.16b + shl v3.2d, v3.2d, #1 + ext v18.16b, v18.16b, v18.16b, #8 + and v16.16b, v16.16b, v17.16b + orr v3.16b, v3.16b, v18.16b // H<<<=1 + eor v5.16b, v3.16b, v16.16b // twisted H + st1 {v5.2d}, [x0] // store Htable[0] + ret +.size gcm_init_neon,.-gcm_init_neon + +.globl gcm_gmult_neon +.hidden gcm_gmult_neon +.type gcm_gmult_neon,%function +.align 4 +gcm_gmult_neon: + AARCH64_VALID_CALL_TARGET + ld1 {v3.16b}, [x0] // load Xi + ld1 {v5.1d}, [x1], #8 // load twisted H + ld1 {v6.1d}, [x1] + adrp x9, .Lmasks // load constants + add x9, x9, :lo12:.Lmasks + ld1 {v24.2d, v25.2d}, [x9] + rev64 v3.16b, v3.16b // byteswap Xi + ext v3.16b, v3.16b, v3.16b, #8 + eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing + + mov x3, #16 + b .Lgmult_neon +.size gcm_gmult_neon,.-gcm_gmult_neon + +.globl gcm_ghash_neon +.hidden gcm_ghash_neon +.type gcm_ghash_neon,%function +.align 4 +gcm_ghash_neon: + AARCH64_VALID_CALL_TARGET + ld1 {v0.16b}, [x0] // load Xi + ld1 {v5.1d}, [x1], #8 // load twisted H + ld1 {v6.1d}, [x1] + adrp x9, .Lmasks // load constants + add x9, x9, :lo12:.Lmasks + ld1 {v24.2d, v25.2d}, [x9] + rev64 v0.16b, v0.16b // byteswap Xi + ext v0.16b, v0.16b, v0.16b, #8 + eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing + +.Loop_neon: + ld1 {v3.16b}, [x2], #16 // load inp + rev64 v3.16b, v3.16b // byteswap inp + ext v3.16b, v3.16b, v3.16b, #8 + eor v3.16b, v3.16b, v0.16b // inp ^= Xi + +.Lgmult_neon: + // Split the input into v3 and v4. (The upper halves are unused, + // so it is okay to leave them alone.) + ins v4.d[0], v3.d[1] + ext v16.8b, v5.8b, v5.8b, #1 // A1 + pmull v16.8h, v16.8b, v3.8b // F = A1*B + ext v0.8b, v3.8b, v3.8b, #1 // B1 + pmull v0.8h, v5.8b, v0.8b // E = A*B1 + ext v17.8b, v5.8b, v5.8b, #2 // A2 + pmull v17.8h, v17.8b, v3.8b // H = A2*B + ext v19.8b, v3.8b, v3.8b, #2 // B2 + pmull v19.8h, v5.8b, v19.8b // G = A*B2 + ext v18.8b, v5.8b, v5.8b, #3 // A3 + eor v16.16b, v16.16b, v0.16b // L = E + F + pmull v18.8h, v18.8b, v3.8b // J = A3*B + ext v0.8b, v3.8b, v3.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v0.8h, v5.8b, v0.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v3.8b, v3.8b, #4 // B4 + eor v18.16b, v18.16b, v0.16b // N = I + J + pmull v19.8h, v5.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v0.8h, v5.8b, v3.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v0.16b, v0.16b, v16.16b + eor v0.16b, v0.16b, v18.16b + eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing + ext v16.8b, v7.8b, v7.8b, #1 // A1 + pmull v16.8h, v16.8b, v3.8b // F = A1*B + ext v1.8b, v3.8b, v3.8b, #1 // B1 + pmull v1.8h, v7.8b, v1.8b // E = A*B1 + ext v17.8b, v7.8b, v7.8b, #2 // A2 + pmull v17.8h, v17.8b, v3.8b // H = A2*B + ext v19.8b, v3.8b, v3.8b, #2 // B2 + pmull v19.8h, v7.8b, v19.8b // G = A*B2 + ext v18.8b, v7.8b, v7.8b, #3 // A3 + eor v16.16b, v16.16b, v1.16b // L = E + F + pmull v18.8h, v18.8b, v3.8b // J = A3*B + ext v1.8b, v3.8b, v3.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v1.8h, v7.8b, v1.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v3.8b, v3.8b, #4 // B4 + eor v18.16b, v18.16b, v1.16b // N = I + J + pmull v19.8h, v7.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v1.8h, v7.8b, v3.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v1.16b, v1.16b, v16.16b + eor v1.16b, v1.16b, v18.16b + ext v16.8b, v6.8b, v6.8b, #1 // A1 + pmull v16.8h, v16.8b, v4.8b // F = A1*B + ext v2.8b, v4.8b, v4.8b, #1 // B1 + pmull v2.8h, v6.8b, v2.8b // E = A*B1 + ext v17.8b, v6.8b, v6.8b, #2 // A2 + pmull v17.8h, v17.8b, v4.8b // H = A2*B + ext v19.8b, v4.8b, v4.8b, #2 // B2 + pmull v19.8h, v6.8b, v19.8b // G = A*B2 + ext v18.8b, v6.8b, v6.8b, #3 // A3 + eor v16.16b, v16.16b, v2.16b // L = E + F + pmull v18.8h, v18.8b, v4.8b // J = A3*B + ext v2.8b, v4.8b, v4.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v2.8h, v6.8b, v2.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v4.8b, v4.8b, #4 // B4 + eor v18.16b, v18.16b, v2.16b // N = I + J + pmull v19.8h, v6.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v2.8h, v6.8b, v4.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v2.16b, v2.16b, v16.16b + eor v2.16b, v2.16b, v18.16b + ext v16.16b, v0.16b, v2.16b, #8 + eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing + eor v1.16b, v1.16b, v2.16b + eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi + ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result + // This is a no-op due to the ins instruction below. + // ins v2.d[0], v1.d[1] + + // equivalent of reduction_avx from ghash-x86_64.pl + shl v17.2d, v0.2d, #57 // 1st phase + shl v18.2d, v0.2d, #62 + eor v18.16b, v18.16b, v17.16b // + shl v17.2d, v0.2d, #63 + eor v18.16b, v18.16b, v17.16b // + // Note Xm contains {Xl.d[1], Xh.d[0]}. + eor v18.16b, v18.16b, v1.16b + ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0] + ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1] + + ushr v18.2d, v0.2d, #1 // 2nd phase + eor v2.16b, v2.16b,v0.16b + eor v0.16b, v0.16b,v18.16b // + ushr v18.2d, v18.2d, #6 + ushr v0.2d, v0.2d, #1 // + eor v0.16b, v0.16b, v2.16b // + eor v0.16b, v0.16b, v18.16b // + + subs x3, x3, #16 + bne .Loop_neon + + rev64 v0.16b, v0.16b // byteswap Xi and write + ext v0.16b, v0.16b, v0.16b, #8 + st1 {v0.16b}, [x0] + + ret +.size gcm_ghash_neon,.-gcm_ghash_neon + +.section .rodata +.align 4 +.Lmasks: +.quad 0x0000ffffffffffff // k48 +.quad 0x00000000ffffffff // k32 +.quad 0x000000000000ffff // k16 +.quad 0x0000000000000000 // k0 +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-win.S new file mode 100644 index 0000000000..d9688931e2 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-neon-armv8-win.S @@ -0,0 +1,341 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +.text + +.globl gcm_init_neon + +.def gcm_init_neon + .type 32 +.endef +.align 4 +gcm_init_neon: + AARCH64_VALID_CALL_TARGET + // This function is adapted from gcm_init_v8. xC2 is t3. + ld1 {v17.2d}, [x1] // load H + movi v19.16b, #0xe1 + shl v19.2d, v19.2d, #57 // 0xc2.0 + ext v3.16b, v17.16b, v17.16b, #8 + ushr v18.2d, v19.2d, #63 + dup v17.4s, v17.s[1] + ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01 + ushr v18.2d, v3.2d, #63 + sshr v17.4s, v17.4s, #31 // broadcast carry bit + and v18.16b, v18.16b, v16.16b + shl v3.2d, v3.2d, #1 + ext v18.16b, v18.16b, v18.16b, #8 + and v16.16b, v16.16b, v17.16b + orr v3.16b, v3.16b, v18.16b // H<<<=1 + eor v5.16b, v3.16b, v16.16b // twisted H + st1 {v5.2d}, [x0] // store Htable[0] + ret + + +.globl gcm_gmult_neon + +.def gcm_gmult_neon + .type 32 +.endef +.align 4 +gcm_gmult_neon: + AARCH64_VALID_CALL_TARGET + ld1 {v3.16b}, [x0] // load Xi + ld1 {v5.1d}, [x1], #8 // load twisted H + ld1 {v6.1d}, [x1] + adrp x9, Lmasks // load constants + add x9, x9, :lo12:Lmasks + ld1 {v24.2d, v25.2d}, [x9] + rev64 v3.16b, v3.16b // byteswap Xi + ext v3.16b, v3.16b, v3.16b, #8 + eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing + + mov x3, #16 + b Lgmult_neon + + +.globl gcm_ghash_neon + +.def gcm_ghash_neon + .type 32 +.endef +.align 4 +gcm_ghash_neon: + AARCH64_VALID_CALL_TARGET + ld1 {v0.16b}, [x0] // load Xi + ld1 {v5.1d}, [x1], #8 // load twisted H + ld1 {v6.1d}, [x1] + adrp x9, Lmasks // load constants + add x9, x9, :lo12:Lmasks + ld1 {v24.2d, v25.2d}, [x9] + rev64 v0.16b, v0.16b // byteswap Xi + ext v0.16b, v0.16b, v0.16b, #8 + eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing + +Loop_neon: + ld1 {v3.16b}, [x2], #16 // load inp + rev64 v3.16b, v3.16b // byteswap inp + ext v3.16b, v3.16b, v3.16b, #8 + eor v3.16b, v3.16b, v0.16b // inp ^= Xi + +Lgmult_neon: + // Split the input into v3 and v4. (The upper halves are unused, + // so it is okay to leave them alone.) + ins v4.d[0], v3.d[1] + ext v16.8b, v5.8b, v5.8b, #1 // A1 + pmull v16.8h, v16.8b, v3.8b // F = A1*B + ext v0.8b, v3.8b, v3.8b, #1 // B1 + pmull v0.8h, v5.8b, v0.8b // E = A*B1 + ext v17.8b, v5.8b, v5.8b, #2 // A2 + pmull v17.8h, v17.8b, v3.8b // H = A2*B + ext v19.8b, v3.8b, v3.8b, #2 // B2 + pmull v19.8h, v5.8b, v19.8b // G = A*B2 + ext v18.8b, v5.8b, v5.8b, #3 // A3 + eor v16.16b, v16.16b, v0.16b // L = E + F + pmull v18.8h, v18.8b, v3.8b // J = A3*B + ext v0.8b, v3.8b, v3.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v0.8h, v5.8b, v0.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v3.8b, v3.8b, #4 // B4 + eor v18.16b, v18.16b, v0.16b // N = I + J + pmull v19.8h, v5.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v0.8h, v5.8b, v3.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v0.16b, v0.16b, v16.16b + eor v0.16b, v0.16b, v18.16b + eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing + ext v16.8b, v7.8b, v7.8b, #1 // A1 + pmull v16.8h, v16.8b, v3.8b // F = A1*B + ext v1.8b, v3.8b, v3.8b, #1 // B1 + pmull v1.8h, v7.8b, v1.8b // E = A*B1 + ext v17.8b, v7.8b, v7.8b, #2 // A2 + pmull v17.8h, v17.8b, v3.8b // H = A2*B + ext v19.8b, v3.8b, v3.8b, #2 // B2 + pmull v19.8h, v7.8b, v19.8b // G = A*B2 + ext v18.8b, v7.8b, v7.8b, #3 // A3 + eor v16.16b, v16.16b, v1.16b // L = E + F + pmull v18.8h, v18.8b, v3.8b // J = A3*B + ext v1.8b, v3.8b, v3.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v1.8h, v7.8b, v1.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v3.8b, v3.8b, #4 // B4 + eor v18.16b, v18.16b, v1.16b // N = I + J + pmull v19.8h, v7.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v1.8h, v7.8b, v3.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v1.16b, v1.16b, v16.16b + eor v1.16b, v1.16b, v18.16b + ext v16.8b, v6.8b, v6.8b, #1 // A1 + pmull v16.8h, v16.8b, v4.8b // F = A1*B + ext v2.8b, v4.8b, v4.8b, #1 // B1 + pmull v2.8h, v6.8b, v2.8b // E = A*B1 + ext v17.8b, v6.8b, v6.8b, #2 // A2 + pmull v17.8h, v17.8b, v4.8b // H = A2*B + ext v19.8b, v4.8b, v4.8b, #2 // B2 + pmull v19.8h, v6.8b, v19.8b // G = A*B2 + ext v18.8b, v6.8b, v6.8b, #3 // A3 + eor v16.16b, v16.16b, v2.16b // L = E + F + pmull v18.8h, v18.8b, v4.8b // J = A3*B + ext v2.8b, v4.8b, v4.8b, #3 // B3 + eor v17.16b, v17.16b, v19.16b // M = G + H + pmull v2.8h, v6.8b, v2.8b // I = A*B3 + + // Here we diverge from the 32-bit version. It computes the following + // (instructions reordered for clarity): + // + // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) + // vand $t0#hi, $t0#hi, $k48 + // veor $t0#lo, $t0#lo, $t0#hi + // + // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) + // vand $t1#hi, $t1#hi, $k32 + // veor $t1#lo, $t1#lo, $t1#hi + // + // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) + // vand $t2#hi, $t2#hi, $k16 + // veor $t2#lo, $t2#lo, $t2#hi + // + // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) + // vmov.i64 $t3#hi, #0 + // + // $kN is a mask with the bottom N bits set. AArch64 cannot compute on + // upper halves of SIMD registers, so we must split each half into + // separate registers. To compensate, we pair computations up and + // parallelize. + + ext v19.8b, v4.8b, v4.8b, #4 // B4 + eor v18.16b, v18.16b, v2.16b // N = I + J + pmull v19.8h, v6.8b, v19.8b // K = A*B4 + + // This can probably be scheduled more efficiently. For now, we just + // pair up independent instructions. + zip1 v20.2d, v16.2d, v17.2d + zip1 v22.2d, v18.2d, v19.2d + zip2 v21.2d, v16.2d, v17.2d + zip2 v23.2d, v18.2d, v19.2d + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + and v21.16b, v21.16b, v24.16b + and v23.16b, v23.16b, v25.16b + eor v20.16b, v20.16b, v21.16b + eor v22.16b, v22.16b, v23.16b + zip1 v16.2d, v20.2d, v21.2d + zip1 v18.2d, v22.2d, v23.2d + zip2 v17.2d, v20.2d, v21.2d + zip2 v19.2d, v22.2d, v23.2d + + ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 + ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 + pmull v2.8h, v6.8b, v4.8b // D = A*B + ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 + ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 + eor v16.16b, v16.16b, v17.16b + eor v18.16b, v18.16b, v19.16b + eor v2.16b, v2.16b, v16.16b + eor v2.16b, v2.16b, v18.16b + ext v16.16b, v0.16b, v2.16b, #8 + eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing + eor v1.16b, v1.16b, v2.16b + eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi + ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result + // This is a no-op due to the ins instruction below. + // ins v2.d[0], v1.d[1] + + // equivalent of reduction_avx from ghash-x86_64.pl + shl v17.2d, v0.2d, #57 // 1st phase + shl v18.2d, v0.2d, #62 + eor v18.16b, v18.16b, v17.16b // + shl v17.2d, v0.2d, #63 + eor v18.16b, v18.16b, v17.16b // + // Note Xm contains {Xl.d[1], Xh.d[0]}. + eor v18.16b, v18.16b, v1.16b + ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0] + ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1] + + ushr v18.2d, v0.2d, #1 // 2nd phase + eor v2.16b, v2.16b,v0.16b + eor v0.16b, v0.16b,v18.16b // + ushr v18.2d, v18.2d, #6 + ushr v0.2d, v0.2d, #1 // + eor v0.16b, v0.16b, v2.16b // + eor v0.16b, v0.16b, v18.16b // + + subs x3, x3, #16 + bne Loop_neon + + rev64 v0.16b, v0.16b // byteswap Xi and write + ext v0.16b, v0.16b, v0.16b, #8 + st1 {v0.16b}, [x0] + + ret + + +.section .rodata +.align 4 +Lmasks: +.quad 0x0000ffffffffffff // k48 +.quad 0x00000000ffffffff // k32 +.quad 0x000000000000ffff // k16 +.quad 0x0000000000000000 // k0 +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-apple.S b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-apple.S new file mode 100644 index 0000000000..24b1f2f309 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-apple.S @@ -0,0 +1,288 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _gcm_gmult_ssse3 +.private_extern _gcm_gmult_ssse3 +.align 4 +_gcm_gmult_ssse3: +L_gcm_gmult_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + movdqu (%edi),%xmm0 + call L000pic_point +L000pic_point: + popl %eax + movdqa Lreverse_bytes-L000pic_point(%eax),%xmm7 + movdqa Llow4_mask-L000pic_point(%eax),%xmm2 +.byte 102,15,56,0,199 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + movl $5,%eax +L001loop_row_1: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz L001loop_row_1 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movl $5,%eax +L002loop_row_2: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz L002loop_row_2 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movl $6,%eax +L003loop_row_3: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz L003loop_row_3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,0,215 + movdqu %xmm2,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_ghash_ssse3 +.private_extern _gcm_ghash_ssse3 +.align 4 +_gcm_ghash_ssse3: +L_gcm_ghash_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + movl 28(%esp),%edx + movl 32(%esp),%ecx + movdqu (%edi),%xmm0 + call L004pic_point +L004pic_point: + popl %ebx + movdqa Lreverse_bytes-L004pic_point(%ebx),%xmm7 + andl $-16,%ecx +.byte 102,15,56,0,199 + pxor %xmm3,%xmm3 +L005loop_ghash: + movdqa Llow4_mask-L004pic_point(%ebx),%xmm2 + movdqu (%edx),%xmm1 +.byte 102,15,56,0,207 + pxor %xmm1,%xmm0 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + pxor %xmm2,%xmm2 + movl $5,%eax +L006loop_row_4: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz L006loop_row_4 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movl $5,%eax +L007loop_row_5: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz L007loop_row_5 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movl $6,%eax +L008loop_row_6: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz L008loop_row_6 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movdqa %xmm2,%xmm0 + leal -256(%esi),%esi + leal 16(%edx),%edx + subl $16,%ecx + jnz L005loop_ghash +.byte 102,15,56,0,199 + movdqu %xmm0,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4,0x90 +Lreverse_bytes: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.align 4,0x90 +Llow4_mask: +.long 252645135,252645135,252645135,252645135 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-linux.S b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-linux.S new file mode 100644 index 0000000000..445db3b4f7 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-linux.S @@ -0,0 +1,292 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl gcm_gmult_ssse3 +.hidden gcm_gmult_ssse3 +.type gcm_gmult_ssse3,@function +.align 16 +gcm_gmult_ssse3: +.L_gcm_gmult_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + movdqu (%edi),%xmm0 + call .L000pic_point +.L000pic_point: + popl %eax + movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7 + movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2 +.byte 102,15,56,0,199 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + movl $5,%eax +.L001loop_row_1: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz .L001loop_row_1 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movl $5,%eax +.L002loop_row_2: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz .L002loop_row_2 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movl $6,%eax +.L003loop_row_3: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz .L003loop_row_3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,0,215 + movdqu %xmm2,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin +.globl gcm_ghash_ssse3 +.hidden gcm_ghash_ssse3 +.type gcm_ghash_ssse3,@function +.align 16 +gcm_ghash_ssse3: +.L_gcm_ghash_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + movl 28(%esp),%edx + movl 32(%esp),%ecx + movdqu (%edi),%xmm0 + call .L004pic_point +.L004pic_point: + popl %ebx + movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7 + andl $-16,%ecx +.byte 102,15,56,0,199 + pxor %xmm3,%xmm3 +.L005loop_ghash: + movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2 + movdqu (%edx),%xmm1 +.byte 102,15,56,0,207 + pxor %xmm1,%xmm0 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + pxor %xmm2,%xmm2 + movl $5,%eax +.L006loop_row_4: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz .L006loop_row_4 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movl $5,%eax +.L007loop_row_5: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz .L007loop_row_5 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movl $6,%eax +.L008loop_row_6: + movdqa (%esi),%xmm4 + leal 16(%esi),%esi + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + pxor %xmm5,%xmm2 + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + subl $1,%eax + jnz .L008loop_row_6 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movdqa %xmm2,%xmm0 + leal -256(%esi),%esi + leal 16(%edx),%edx + subl $16,%ecx + jnz .L005loop_ghash +.byte 102,15,56,0,199 + movdqu %xmm0,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin +.align 16 +.Lreverse_bytes: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.align 16 +.Llow4_mask: +.long 252645135,252645135,252645135,252645135 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-win.asm b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-win.asm new file mode 100644 index 0000000000..52108aacc1 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-win.asm @@ -0,0 +1,297 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _gcm_gmult_ssse3 +align 16 +_gcm_gmult_ssse3: +L$_gcm_gmult_ssse3_begin: + push ebp + push ebx + push esi + push edi + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + movdqu xmm0,[edi] + call L$000pic_point +L$000pic_point: + pop eax + movdqa xmm7,[(L$reverse_bytes-L$000pic_point)+eax] + movdqa xmm2,[(L$low4_mask-L$000pic_point)+eax] +db 102,15,56,0,199 + movdqa xmm1,xmm2 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm2 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + mov eax,5 +L$001loop_row_1: + movdqa xmm4,[esi] + lea esi,[16+esi] + movdqa xmm6,xmm2 +db 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + movdqa xmm5,xmm4 +db 102,15,56,0,224 +db 102,15,56,0,233 + pxor xmm2,xmm5 + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + sub eax,1 + jnz NEAR L$001loop_row_1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + mov eax,5 +L$002loop_row_2: + movdqa xmm4,[esi] + lea esi,[16+esi] + movdqa xmm6,xmm2 +db 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + movdqa xmm5,xmm4 +db 102,15,56,0,224 +db 102,15,56,0,233 + pxor xmm2,xmm5 + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + sub eax,1 + jnz NEAR L$002loop_row_2 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + mov eax,6 +L$003loop_row_3: + movdqa xmm4,[esi] + lea esi,[16+esi] + movdqa xmm6,xmm2 +db 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + movdqa xmm5,xmm4 +db 102,15,56,0,224 +db 102,15,56,0,233 + pxor xmm2,xmm5 + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + sub eax,1 + jnz NEAR L$003loop_row_3 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 +db 102,15,56,0,215 + movdqu [edi],xmm2 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pop edi + pop esi + pop ebx + pop ebp + ret +global _gcm_ghash_ssse3 +align 16 +_gcm_ghash_ssse3: +L$_gcm_ghash_ssse3_begin: + push ebp + push ebx + push esi + push edi + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edx,DWORD [28+esp] + mov ecx,DWORD [32+esp] + movdqu xmm0,[edi] + call L$004pic_point +L$004pic_point: + pop ebx + movdqa xmm7,[(L$reverse_bytes-L$004pic_point)+ebx] + and ecx,-16 +db 102,15,56,0,199 + pxor xmm3,xmm3 +L$005loop_ghash: + movdqa xmm2,[(L$low4_mask-L$004pic_point)+ebx] + movdqu xmm1,[edx] +db 102,15,56,0,207 + pxor xmm0,xmm1 + movdqa xmm1,xmm2 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm2 + pxor xmm2,xmm2 + mov eax,5 +L$006loop_row_4: + movdqa xmm4,[esi] + lea esi,[16+esi] + movdqa xmm6,xmm2 +db 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + movdqa xmm5,xmm4 +db 102,15,56,0,224 +db 102,15,56,0,233 + pxor xmm2,xmm5 + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + sub eax,1 + jnz NEAR L$006loop_row_4 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + mov eax,5 +L$007loop_row_5: + movdqa xmm4,[esi] + lea esi,[16+esi] + movdqa xmm6,xmm2 +db 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + movdqa xmm5,xmm4 +db 102,15,56,0,224 +db 102,15,56,0,233 + pxor xmm2,xmm5 + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + sub eax,1 + jnz NEAR L$007loop_row_5 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + mov eax,6 +L$008loop_row_6: + movdqa xmm4,[esi] + lea esi,[16+esi] + movdqa xmm6,xmm2 +db 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + movdqa xmm5,xmm4 +db 102,15,56,0,224 +db 102,15,56,0,233 + pxor xmm2,xmm5 + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + sub eax,1 + jnz NEAR L$008loop_row_6 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + movdqa xmm0,xmm2 + lea esi,[esi-256] + lea edx,[16+edx] + sub ecx,16 + jnz NEAR L$005loop_ghash +db 102,15,56,0,199 + movdqu [edi],xmm0 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +L$reverse_bytes: +db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +align 16 +L$low4_mask: +dd 252645135,252645135,252645135,252645135 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-apple.S new file mode 100644 index 0000000000..bcbf824f4a --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-apple.S @@ -0,0 +1,423 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + + + + + +.globl _gcm_gmult_ssse3 +.private_extern _gcm_gmult_ssse3 +.p2align 4 +_gcm_gmult_ssse3: + + +_CET_ENDBR + movdqu (%rdi),%xmm0 + movdqa L$reverse_bytes(%rip),%xmm10 + movdqa L$low4_mask(%rip),%xmm2 + + +.byte 102,65,15,56,0,194 + + + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + + + + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +L$oop_row_1: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz L$oop_row_1 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +L$oop_row_2: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz L$oop_row_2 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $6,%rax +L$oop_row_3: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz L$oop_row_3 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + +.byte 102,65,15,56,0,210 + movdqu %xmm2,(%rdi) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + ret + + + + + + + + + +.globl _gcm_ghash_ssse3 +.private_extern _gcm_ghash_ssse3 +.p2align 4 +_gcm_ghash_ssse3: + + +_CET_ENDBR + movdqu (%rdi),%xmm0 + movdqa L$reverse_bytes(%rip),%xmm10 + movdqa L$low4_mask(%rip),%xmm11 + + + andq $-16,%rcx + + + +.byte 102,65,15,56,0,194 + + + pxor %xmm3,%xmm3 +L$oop_ghash: + + movdqu (%rdx),%xmm1 +.byte 102,65,15,56,0,202 + pxor %xmm1,%xmm0 + + + movdqa %xmm11,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm11,%xmm0 + + + + + pxor %xmm2,%xmm2 + + movq $5,%rax +L$oop_row_4: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz L$oop_row_4 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +L$oop_row_5: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz L$oop_row_5 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $6,%rax +L$oop_row_6: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz L$oop_row_6 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movdqa %xmm2,%xmm0 + + + leaq -256(%rsi),%rsi + + + leaq 16(%rdx),%rdx + subq $16,%rcx + jnz L$oop_ghash + + +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + ret + + + + +.section __DATA,__const +.p2align 4 + + +L$reverse_bytes: +.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +L$low4_mask: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-linux.S new file mode 100644 index 0000000000..2acb448953 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-linux.S @@ -0,0 +1,423 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + + + + + +.type gcm_gmult_ssse3, @function +.globl gcm_gmult_ssse3 +.hidden gcm_gmult_ssse3 +.align 16 +gcm_gmult_ssse3: +.cfi_startproc + +_CET_ENDBR + movdqu (%rdi),%xmm0 + movdqa .Lreverse_bytes(%rip),%xmm10 + movdqa .Llow4_mask(%rip),%xmm2 + + +.byte 102,65,15,56,0,194 + + + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + + + + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +.Loop_row_1: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_1 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +.Loop_row_2: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_2 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $6,%rax +.Loop_row_3: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_3 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + +.byte 102,65,15,56,0,210 + movdqu %xmm2,(%rdi) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + ret +.cfi_endproc + +.size gcm_gmult_ssse3,.-gcm_gmult_ssse3 + + + + + +.type gcm_ghash_ssse3, @function +.globl gcm_ghash_ssse3 +.hidden gcm_ghash_ssse3 +.align 16 +gcm_ghash_ssse3: +.cfi_startproc + +_CET_ENDBR + movdqu (%rdi),%xmm0 + movdqa .Lreverse_bytes(%rip),%xmm10 + movdqa .Llow4_mask(%rip),%xmm11 + + + andq $-16,%rcx + + + +.byte 102,65,15,56,0,194 + + + pxor %xmm3,%xmm3 +.Loop_ghash: + + movdqu (%rdx),%xmm1 +.byte 102,65,15,56,0,202 + pxor %xmm1,%xmm0 + + + movdqa %xmm11,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm11,%xmm0 + + + + + pxor %xmm2,%xmm2 + + movq $5,%rax +.Loop_row_4: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_4 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +.Loop_row_5: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_5 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $6,%rax +.Loop_row_6: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_6 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movdqa %xmm2,%xmm0 + + + leaq -256(%rsi),%rsi + + + leaq 16(%rdx),%rdx + subq $16,%rcx + jnz .Loop_ghash + + +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + ret +.cfi_endproc + +.size gcm_ghash_ssse3,.-gcm_ghash_ssse3 + +.section .rodata +.align 16 + + +.Lreverse_bytes: +.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.Llow4_mask: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-win.asm new file mode 100644 index 0000000000..84c5d40bd3 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86_64-win.asm @@ -0,0 +1,497 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + + + + + + +global gcm_gmult_ssse3 +ALIGN 16 +gcm_gmult_ssse3: + +$L$SEH_begin_gcm_gmult_ssse3_1: +_CET_ENDBR + sub rsp,40 +$L$SEH_prolog_gcm_gmult_ssse3_2: + movdqa XMMWORD[rsp],xmm6 +$L$SEH_prolog_gcm_gmult_ssse3_3: + movdqa XMMWORD[16+rsp],xmm10 +$L$SEH_prolog_gcm_gmult_ssse3_4: + movdqu xmm0,XMMWORD[rcx] + movdqa xmm10,XMMWORD[$L$reverse_bytes] + movdqa xmm2,XMMWORD[$L$low4_mask] + + +DB 102,65,15,56,0,194 + + + movdqa xmm1,xmm2 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm2 + + + + + pxor xmm2,xmm2 + pxor xmm3,xmm3 + mov rax,5 +$L$oop_row_1: + movdqa xmm4,XMMWORD[rdx] + lea rdx,[16+rdx] + + + movdqa xmm6,xmm2 +DB 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + + + + + movdqa xmm5,xmm4 +DB 102,15,56,0,224 +DB 102,15,56,0,233 + + + pxor xmm2,xmm5 + + + + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + + + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + + sub rax,1 + jnz NEAR $L$oop_row_1 + + + + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + mov rax,5 +$L$oop_row_2: + movdqa xmm4,XMMWORD[rdx] + lea rdx,[16+rdx] + + + movdqa xmm6,xmm2 +DB 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + + + + + movdqa xmm5,xmm4 +DB 102,15,56,0,224 +DB 102,15,56,0,233 + + + pxor xmm2,xmm5 + + + + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + + + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + + sub rax,1 + jnz NEAR $L$oop_row_2 + + + + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + mov rax,6 +$L$oop_row_3: + movdqa xmm4,XMMWORD[rdx] + lea rdx,[16+rdx] + + + movdqa xmm6,xmm2 +DB 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + + + + + movdqa xmm5,xmm4 +DB 102,15,56,0,224 +DB 102,15,56,0,233 + + + pxor xmm2,xmm5 + + + + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + + + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + + sub rax,1 + jnz NEAR $L$oop_row_3 + + + + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + +DB 102,65,15,56,0,210 + movdqu XMMWORD[rcx],xmm2 + + + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + movdqa xmm6,XMMWORD[rsp] + movdqa xmm10,XMMWORD[16+rsp] + add rsp,40 + ret + +$L$SEH_end_gcm_gmult_ssse3_5: + + + + + + + +global gcm_ghash_ssse3 +ALIGN 16 +gcm_ghash_ssse3: + +$L$SEH_begin_gcm_ghash_ssse3_1: +_CET_ENDBR + sub rsp,56 +$L$SEH_prolog_gcm_ghash_ssse3_2: + movdqa XMMWORD[rsp],xmm6 +$L$SEH_prolog_gcm_ghash_ssse3_3: + movdqa XMMWORD[16+rsp],xmm10 +$L$SEH_prolog_gcm_ghash_ssse3_4: + movdqa XMMWORD[32+rsp],xmm11 +$L$SEH_prolog_gcm_ghash_ssse3_5: + movdqu xmm0,XMMWORD[rcx] + movdqa xmm10,XMMWORD[$L$reverse_bytes] + movdqa xmm11,XMMWORD[$L$low4_mask] + + + and r9,-16 + + + +DB 102,65,15,56,0,194 + + + pxor xmm3,xmm3 +$L$oop_ghash: + + movdqu xmm1,XMMWORD[r8] +DB 102,65,15,56,0,202 + pxor xmm0,xmm1 + + + movdqa xmm1,xmm11 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm11 + + + + + pxor xmm2,xmm2 + + mov rax,5 +$L$oop_row_4: + movdqa xmm4,XMMWORD[rdx] + lea rdx,[16+rdx] + + + movdqa xmm6,xmm2 +DB 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + + + + + movdqa xmm5,xmm4 +DB 102,15,56,0,224 +DB 102,15,56,0,233 + + + pxor xmm2,xmm5 + + + + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + + + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + + sub rax,1 + jnz NEAR $L$oop_row_4 + + + + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + mov rax,5 +$L$oop_row_5: + movdqa xmm4,XMMWORD[rdx] + lea rdx,[16+rdx] + + + movdqa xmm6,xmm2 +DB 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + + + + + movdqa xmm5,xmm4 +DB 102,15,56,0,224 +DB 102,15,56,0,233 + + + pxor xmm2,xmm5 + + + + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + + + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + + sub rax,1 + jnz NEAR $L$oop_row_5 + + + + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + mov rax,6 +$L$oop_row_6: + movdqa xmm4,XMMWORD[rdx] + lea rdx,[16+rdx] + + + movdqa xmm6,xmm2 +DB 102,15,58,15,243,1 + movdqa xmm3,xmm6 + psrldq xmm2,1 + + + + + movdqa xmm5,xmm4 +DB 102,15,56,0,224 +DB 102,15,56,0,233 + + + pxor xmm2,xmm5 + + + + movdqa xmm5,xmm4 + psllq xmm5,60 + movdqa xmm6,xmm5 + pslldq xmm6,8 + pxor xmm3,xmm6 + + + psrldq xmm5,8 + pxor xmm2,xmm5 + psrlq xmm4,4 + pxor xmm2,xmm4 + + sub rax,1 + jnz NEAR $L$oop_row_6 + + + + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,1 + pxor xmm2,xmm3 + psrlq xmm3,5 + pxor xmm2,xmm3 + pxor xmm3,xmm3 + movdqa xmm0,xmm2 + + + lea rdx,[((-256))+rdx] + + + lea r8,[16+r8] + sub r9,16 + jnz NEAR $L$oop_ghash + + +DB 102,65,15,56,0,194 + movdqu XMMWORD[rcx],xmm0 + + + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + movdqa xmm6,XMMWORD[rsp] + movdqa xmm10,XMMWORD[16+rsp] + movdqa xmm11,XMMWORD[32+rsp] + add rsp,56 + ret + +$L$SEH_end_gcm_ghash_ssse3_6: + + +section .rdata rdata align=8 +ALIGN 16 + + +$L$reverse_bytes: + DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 + +$L$low4_mask: + DQ 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f +section .text + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_gcm_gmult_ssse3_1 wrt ..imagebase + DD $L$SEH_end_gcm_gmult_ssse3_5 wrt ..imagebase + DD $L$SEH_info_gcm_gmult_ssse3_0 wrt ..imagebase + + DD $L$SEH_begin_gcm_ghash_ssse3_1 wrt ..imagebase + DD $L$SEH_end_gcm_ghash_ssse3_6 wrt ..imagebase + DD $L$SEH_info_gcm_ghash_ssse3_0 wrt ..imagebase + + +section .xdata rdata align=8 +ALIGN 4 +$L$SEH_info_gcm_gmult_ssse3_0: + DB 1 + DB $L$SEH_prolog_gcm_gmult_ssse3_4-$L$SEH_begin_gcm_gmult_ssse3_1 + DB 5 + DB 0 + DB $L$SEH_prolog_gcm_gmult_ssse3_4-$L$SEH_begin_gcm_gmult_ssse3_1 + DB 168 + DW 1 + DB $L$SEH_prolog_gcm_gmult_ssse3_3-$L$SEH_begin_gcm_gmult_ssse3_1 + DB 104 + DW 0 + DB $L$SEH_prolog_gcm_gmult_ssse3_2-$L$SEH_begin_gcm_gmult_ssse3_1 + DB 66 + +$L$SEH_info_gcm_ghash_ssse3_0: + DB 1 + DB $L$SEH_prolog_gcm_ghash_ssse3_5-$L$SEH_begin_gcm_ghash_ssse3_1 + DB 7 + DB 0 + DB $L$SEH_prolog_gcm_ghash_ssse3_5-$L$SEH_begin_gcm_ghash_ssse3_1 + DB 184 + DW 2 + DB $L$SEH_prolog_gcm_ghash_ssse3_4-$L$SEH_begin_gcm_ghash_ssse3_1 + DB 168 + DW 1 + DB $L$SEH_prolog_gcm_ghash_ssse3_3-$L$SEH_begin_gcm_ghash_ssse3_1 + DB 104 + DW 0 + DB $L$SEH_prolog_gcm_ghash_ssse3_2-$L$SEH_begin_gcm_ghash_ssse3_1 + DB 98 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-x86-apple.S b/yass/third_party/boringssl/src/gen/bcm/ghash-x86-apple.S new file mode 100644 index 0000000000..a178b74f37 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-x86-apple.S @@ -0,0 +1,322 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _gcm_init_clmul +.private_extern _gcm_init_clmul +.align 4 +_gcm_init_clmul: +L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call L000pic +L000pic: + popl %ecx + leal Lbswap-L000pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) + ret +.globl _gcm_gmult_clmul +.private_extern _gcm_gmult_clmul +.align 4 +_gcm_gmult_clmul: +L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call L001pic +L001pic: + popl %ecx + leal Lbswap-L001pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movups 32(%edx),%xmm4 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.globl _gcm_ghash_clmul +.private_extern _gcm_ghash_clmul +.align 4 +_gcm_ghash_clmul: +L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call L002pic +L002pic: + popl %ecx + leal Lbswap-L002pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu (%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz L003odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 + pxor %xmm3,%xmm0 + pshufd $78,%xmm6,%xmm3 + movdqa %xmm6,%xmm7 + pxor %xmm6,%xmm3 + leal 32(%esi),%esi +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + nop + subl $32,%ebx + jbe L004even_tail + jmp L005mod_loop +.align 5,0x90 +L005mod_loop: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 + nop +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movups (%edx),%xmm2 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 + movdqu 16(%esi),%xmm6 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 +.byte 102,15,56,0,245 + pxor %xmm7,%xmm1 + movdqa %xmm6,%xmm7 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 + pxor %xmm4,%xmm1 +.byte 102,15,58,68,250,17 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,58,68,221,0 + leal 32(%esi),%esi + subl $32,%ebx + ja L005mod_loop +L004even_tail: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testl %ebx,%ebx + jnz L006done + movups (%edx),%xmm2 +L003odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +L006done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +.byte 0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-x86-linux.S b/yass/third_party/boringssl/src/gen/bcm/ghash-x86-linux.S new file mode 100644 index 0000000000..c897efcd96 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-x86-linux.S @@ -0,0 +1,328 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl gcm_init_clmul +.hidden gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call .L000pic +.L000pic: + popl %ecx + leal .Lbswap-.L000pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) + ret +.size gcm_init_clmul,.-.L_gcm_init_clmul_begin +.globl gcm_gmult_clmul +.hidden gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call .L001pic +.L001pic: + popl %ecx + leal .Lbswap-.L001pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movups 32(%edx),%xmm4 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin +.globl gcm_ghash_clmul +.hidden gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 16 +gcm_ghash_clmul: +.L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call .L002pic +.L002pic: + popl %ecx + leal .Lbswap-.L002pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu (%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz .L003odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 + pxor %xmm3,%xmm0 + pshufd $78,%xmm6,%xmm3 + movdqa %xmm6,%xmm7 + pxor %xmm6,%xmm3 + leal 32(%esi),%esi +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + nop + subl $32,%ebx + jbe .L004even_tail + jmp .L005mod_loop +.align 32 +.L005mod_loop: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 + nop +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movups (%edx),%xmm2 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 + movdqu 16(%esi),%xmm6 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 +.byte 102,15,56,0,245 + pxor %xmm7,%xmm1 + movdqa %xmm6,%xmm7 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 + pxor %xmm4,%xmm1 +.byte 102,15,58,68,250,17 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,58,68,221,0 + leal 32(%esi),%esi + subl $32,%ebx + ja .L005mod_loop +.L004even_tail: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testl %ebx,%ebx + jnz .L006done + movups (%edx),%xmm2 +.L003odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.L006done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin +.align 64 +.Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +.byte 0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-x86-win.asm b/yass/third_party/boringssl/src/gen/bcm/ghash-x86-win.asm new file mode 100644 index 0000000000..3f6c707477 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-x86-win.asm @@ -0,0 +1,330 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _gcm_init_clmul +align 16 +_gcm_init_clmul: +L$_gcm_init_clmul_begin: + mov edx,DWORD [4+esp] + mov eax,DWORD [8+esp] + call L$000pic +L$000pic: + pop ecx + lea ecx,[(L$bswap-L$000pic)+ecx] + movdqu xmm2,[eax] + pshufd xmm2,xmm2,78 + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + pand xmm5,[16+ecx] + pxor xmm2,xmm5 + movdqa xmm0,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm2 + movdqu [edx],xmm2 + pxor xmm4,xmm0 + movdqu [16+edx],xmm0 +db 102,15,58,15,227,8 + movdqu [32+edx],xmm4 + ret +global _gcm_gmult_clmul +align 16 +_gcm_gmult_clmul: +L$_gcm_gmult_clmul_begin: + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + call L$001pic +L$001pic: + pop ecx + lea ecx,[(L$bswap-L$001pic)+ecx] + movdqu xmm0,[eax] + movdqa xmm5,[ecx] + movups xmm2,[edx] +db 102,15,56,0,197 + movups xmm4,[32+edx] + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +db 102,15,56,0,197 + movdqu [eax],xmm0 + ret +global _gcm_ghash_clmul +align 16 +_gcm_ghash_clmul: +L$_gcm_ghash_clmul_begin: + push ebp + push ebx + push esi + push edi + mov eax,DWORD [20+esp] + mov edx,DWORD [24+esp] + mov esi,DWORD [28+esp] + mov ebx,DWORD [32+esp] + call L$002pic +L$002pic: + pop ecx + lea ecx,[(L$bswap-L$002pic)+ecx] + movdqu xmm0,[eax] + movdqa xmm5,[ecx] + movdqu xmm2,[edx] +db 102,15,56,0,197 + sub ebx,16 + jz NEAR L$003odd_tail + movdqu xmm3,[esi] + movdqu xmm6,[16+esi] +db 102,15,56,0,221 +db 102,15,56,0,245 + movdqu xmm5,[32+edx] + pxor xmm0,xmm3 + pshufd xmm3,xmm6,78 + movdqa xmm7,xmm6 + pxor xmm3,xmm6 + lea esi,[32+esi] +db 102,15,58,68,242,0 +db 102,15,58,68,250,17 +db 102,15,58,68,221,0 + movups xmm2,[16+edx] + nop + sub ebx,32 + jbe NEAR L$004even_tail + jmp NEAR L$005mod_loop +align 32 +L$005mod_loop: + pshufd xmm4,xmm0,78 + movdqa xmm1,xmm0 + pxor xmm4,xmm0 + nop +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,229,16 + movups xmm2,[edx] + xorps xmm0,xmm6 + movdqa xmm5,[ecx] + xorps xmm1,xmm7 + movdqu xmm7,[esi] + pxor xmm3,xmm0 + movdqu xmm6,[16+esi] + pxor xmm3,xmm1 +db 102,15,56,0,253 + pxor xmm4,xmm3 + movdqa xmm3,xmm4 + psrldq xmm4,8 + pslldq xmm3,8 + pxor xmm1,xmm4 + pxor xmm0,xmm3 +db 102,15,56,0,245 + pxor xmm1,xmm7 + movdqa xmm7,xmm6 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 +db 102,15,58,68,242,0 + movups xmm5,[32+edx] + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + pshufd xmm3,xmm7,78 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm3,xmm7 + pxor xmm1,xmm4 +db 102,15,58,68,250,17 + movups xmm2,[16+edx] + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +db 102,15,58,68,221,0 + lea esi,[32+esi] + sub ebx,32 + ja NEAR L$005mod_loop +L$004even_tail: + pshufd xmm4,xmm0,78 + movdqa xmm1,xmm0 + pxor xmm4,xmm0 +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,229,16 + movdqa xmm5,[ecx] + xorps xmm0,xmm6 + xorps xmm1,xmm7 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm4,xmm3 + movdqa xmm3,xmm4 + psrldq xmm4,8 + pslldq xmm3,8 + pxor xmm1,xmm4 + pxor xmm0,xmm3 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + test ebx,ebx + jnz NEAR L$006done + movups xmm2,[edx] +L$003odd_tail: + movdqu xmm3,[esi] +db 102,15,56,0,221 + pxor xmm0,xmm3 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +L$006done: +db 102,15,56,0,197 + movdqu [eax],xmm0 + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$bswap: +db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +db 0 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-apple.S new file mode 100644 index 0000000000..909d659661 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-apple.S @@ -0,0 +1,1125 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text +.globl _gcm_init_clmul +.private_extern _gcm_init_clmul + +.p2align 4 +_gcm_init_clmul: + + +_CET_ENDBR +L$_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand L$0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + pshufd $78,%xmm2,%xmm6 + movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) + ret + + + +.globl _gcm_gmult_clmul +.private_extern _gcm_gmult_clmul + +.p2align 4 +_gcm_gmult_clmul: + +_CET_ENDBR +L$_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa L$bswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + ret + + +.globl _gcm_ghash_clmul +.private_extern _gcm_ghash_clmul + +.p2align 5 +_gcm_ghash_clmul: + + +_CET_ENDBR +L$_ghash_clmul: + movdqa L$bswap_mask(%rip),%xmm10 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 + + subq $0x10,%rcx + jz L$odd_tail + + movdqu 16(%rsi),%xmm6 + cmpq $0x30,%rcx + jb L$skip4x + + subq $0x30,%rcx + movq $0xA040608020C0E000,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 + + + + + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $0x40,%rcx + jc L$tail4x + + jmp L$mod4_loop +.p2align 5 +L$mod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 + + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa L$7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $0x40,%rcx + jnc L$mod4_loop + +L$tail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + addq $0x40,%rcx + jz L$done + movdqu 32(%rsi),%xmm7 + subq $0x10,%rcx + jz L$odd_tail +L$skip4x: + + + + + + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + leaq 32(%rdx),%rdx + nop + subq $0x20,%rcx + jbe L$even_tail + nop + jmp L$mod_loop + +.p2align 5 +L$mod_loop: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm3,%xmm5 + + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 + psllq $5,%xmm0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm8 + pslldq $8,%xmm0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%rdx),%rdx + psrlq $1,%xmm0 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 + + subq $0x20,%rcx + ja L$mod_loop + +L$even_tail: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testq %rcx,%rcx + jnz L$done + +L$odd_tail: + movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,223,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +L$done: +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + ret + + + +.globl _gcm_init_avx +.private_extern _gcm_init_avx + +.p2align 5 +_gcm_init_avx: + +_CET_ENDBR + vzeroupper + + vmovdqu (%rsi),%xmm2 + vpshufd $78,%xmm2,%xmm2 + + + vpshufd $255,%xmm2,%xmm4 + vpsrlq $63,%xmm2,%xmm3 + vpsllq $1,%xmm2,%xmm2 + vpxor %xmm5,%xmm5,%xmm5 + vpcmpgtd %xmm4,%xmm5,%xmm5 + vpslldq $8,%xmm3,%xmm3 + vpor %xmm3,%xmm2,%xmm2 + + + vpand L$0x1c2_polynomial(%rip),%xmm5,%xmm5 + vpxor %xmm5,%xmm2,%xmm2 + + vpunpckhqdq %xmm2,%xmm2,%xmm6 + vmovdqa %xmm2,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + movq $4,%r10 + jmp L$init_start_avx +.p2align 5 +L$init_loop_avx: + vpalignr $8,%xmm3,%xmm4,%xmm5 + vmovdqu %xmm5,-16(%rdi) + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 +L$init_start_avx: + vmovdqa %xmm0,%xmm5 + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + vpshufd $78,%xmm5,%xmm3 + vpshufd $78,%xmm0,%xmm4 + vpxor %xmm5,%xmm3,%xmm3 + vmovdqu %xmm5,0(%rdi) + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu %xmm0,16(%rdi) + leaq 48(%rdi),%rdi + subq $1,%r10 + jnz L$init_loop_avx + + vpalignr $8,%xmm4,%xmm3,%xmm5 + vmovdqu %xmm5,-16(%rdi) + + vzeroupper + ret + + + +.globl _gcm_gmult_avx +.private_extern _gcm_gmult_avx + +.p2align 5 +_gcm_gmult_avx: + +_CET_ENDBR + jmp L$_gmult_clmul + + +.globl _gcm_ghash_avx +.private_extern _gcm_ghash_avx + +.p2align 5 +_gcm_ghash_avx: + +_CET_ENDBR + vzeroupper + + vmovdqu (%rdi),%xmm10 + leaq L$0x1c2_polynomial(%rip),%r10 + leaq 64(%rsi),%rsi + vmovdqu L$bswap_mask(%rip),%xmm13 + vpshufb %xmm13,%xmm10,%xmm10 + cmpq $0x80,%rcx + jb L$short_avx + subq $0x80,%rcx + + vmovdqu 112(%rdx),%xmm14 + vmovdqu 0-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vmovdqu 32-64(%rsi),%xmm7 + + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm14,%xmm9,%xmm9 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 80(%rdx),%xmm14 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 48-64(%rsi),%xmm6 + vpxor %xmm14,%xmm9,%xmm9 + vmovdqu 64(%rdx),%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 48(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 32(%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 16(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu (%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + + leaq 128(%rdx),%rdx + cmpq $0x80,%rcx + jb L$tail_avx + + vpxor %xmm10,%xmm15,%xmm15 + subq $0x80,%rcx + jmp L$oop8x_avx + +.p2align 5 +L$oop8x_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 112(%rdx),%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm15,%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 + vmovdqu 0-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 + vmovdqu 32-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm3,%xmm10,%xmm10 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vxorps %xmm4,%xmm11,%xmm11 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm5,%xmm12,%xmm12 + vxorps %xmm15,%xmm8,%xmm8 + + vmovdqu 80(%rdx),%xmm14 + vpxor %xmm10,%xmm12,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm11,%xmm12,%xmm12 + vpslldq $8,%xmm12,%xmm9 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vpsrldq $8,%xmm12,%xmm12 + vpxor %xmm9,%xmm10,%xmm10 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vxorps %xmm12,%xmm11,%xmm11 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 64(%rdx),%xmm15 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vxorps %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + + vmovdqu 48(%rdx),%xmm14 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 32(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + vxorps %xmm12,%xmm10,%xmm10 + + vmovdqu 16(%rdx),%xmm14 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vxorps %xmm11,%xmm12,%xmm12 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu (%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm12,%xmm15,%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + vpxor %xmm10,%xmm15,%xmm15 + + leaq 128(%rdx),%rdx + subq $0x80,%rcx + jnc L$oop8x_avx + + addq $0x80,%rcx + jmp L$tail_no_xor_avx + +.p2align 5 +L$short_avx: + vmovdqu -16(%rdx,%rcx,1),%xmm14 + leaq (%rdx,%rcx,1),%rdx + vmovdqu 0-64(%rsi),%xmm6 + vmovdqu 32-64(%rsi),%xmm7 + vpshufb %xmm13,%xmm14,%xmm15 + + vmovdqa %xmm0,%xmm3 + vmovdqa %xmm1,%xmm4 + vmovdqa %xmm2,%xmm5 + subq $0x10,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -32(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -48(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 80-64(%rsi),%xmm7 + subq $0x10,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -64(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -80(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 96-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 128-64(%rsi),%xmm7 + subq $0x10,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -96(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -112(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 144-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovq 184-64(%rsi),%xmm7 + subq $0x10,%rcx + jmp L$tail_avx + +.p2align 5 +L$tail_avx: + vpxor %xmm10,%xmm15,%xmm15 +L$tail_no_xor_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + + vmovdqu (%r10),%xmm12 + + vpxor %xmm0,%xmm3,%xmm10 + vpxor %xmm1,%xmm4,%xmm11 + vpxor %xmm2,%xmm5,%xmm5 + + vpxor %xmm10,%xmm5,%xmm5 + vpxor %xmm11,%xmm5,%xmm5 + vpslldq $8,%xmm5,%xmm9 + vpsrldq $8,%xmm5,%xmm5 + vpxor %xmm9,%xmm10,%xmm10 + vpxor %xmm5,%xmm11,%xmm11 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm11,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + cmpq $0,%rcx + jne L$short_avx + + vpshufb %xmm13,%xmm10,%xmm10 + vmovdqu %xmm10,(%rdi) + vzeroupper + ret + + + +.section __DATA,__const +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +L$7_mask: +.long 7,0,7,0 +.p2align 6 + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-linux.S new file mode 100644 index 0000000000..22429a67b4 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-linux.S @@ -0,0 +1,1125 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text +.globl gcm_init_clmul +.hidden gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.cfi_startproc + +_CET_ENDBR +.L_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand .L0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + pshufd $78,%xmm2,%xmm6 + movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) + ret +.cfi_endproc + +.size gcm_init_clmul,.-gcm_init_clmul +.globl gcm_gmult_clmul +.hidden gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.cfi_startproc +_CET_ENDBR +.L_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa .Lbswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + ret +.cfi_endproc +.size gcm_gmult_clmul,.-gcm_gmult_clmul +.globl gcm_ghash_clmul +.hidden gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 32 +gcm_ghash_clmul: +.cfi_startproc + +_CET_ENDBR +.L_ghash_clmul: + movdqa .Lbswap_mask(%rip),%xmm10 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 + + subq $0x10,%rcx + jz .Lodd_tail + + movdqu 16(%rsi),%xmm6 + cmpq $0x30,%rcx + jb .Lskip4x + + subq $0x30,%rcx + movq $0xA040608020C0E000,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 + + + + + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $0x40,%rcx + jc .Ltail4x + + jmp .Lmod4_loop +.align 32 +.Lmod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 + + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa .L7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $0x40,%rcx + jnc .Lmod4_loop + +.Ltail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + addq $0x40,%rcx + jz .Ldone + movdqu 32(%rsi),%xmm7 + subq $0x10,%rcx + jz .Lodd_tail +.Lskip4x: + + + + + + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + leaq 32(%rdx),%rdx + nop + subq $0x20,%rcx + jbe .Leven_tail + nop + jmp .Lmod_loop + +.align 32 +.Lmod_loop: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm3,%xmm5 + + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 + psllq $5,%xmm0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm8 + pslldq $8,%xmm0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%rdx),%rdx + psrlq $1,%xmm0 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 + + subq $0x20,%rcx + ja .Lmod_loop + +.Leven_tail: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testq %rcx,%rcx + jnz .Ldone + +.Lodd_tail: + movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,223,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.Ldone: +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + ret +.cfi_endproc + +.size gcm_ghash_clmul,.-gcm_ghash_clmul +.globl gcm_init_avx +.hidden gcm_init_avx +.type gcm_init_avx,@function +.align 32 +gcm_init_avx: +.cfi_startproc +_CET_ENDBR + vzeroupper + + vmovdqu (%rsi),%xmm2 + vpshufd $78,%xmm2,%xmm2 + + + vpshufd $255,%xmm2,%xmm4 + vpsrlq $63,%xmm2,%xmm3 + vpsllq $1,%xmm2,%xmm2 + vpxor %xmm5,%xmm5,%xmm5 + vpcmpgtd %xmm4,%xmm5,%xmm5 + vpslldq $8,%xmm3,%xmm3 + vpor %xmm3,%xmm2,%xmm2 + + + vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5 + vpxor %xmm5,%xmm2,%xmm2 + + vpunpckhqdq %xmm2,%xmm2,%xmm6 + vmovdqa %xmm2,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + movq $4,%r10 + jmp .Linit_start_avx +.align 32 +.Linit_loop_avx: + vpalignr $8,%xmm3,%xmm4,%xmm5 + vmovdqu %xmm5,-16(%rdi) + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 +.Linit_start_avx: + vmovdqa %xmm0,%xmm5 + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + vpshufd $78,%xmm5,%xmm3 + vpshufd $78,%xmm0,%xmm4 + vpxor %xmm5,%xmm3,%xmm3 + vmovdqu %xmm5,0(%rdi) + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu %xmm0,16(%rdi) + leaq 48(%rdi),%rdi + subq $1,%r10 + jnz .Linit_loop_avx + + vpalignr $8,%xmm4,%xmm3,%xmm5 + vmovdqu %xmm5,-16(%rdi) + + vzeroupper + ret + +.cfi_endproc +.size gcm_init_avx,.-gcm_init_avx +.globl gcm_gmult_avx +.hidden gcm_gmult_avx +.type gcm_gmult_avx,@function +.align 32 +gcm_gmult_avx: +.cfi_startproc +_CET_ENDBR + jmp .L_gmult_clmul +.cfi_endproc +.size gcm_gmult_avx,.-gcm_gmult_avx +.globl gcm_ghash_avx +.hidden gcm_ghash_avx +.type gcm_ghash_avx,@function +.align 32 +gcm_ghash_avx: +.cfi_startproc +_CET_ENDBR + vzeroupper + + vmovdqu (%rdi),%xmm10 + leaq .L0x1c2_polynomial(%rip),%r10 + leaq 64(%rsi),%rsi + vmovdqu .Lbswap_mask(%rip),%xmm13 + vpshufb %xmm13,%xmm10,%xmm10 + cmpq $0x80,%rcx + jb .Lshort_avx + subq $0x80,%rcx + + vmovdqu 112(%rdx),%xmm14 + vmovdqu 0-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vmovdqu 32-64(%rsi),%xmm7 + + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm14,%xmm9,%xmm9 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 80(%rdx),%xmm14 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 48-64(%rsi),%xmm6 + vpxor %xmm14,%xmm9,%xmm9 + vmovdqu 64(%rdx),%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 48(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 32(%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 16(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu (%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + + leaq 128(%rdx),%rdx + cmpq $0x80,%rcx + jb .Ltail_avx + + vpxor %xmm10,%xmm15,%xmm15 + subq $0x80,%rcx + jmp .Loop8x_avx + +.align 32 +.Loop8x_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 112(%rdx),%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm15,%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 + vmovdqu 0-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 + vmovdqu 32-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm3,%xmm10,%xmm10 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vxorps %xmm4,%xmm11,%xmm11 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm5,%xmm12,%xmm12 + vxorps %xmm15,%xmm8,%xmm8 + + vmovdqu 80(%rdx),%xmm14 + vpxor %xmm10,%xmm12,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm11,%xmm12,%xmm12 + vpslldq $8,%xmm12,%xmm9 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vpsrldq $8,%xmm12,%xmm12 + vpxor %xmm9,%xmm10,%xmm10 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vxorps %xmm12,%xmm11,%xmm11 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 64(%rdx),%xmm15 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vxorps %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + + vmovdqu 48(%rdx),%xmm14 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 32(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + vxorps %xmm12,%xmm10,%xmm10 + + vmovdqu 16(%rdx),%xmm14 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vxorps %xmm11,%xmm12,%xmm12 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu (%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm12,%xmm15,%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + vpxor %xmm10,%xmm15,%xmm15 + + leaq 128(%rdx),%rdx + subq $0x80,%rcx + jnc .Loop8x_avx + + addq $0x80,%rcx + jmp .Ltail_no_xor_avx + +.align 32 +.Lshort_avx: + vmovdqu -16(%rdx,%rcx,1),%xmm14 + leaq (%rdx,%rcx,1),%rdx + vmovdqu 0-64(%rsi),%xmm6 + vmovdqu 32-64(%rsi),%xmm7 + vpshufb %xmm13,%xmm14,%xmm15 + + vmovdqa %xmm0,%xmm3 + vmovdqa %xmm1,%xmm4 + vmovdqa %xmm2,%xmm5 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -32(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -48(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 80-64(%rsi),%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -64(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -80(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 96-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 128-64(%rsi),%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -96(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -112(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 144-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovq 184-64(%rsi),%xmm7 + subq $0x10,%rcx + jmp .Ltail_avx + +.align 32 +.Ltail_avx: + vpxor %xmm10,%xmm15,%xmm15 +.Ltail_no_xor_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + + vmovdqu (%r10),%xmm12 + + vpxor %xmm0,%xmm3,%xmm10 + vpxor %xmm1,%xmm4,%xmm11 + vpxor %xmm2,%xmm5,%xmm5 + + vpxor %xmm10,%xmm5,%xmm5 + vpxor %xmm11,%xmm5,%xmm5 + vpslldq $8,%xmm5,%xmm9 + vpsrldq $8,%xmm5,%xmm5 + vpxor %xmm9,%xmm10,%xmm10 + vpxor %xmm5,%xmm11,%xmm11 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm11,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + cmpq $0,%rcx + jne .Lshort_avx + + vpshufb %xmm13,%xmm10,%xmm10 + vmovdqu %xmm10,(%rdi) + vzeroupper + ret +.cfi_endproc + +.size gcm_ghash_avx,.-gcm_ghash_avx +.section .rodata +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.L7_mask: +.long 7,0,7,0 +.align 64 + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-win.asm new file mode 100644 index 0000000000..41b189a2be --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghash-x86_64-win.asm @@ -0,0 +1,1336 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + +global gcm_init_clmul + +ALIGN 16 +gcm_init_clmul: + +$L$SEH_begin_gcm_init_clmul_1: +_CET_ENDBR +$L$_init_clmul: + sub rsp,0x18 +$L$SEH_prolog_gcm_init_clmul_2: + movaps XMMWORD[rsp],xmm6 +$L$SEH_prolog_gcm_init_clmul_3: + movdqu xmm2,XMMWORD[rdx] + pshufd xmm2,xmm2,78 + + + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + + + pand xmm5,XMMWORD[$L$0x1c2_polynomial] + pxor xmm2,xmm5 + + + pshufd xmm6,xmm2,78 + movdqa xmm0,xmm2 + pxor xmm6,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm2 + movdqu XMMWORD[rcx],xmm2 + pxor xmm4,xmm0 + movdqu XMMWORD[16+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD[32+rcx],xmm4 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm5,xmm0 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm5,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm5 + movdqu XMMWORD[48+rcx],xmm5 + pxor xmm4,xmm0 + movdqu XMMWORD[64+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD[80+rcx],xmm4 + movaps xmm6,XMMWORD[rsp] + lea rsp,[24+rsp] + ret + +$L$SEH_end_gcm_init_clmul_4: + +global gcm_gmult_clmul + +ALIGN 16 +gcm_gmult_clmul: + +_CET_ENDBR +$L$_gmult_clmul: + movdqu xmm0,XMMWORD[rcx] + movdqa xmm5,XMMWORD[$L$bswap_mask] + movdqu xmm2,XMMWORD[rdx] + movdqu xmm4,XMMWORD[32+rdx] +DB 102,15,56,0,197 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +DB 102,15,56,0,197 + movdqu XMMWORD[rcx],xmm0 + ret + + +global gcm_ghash_clmul + +ALIGN 32 +gcm_ghash_clmul: + +$L$SEH_begin_gcm_ghash_clmul_1: +_CET_ENDBR +$L$_ghash_clmul: + lea rax,[((-136))+rsp] + lea rsp,[((-32))+rax] +$L$SEH_prolog_gcm_ghash_clmul_2: + movaps XMMWORD[(-32)+rax],xmm6 +$L$SEH_prolog_gcm_ghash_clmul_3: + movaps XMMWORD[(-16)+rax],xmm7 +$L$SEH_prolog_gcm_ghash_clmul_4: + movaps XMMWORD[rax],xmm8 +$L$SEH_prolog_gcm_ghash_clmul_5: + movaps XMMWORD[16+rax],xmm9 +$L$SEH_prolog_gcm_ghash_clmul_6: + movaps XMMWORD[32+rax],xmm10 +$L$SEH_prolog_gcm_ghash_clmul_7: + movaps XMMWORD[48+rax],xmm11 +$L$SEH_prolog_gcm_ghash_clmul_8: + movaps XMMWORD[64+rax],xmm12 +$L$SEH_prolog_gcm_ghash_clmul_9: + movaps XMMWORD[80+rax],xmm13 +$L$SEH_prolog_gcm_ghash_clmul_10: + movaps XMMWORD[96+rax],xmm14 +$L$SEH_prolog_gcm_ghash_clmul_11: + movaps XMMWORD[112+rax],xmm15 +$L$SEH_prolog_gcm_ghash_clmul_12: + movdqa xmm10,XMMWORD[$L$bswap_mask] + + movdqu xmm0,XMMWORD[rcx] + movdqu xmm2,XMMWORD[rdx] + movdqu xmm7,XMMWORD[32+rdx] +DB 102,65,15,56,0,194 + + sub r9,0x10 + jz NEAR $L$odd_tail + + movdqu xmm6,XMMWORD[16+rdx] + cmp r9,0x30 + jb NEAR $L$skip4x + + sub r9,0x30 + mov rax,0xA040608020C0E000 + movdqu xmm14,XMMWORD[48+rdx] + movdqu xmm15,XMMWORD[64+rdx] + + + + + movdqu xmm3,XMMWORD[48+r8] + movdqu xmm11,XMMWORD[32+r8] +DB 102,65,15,56,0,218 +DB 102,69,15,56,0,218 + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm12,xmm11 +DB 102,68,15,58,68,222,0 +DB 102,68,15,58,68,238,17 +DB 102,68,15,58,68,231,16 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + movups xmm7,XMMWORD[80+rdx] + xorps xmm4,xmm12 + + movdqu xmm11,XMMWORD[16+r8] + movdqu xmm8,XMMWORD[r8] +DB 102,69,15,56,0,218 +DB 102,69,15,56,0,194 + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + movdqa xmm1,xmm0 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 +DB 102,69,15,58,68,238,17 +DB 102,68,15,58,68,231,0 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + + lea r8,[64+r8] + sub r9,0x40 + jc NEAR $L$tail4x + + jmp NEAR $L$mod4_loop +ALIGN 32 +$L$mod4_loop: +DB 102,65,15,58,68,199,0 + xorps xmm4,xmm12 + movdqu xmm11,XMMWORD[48+r8] +DB 102,69,15,56,0,218 +DB 102,65,15,58,68,207,17 + xorps xmm0,xmm3 + movdqu xmm3,XMMWORD[32+r8] + movdqa xmm13,xmm11 +DB 102,68,15,58,68,199,16 + pshufd xmm12,xmm11,78 + xorps xmm1,xmm5 + pxor xmm12,xmm11 +DB 102,65,15,56,0,218 + movups xmm7,XMMWORD[32+rdx] + xorps xmm8,xmm4 +DB 102,68,15,58,68,218,0 + pshufd xmm4,xmm3,78 + + pxor xmm8,xmm0 + movdqa xmm5,xmm3 + pxor xmm8,xmm1 + pxor xmm4,xmm3 + movdqa xmm9,xmm8 +DB 102,68,15,58,68,234,17 + pslldq xmm8,8 + psrldq xmm9,8 + pxor xmm0,xmm8 + movdqa xmm8,XMMWORD[$L$7_mask] + pxor xmm1,xmm9 +DB 102,76,15,110,200 + + pand xmm8,xmm0 +DB 102,69,15,56,0,200 + pxor xmm9,xmm0 +DB 102,68,15,58,68,231,0 + psllq xmm9,57 + movdqa xmm8,xmm9 + pslldq xmm9,8 +DB 102,15,58,68,222,0 + psrldq xmm8,8 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + movdqu xmm8,XMMWORD[r8] + + movdqa xmm9,xmm0 + psrlq xmm0,1 +DB 102,15,58,68,238,17 + xorps xmm3,xmm11 + movdqu xmm11,XMMWORD[16+r8] +DB 102,69,15,56,0,218 +DB 102,15,58,68,231,16 + xorps xmm5,xmm13 + movups xmm7,XMMWORD[80+rdx] +DB 102,69,15,56,0,194 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + + movdqa xmm13,xmm11 + pxor xmm4,xmm12 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm1,xmm0 +DB 102,69,15,58,68,238,17 + xorps xmm3,xmm11 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 + +DB 102,68,15,58,68,231,0 + xorps xmm5,xmm13 + + lea r8,[64+r8] + sub r9,0x40 + jnc NEAR $L$mod4_loop + +$L$tail4x: +DB 102,65,15,58,68,199,0 +DB 102,65,15,58,68,207,17 +DB 102,68,15,58,68,199,16 + xorps xmm4,xmm12 + xorps xmm0,xmm3 + xorps xmm1,xmm5 + pxor xmm1,xmm0 + pxor xmm8,xmm4 + + pxor xmm8,xmm1 + pxor xmm1,xmm0 + + movdqa xmm9,xmm8 + psrldq xmm8,8 + pslldq xmm9,8 + pxor xmm1,xmm8 + pxor xmm0,xmm9 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + add r9,0x40 + jz NEAR $L$done + movdqu xmm7,XMMWORD[32+rdx] + sub r9,0x10 + jz NEAR $L$odd_tail +$L$skip4x: + + + + + + movdqu xmm8,XMMWORD[r8] + movdqu xmm3,XMMWORD[16+r8] +DB 102,69,15,56,0,194 +DB 102,65,15,56,0,218 + pxor xmm0,xmm8 + + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + lea r8,[32+r8] + nop + sub r9,0x20 + jbe NEAR $L$even_tail + nop + jmp NEAR $L$mod_loop + +ALIGN 32 +$L$mod_loop: + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 + +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + movdqu xmm9,XMMWORD[r8] + pxor xmm8,xmm0 +DB 102,69,15,56,0,202 + movdqu xmm3,XMMWORD[16+r8] + + pxor xmm8,xmm1 + pxor xmm1,xmm9 + pxor xmm4,xmm8 +DB 102,65,15,56,0,218 + movdqa xmm8,xmm4 + psrldq xmm8,8 + pslldq xmm4,8 + pxor xmm1,xmm8 + pxor xmm0,xmm4 + + movdqa xmm5,xmm3 + + movdqa xmm9,xmm0 + movdqa xmm8,xmm0 + psllq xmm0,5 + pxor xmm8,xmm0 +DB 102,15,58,68,218,0 + psllq xmm0,1 + pxor xmm0,xmm8 + psllq xmm0,57 + movdqa xmm8,xmm0 + pslldq xmm0,8 + psrldq xmm8,8 + pxor xmm0,xmm9 + pshufd xmm4,xmm5,78 + pxor xmm1,xmm8 + pxor xmm4,xmm5 + + movdqa xmm9,xmm0 + psrlq xmm0,1 +DB 102,15,58,68,234,17 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm9 + lea r8,[32+r8] + psrlq xmm0,1 +DB 102,15,58,68,231,0 + pxor xmm0,xmm1 + + sub r9,0x20 + ja NEAR $L$mod_loop + +$L$even_tail: + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 + +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + pxor xmm8,xmm0 + pxor xmm8,xmm1 + pxor xmm4,xmm8 + movdqa xmm8,xmm4 + psrldq xmm8,8 + pslldq xmm4,8 + pxor xmm1,xmm8 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + test r9,r9 + jnz NEAR $L$done + +$L$odd_tail: + movdqu xmm8,XMMWORD[r8] +DB 102,69,15,56,0,194 + pxor xmm0,xmm8 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,223,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +$L$done: +DB 102,65,15,56,0,194 + movdqu XMMWORD[rcx],xmm0 + movaps xmm6,XMMWORD[rsp] + movaps xmm7,XMMWORD[16+rsp] + movaps xmm8,XMMWORD[32+rsp] + movaps xmm9,XMMWORD[48+rsp] + movaps xmm10,XMMWORD[64+rsp] + movaps xmm11,XMMWORD[80+rsp] + movaps xmm12,XMMWORD[96+rsp] + movaps xmm13,XMMWORD[112+rsp] + movaps xmm14,XMMWORD[128+rsp] + movaps xmm15,XMMWORD[144+rsp] + lea rsp,[168+rsp] + ret + +$L$SEH_end_gcm_ghash_clmul_13: + +global gcm_init_avx + +ALIGN 32 +gcm_init_avx: + +_CET_ENDBR +$L$SEH_begin_gcm_init_avx_1: + sub rsp,0x18 +$L$SEH_prolog_gcm_init_avx_2: + movaps XMMWORD[rsp],xmm6 +$L$SEH_prolog_gcm_init_avx_3: + vzeroupper + + vmovdqu xmm2,XMMWORD[rdx] + vpshufd xmm2,xmm2,78 + + + vpshufd xmm4,xmm2,255 + vpsrlq xmm3,xmm2,63 + vpsllq xmm2,xmm2,1 + vpxor xmm5,xmm5,xmm5 + vpcmpgtd xmm5,xmm5,xmm4 + vpslldq xmm3,xmm3,8 + vpor xmm2,xmm2,xmm3 + + + vpand xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial] + vpxor xmm2,xmm2,xmm5 + + vpunpckhqdq xmm6,xmm2,xmm2 + vmovdqa xmm0,xmm2 + vpxor xmm6,xmm6,xmm2 + mov r10,4 + jmp NEAR $L$init_start_avx +ALIGN 32 +$L$init_loop_avx: + vpalignr xmm5,xmm4,xmm3,8 + vmovdqu XMMWORD[(-16)+rcx],xmm5 + vpunpckhqdq xmm3,xmm0,xmm0 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm1,xmm0,xmm2,0x11 + vpclmulqdq xmm0,xmm0,xmm2,0x00 + vpclmulqdq xmm3,xmm3,xmm6,0x00 + vpxor xmm4,xmm1,xmm0 + vpxor xmm3,xmm3,xmm4 + + vpslldq xmm4,xmm3,8 + vpsrldq xmm3,xmm3,8 + vpxor xmm0,xmm0,xmm4 + vpxor xmm1,xmm1,xmm3 + vpsllq xmm3,xmm0,57 + vpsllq xmm4,xmm0,62 + vpxor xmm4,xmm4,xmm3 + vpsllq xmm3,xmm0,63 + vpxor xmm4,xmm4,xmm3 + vpslldq xmm3,xmm4,8 + vpsrldq xmm4,xmm4,8 + vpxor xmm0,xmm0,xmm3 + vpxor xmm1,xmm1,xmm4 + + vpsrlq xmm4,xmm0,1 + vpxor xmm1,xmm1,xmm0 + vpxor xmm0,xmm0,xmm4 + vpsrlq xmm4,xmm4,5 + vpxor xmm0,xmm0,xmm4 + vpsrlq xmm0,xmm0,1 + vpxor xmm0,xmm0,xmm1 +$L$init_start_avx: + vmovdqa xmm5,xmm0 + vpunpckhqdq xmm3,xmm0,xmm0 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm1,xmm0,xmm2,0x11 + vpclmulqdq xmm0,xmm0,xmm2,0x00 + vpclmulqdq xmm3,xmm3,xmm6,0x00 + vpxor xmm4,xmm1,xmm0 + vpxor xmm3,xmm3,xmm4 + + vpslldq xmm4,xmm3,8 + vpsrldq xmm3,xmm3,8 + vpxor xmm0,xmm0,xmm4 + vpxor xmm1,xmm1,xmm3 + vpsllq xmm3,xmm0,57 + vpsllq xmm4,xmm0,62 + vpxor xmm4,xmm4,xmm3 + vpsllq xmm3,xmm0,63 + vpxor xmm4,xmm4,xmm3 + vpslldq xmm3,xmm4,8 + vpsrldq xmm4,xmm4,8 + vpxor xmm0,xmm0,xmm3 + vpxor xmm1,xmm1,xmm4 + + vpsrlq xmm4,xmm0,1 + vpxor xmm1,xmm1,xmm0 + vpxor xmm0,xmm0,xmm4 + vpsrlq xmm4,xmm4,5 + vpxor xmm0,xmm0,xmm4 + vpsrlq xmm0,xmm0,1 + vpxor xmm0,xmm0,xmm1 + vpshufd xmm3,xmm5,78 + vpshufd xmm4,xmm0,78 + vpxor xmm3,xmm3,xmm5 + vmovdqu XMMWORD[rcx],xmm5 + vpxor xmm4,xmm4,xmm0 + vmovdqu XMMWORD[16+rcx],xmm0 + lea rcx,[48+rcx] + sub r10,1 + jnz NEAR $L$init_loop_avx + + vpalignr xmm5,xmm3,xmm4,8 + vmovdqu XMMWORD[(-16)+rcx],xmm5 + + vzeroupper + movaps xmm6,XMMWORD[rsp] + lea rsp,[24+rsp] + ret +$L$SEH_end_gcm_init_avx_4: + + +global gcm_gmult_avx + +ALIGN 32 +gcm_gmult_avx: + +_CET_ENDBR + jmp NEAR $L$_gmult_clmul + + +global gcm_ghash_avx + +ALIGN 32 +gcm_ghash_avx: + +_CET_ENDBR +$L$SEH_begin_gcm_ghash_avx_1: + lea rax,[((-136))+rsp] + lea rsp,[((-32))+rax] +$L$SEH_prolog_gcm_ghash_avx_2: + movaps XMMWORD[(-32)+rax],xmm6 +$L$SEH_prolog_gcm_ghash_avx_3: + movaps XMMWORD[(-16)+rax],xmm7 +$L$SEH_prolog_gcm_ghash_avx_4: + movaps XMMWORD[rax],xmm8 +$L$SEH_prolog_gcm_ghash_avx_5: + movaps XMMWORD[16+rax],xmm9 +$L$SEH_prolog_gcm_ghash_avx_6: + movaps XMMWORD[32+rax],xmm10 +$L$SEH_prolog_gcm_ghash_avx_7: + movaps XMMWORD[48+rax],xmm11 +$L$SEH_prolog_gcm_ghash_avx_8: + movaps XMMWORD[64+rax],xmm12 +$L$SEH_prolog_gcm_ghash_avx_9: + movaps XMMWORD[80+rax],xmm13 +$L$SEH_prolog_gcm_ghash_avx_10: + movaps XMMWORD[96+rax],xmm14 +$L$SEH_prolog_gcm_ghash_avx_11: + movaps XMMWORD[112+rax],xmm15 +$L$SEH_prolog_gcm_ghash_avx_12: + vzeroupper + + vmovdqu xmm10,XMMWORD[rcx] + lea r10,[$L$0x1c2_polynomial] + lea rdx,[64+rdx] + vmovdqu xmm13,XMMWORD[$L$bswap_mask] + vpshufb xmm10,xmm10,xmm13 + cmp r9,0x80 + jb NEAR $L$short_avx + sub r9,0x80 + + vmovdqu xmm14,XMMWORD[112+r8] + vmovdqu xmm6,XMMWORD[((0-64))+rdx] + vpshufb xmm14,xmm14,xmm13 + vmovdqu xmm7,XMMWORD[((32-64))+rdx] + + vpunpckhqdq xmm9,xmm14,xmm14 + vmovdqu xmm15,XMMWORD[96+r8] + vpclmulqdq xmm0,xmm14,xmm6,0x00 + vpxor xmm9,xmm9,xmm14 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((16-64))+rdx] + vpunpckhqdq xmm8,xmm15,xmm15 + vmovdqu xmm14,XMMWORD[80+r8] + vpclmulqdq xmm2,xmm9,xmm7,0x00 + vpxor xmm8,xmm8,xmm15 + + vpshufb xmm14,xmm14,xmm13 + vpclmulqdq xmm3,xmm15,xmm6,0x00 + vpunpckhqdq xmm9,xmm14,xmm14 + vpclmulqdq xmm4,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((48-64))+rdx] + vpxor xmm9,xmm9,xmm14 + vmovdqu xmm15,XMMWORD[64+r8] + vpclmulqdq xmm5,xmm8,xmm7,0x10 + vmovdqu xmm7,XMMWORD[((80-64))+rdx] + + vpshufb xmm15,xmm15,xmm13 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm14,xmm6,0x00 + vpxor xmm4,xmm4,xmm1 + vpunpckhqdq xmm8,xmm15,xmm15 + vpclmulqdq xmm1,xmm14,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((64-64))+rdx] + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm9,xmm7,0x00 + vpxor xmm8,xmm8,xmm15 + + vmovdqu xmm14,XMMWORD[48+r8] + vpxor xmm0,xmm0,xmm3 + vpclmulqdq xmm3,xmm15,xmm6,0x00 + vpxor xmm1,xmm1,xmm4 + vpshufb xmm14,xmm14,xmm13 + vpclmulqdq xmm4,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((96-64))+rdx] + vpxor xmm2,xmm2,xmm5 + vpunpckhqdq xmm9,xmm14,xmm14 + vpclmulqdq xmm5,xmm8,xmm7,0x10 + vmovdqu xmm7,XMMWORD[((128-64))+rdx] + vpxor xmm9,xmm9,xmm14 + + vmovdqu xmm15,XMMWORD[32+r8] + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm14,xmm6,0x00 + vpxor xmm4,xmm4,xmm1 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((112-64))+rdx] + vpxor xmm5,xmm5,xmm2 + vpunpckhqdq xmm8,xmm15,xmm15 + vpclmulqdq xmm2,xmm9,xmm7,0x00 + vpxor xmm8,xmm8,xmm15 + + vmovdqu xmm14,XMMWORD[16+r8] + vpxor xmm0,xmm0,xmm3 + vpclmulqdq xmm3,xmm15,xmm6,0x00 + vpxor xmm1,xmm1,xmm4 + vpshufb xmm14,xmm14,xmm13 + vpclmulqdq xmm4,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((144-64))+rdx] + vpxor xmm2,xmm2,xmm5 + vpunpckhqdq xmm9,xmm14,xmm14 + vpclmulqdq xmm5,xmm8,xmm7,0x10 + vmovdqu xmm7,XMMWORD[((176-64))+rdx] + vpxor xmm9,xmm9,xmm14 + + vmovdqu xmm15,XMMWORD[r8] + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm14,xmm6,0x00 + vpxor xmm4,xmm4,xmm1 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((160-64))+rdx] + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm9,xmm7,0x10 + + lea r8,[128+r8] + cmp r9,0x80 + jb NEAR $L$tail_avx + + vpxor xmm15,xmm15,xmm10 + sub r9,0x80 + jmp NEAR $L$oop8x_avx + +ALIGN 32 +$L$oop8x_avx: + vpunpckhqdq xmm8,xmm15,xmm15 + vmovdqu xmm14,XMMWORD[112+r8] + vpxor xmm3,xmm3,xmm0 + vpxor xmm8,xmm8,xmm15 + vpclmulqdq xmm10,xmm15,xmm6,0x00 + vpshufb xmm14,xmm14,xmm13 + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm11,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((0-64))+rdx] + vpunpckhqdq xmm9,xmm14,xmm14 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm12,xmm8,xmm7,0x00 + vmovdqu xmm7,XMMWORD[((32-64))+rdx] + vpxor xmm9,xmm9,xmm14 + + vmovdqu xmm15,XMMWORD[96+r8] + vpclmulqdq xmm0,xmm14,xmm6,0x00 + vpxor xmm10,xmm10,xmm3 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,0x11 + vxorps xmm11,xmm11,xmm4 + vmovdqu xmm6,XMMWORD[((16-64))+rdx] + vpunpckhqdq xmm8,xmm15,xmm15 + vpclmulqdq xmm2,xmm9,xmm7,0x00 + vpxor xmm12,xmm12,xmm5 + vxorps xmm8,xmm8,xmm15 + + vmovdqu xmm14,XMMWORD[80+r8] + vpxor xmm12,xmm12,xmm10 + vpclmulqdq xmm3,xmm15,xmm6,0x00 + vpxor xmm12,xmm12,xmm11 + vpslldq xmm9,xmm12,8 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm4,xmm15,xmm6,0x11 + vpsrldq xmm12,xmm12,8 + vpxor xmm10,xmm10,xmm9 + vmovdqu xmm6,XMMWORD[((48-64))+rdx] + vpshufb xmm14,xmm14,xmm13 + vxorps xmm11,xmm11,xmm12 + vpxor xmm4,xmm4,xmm1 + vpunpckhqdq xmm9,xmm14,xmm14 + vpclmulqdq xmm5,xmm8,xmm7,0x10 + vmovdqu xmm7,XMMWORD[((80-64))+rdx] + vpxor xmm9,xmm9,xmm14 + vpxor xmm5,xmm5,xmm2 + + vmovdqu xmm15,XMMWORD[64+r8] + vpalignr xmm12,xmm10,xmm10,8 + vpclmulqdq xmm0,xmm14,xmm6,0x00 + vpshufb xmm15,xmm15,xmm13 + vpxor xmm0,xmm0,xmm3 + vpclmulqdq xmm1,xmm14,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((64-64))+rdx] + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm2,xmm9,xmm7,0x00 + vxorps xmm8,xmm8,xmm15 + vpxor xmm2,xmm2,xmm5 + + vmovdqu xmm14,XMMWORD[48+r8] + vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 + vpclmulqdq xmm3,xmm15,xmm6,0x00 + vpshufb xmm14,xmm14,xmm13 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm4,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((96-64))+rdx] + vpunpckhqdq xmm9,xmm14,xmm14 + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm5,xmm8,xmm7,0x10 + vmovdqu xmm7,XMMWORD[((128-64))+rdx] + vpxor xmm9,xmm9,xmm14 + vpxor xmm5,xmm5,xmm2 + + vmovdqu xmm15,XMMWORD[32+r8] + vpclmulqdq xmm0,xmm14,xmm6,0x00 + vpshufb xmm15,xmm15,xmm13 + vpxor xmm0,xmm0,xmm3 + vpclmulqdq xmm1,xmm14,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((112-64))+rdx] + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm2,xmm9,xmm7,0x00 + vpxor xmm8,xmm8,xmm15 + vpxor xmm2,xmm2,xmm5 + vxorps xmm10,xmm10,xmm12 + + vmovdqu xmm14,XMMWORD[16+r8] + vpalignr xmm12,xmm10,xmm10,8 + vpclmulqdq xmm3,xmm15,xmm6,0x00 + vpshufb xmm14,xmm14,xmm13 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm4,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((144-64))+rdx] + vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 + vxorps xmm12,xmm12,xmm11 + vpunpckhqdq xmm9,xmm14,xmm14 + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm5,xmm8,xmm7,0x10 + vmovdqu xmm7,XMMWORD[((176-64))+rdx] + vpxor xmm9,xmm9,xmm14 + vpxor xmm5,xmm5,xmm2 + + vmovdqu xmm15,XMMWORD[r8] + vpclmulqdq xmm0,xmm14,xmm6,0x00 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((160-64))+rdx] + vpxor xmm15,xmm15,xmm12 + vpclmulqdq xmm2,xmm9,xmm7,0x10 + vpxor xmm15,xmm15,xmm10 + + lea r8,[128+r8] + sub r9,0x80 + jnc NEAR $L$oop8x_avx + + add r9,0x80 + jmp NEAR $L$tail_no_xor_avx + +ALIGN 32 +$L$short_avx: + vmovdqu xmm14,XMMWORD[((-16))+r9*1+r8] + lea r8,[r9*1+r8] + vmovdqu xmm6,XMMWORD[((0-64))+rdx] + vmovdqu xmm7,XMMWORD[((32-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + + vmovdqa xmm3,xmm0 + vmovdqa xmm4,xmm1 + vmovdqa xmm5,xmm2 + sub r9,0x10 + jz NEAR $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,0x00 + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD[((-32))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((16-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,0x00 + vpsrldq xmm7,xmm7,8 + sub r9,0x10 + jz NEAR $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,0x00 + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD[((-48))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((48-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,0x00 + vmovdqu xmm7,XMMWORD[((80-64))+rdx] + sub r9,0x10 + jz NEAR $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,0x00 + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD[((-64))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((64-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,0x00 + vpsrldq xmm7,xmm7,8 + sub r9,0x10 + jz NEAR $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,0x00 + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD[((-80))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((96-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,0x00 + vmovdqu xmm7,XMMWORD[((128-64))+rdx] + sub r9,0x10 + jz NEAR $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,0x00 + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD[((-96))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((112-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,0x00 + vpsrldq xmm7,xmm7,8 + sub r9,0x10 + jz NEAR $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,0x00 + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD[((-112))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,0x11 + vmovdqu xmm6,XMMWORD[((144-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,0x00 + vmovq xmm7,QWORD[((184-64))+rdx] + sub r9,0x10 + jmp NEAR $L$tail_avx + +ALIGN 32 +$L$tail_avx: + vpxor xmm15,xmm15,xmm10 +$L$tail_no_xor_avx: + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,0x00 + vpxor xmm8,xmm8,xmm15 + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,0x11 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,0x00 + + vmovdqu xmm12,XMMWORD[r10] + + vpxor xmm10,xmm3,xmm0 + vpxor xmm11,xmm4,xmm1 + vpxor xmm5,xmm5,xmm2 + + vpxor xmm5,xmm5,xmm10 + vpxor xmm5,xmm5,xmm11 + vpslldq xmm9,xmm5,8 + vpsrldq xmm5,xmm5,8 + vpxor xmm10,xmm10,xmm9 + vpxor xmm11,xmm11,xmm5 + + vpclmulqdq xmm9,xmm10,xmm12,0x10 + vpalignr xmm10,xmm10,xmm10,8 + vpxor xmm10,xmm10,xmm9 + + vpclmulqdq xmm9,xmm10,xmm12,0x10 + vpalignr xmm10,xmm10,xmm10,8 + vpxor xmm10,xmm10,xmm11 + vpxor xmm10,xmm10,xmm9 + + cmp r9,0 + jne NEAR $L$short_avx + + vpshufb xmm10,xmm10,xmm13 + vmovdqu XMMWORD[rcx],xmm10 + vzeroupper + movaps xmm6,XMMWORD[rsp] + movaps xmm7,XMMWORD[16+rsp] + movaps xmm8,XMMWORD[32+rsp] + movaps xmm9,XMMWORD[48+rsp] + movaps xmm10,XMMWORD[64+rsp] + movaps xmm11,XMMWORD[80+rsp] + movaps xmm12,XMMWORD[96+rsp] + movaps xmm13,XMMWORD[112+rsp] + movaps xmm14,XMMWORD[128+rsp] + movaps xmm15,XMMWORD[144+rsp] + lea rsp,[168+rsp] + ret + +$L$SEH_end_gcm_ghash_avx_13: + +section .rdata rdata align=8 +ALIGN 64 +$L$bswap_mask: + DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$0x1c2_polynomial: + DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +$L$7_mask: + DD 7,0,7,0 +ALIGN 64 + + DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 + DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 + DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 + DB 114,103,62,0 +ALIGN 64 +section .text + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_gcm_init_clmul_1 wrt ..imagebase + DD $L$SEH_end_gcm_init_clmul_4 wrt ..imagebase + DD $L$SEH_info_gcm_init_clmul_0 wrt ..imagebase + + DD $L$SEH_begin_gcm_ghash_clmul_1 wrt ..imagebase + DD $L$SEH_end_gcm_ghash_clmul_13 wrt ..imagebase + DD $L$SEH_info_gcm_ghash_clmul_0 wrt ..imagebase + + DD $L$SEH_begin_gcm_init_avx_1 wrt ..imagebase + DD $L$SEH_end_gcm_init_avx_4 wrt ..imagebase + DD $L$SEH_info_gcm_init_avx_0 wrt ..imagebase + + DD $L$SEH_begin_gcm_ghash_avx_1 wrt ..imagebase + DD $L$SEH_end_gcm_ghash_avx_13 wrt ..imagebase + DD $L$SEH_info_gcm_ghash_avx_0 wrt ..imagebase + + +section .xdata rdata align=8 +ALIGN 4 +$L$SEH_info_gcm_init_clmul_0: + DB 1 + DB $L$SEH_prolog_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1 + DB 3 + DB 0 + DB $L$SEH_prolog_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1 + DB 104 + DW 0 + DB $L$SEH_prolog_gcm_init_clmul_2-$L$SEH_begin_gcm_init_clmul_1 + DB 34 + +$L$SEH_info_gcm_ghash_clmul_0: + DB 1 + DB $L$SEH_prolog_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1 + DB 22 + DB 0 + DB $L$SEH_prolog_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1 + DB 248 + DW 9 + DB $L$SEH_prolog_gcm_ghash_clmul_11-$L$SEH_begin_gcm_ghash_clmul_1 + DB 232 + DW 8 + DB $L$SEH_prolog_gcm_ghash_clmul_10-$L$SEH_begin_gcm_ghash_clmul_1 + DB 216 + DW 7 + DB $L$SEH_prolog_gcm_ghash_clmul_9-$L$SEH_begin_gcm_ghash_clmul_1 + DB 200 + DW 6 + DB $L$SEH_prolog_gcm_ghash_clmul_8-$L$SEH_begin_gcm_ghash_clmul_1 + DB 184 + DW 5 + DB $L$SEH_prolog_gcm_ghash_clmul_7-$L$SEH_begin_gcm_ghash_clmul_1 + DB 168 + DW 4 + DB $L$SEH_prolog_gcm_ghash_clmul_6-$L$SEH_begin_gcm_ghash_clmul_1 + DB 152 + DW 3 + DB $L$SEH_prolog_gcm_ghash_clmul_5-$L$SEH_begin_gcm_ghash_clmul_1 + DB 136 + DW 2 + DB $L$SEH_prolog_gcm_ghash_clmul_4-$L$SEH_begin_gcm_ghash_clmul_1 + DB 120 + DW 1 + DB $L$SEH_prolog_gcm_ghash_clmul_3-$L$SEH_begin_gcm_ghash_clmul_1 + DB 104 + DW 0 + DB $L$SEH_prolog_gcm_ghash_clmul_2-$L$SEH_begin_gcm_ghash_clmul_1 + DB 1 + DW 21 + +$L$SEH_info_gcm_init_avx_0: + DB 1 + DB $L$SEH_prolog_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1 + DB 3 + DB 0 + DB $L$SEH_prolog_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1 + DB 104 + DW 0 + DB $L$SEH_prolog_gcm_init_avx_2-$L$SEH_begin_gcm_init_avx_1 + DB 34 + +$L$SEH_info_gcm_ghash_avx_0: + DB 1 + DB $L$SEH_prolog_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1 + DB 22 + DB 0 + DB $L$SEH_prolog_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1 + DB 248 + DW 9 + DB $L$SEH_prolog_gcm_ghash_avx_11-$L$SEH_begin_gcm_ghash_avx_1 + DB 232 + DW 8 + DB $L$SEH_prolog_gcm_ghash_avx_10-$L$SEH_begin_gcm_ghash_avx_1 + DB 216 + DW 7 + DB $L$SEH_prolog_gcm_ghash_avx_9-$L$SEH_begin_gcm_ghash_avx_1 + DB 200 + DW 6 + DB $L$SEH_prolog_gcm_ghash_avx_8-$L$SEH_begin_gcm_ghash_avx_1 + DB 184 + DW 5 + DB $L$SEH_prolog_gcm_ghash_avx_7-$L$SEH_begin_gcm_ghash_avx_1 + DB 168 + DW 4 + DB $L$SEH_prolog_gcm_ghash_avx_6-$L$SEH_begin_gcm_ghash_avx_1 + DB 152 + DW 3 + DB $L$SEH_prolog_gcm_ghash_avx_5-$L$SEH_begin_gcm_ghash_avx_1 + DB 136 + DW 2 + DB $L$SEH_prolog_gcm_ghash_avx_4-$L$SEH_begin_gcm_ghash_avx_1 + DB 120 + DW 1 + DB $L$SEH_prolog_gcm_ghash_avx_3-$L$SEH_begin_gcm_ghash_avx_1 + DB 104 + DW 0 + DB $L$SEH_prolog_gcm_ghash_avx_2-$L$SEH_begin_gcm_ghash_avx_1 + DB 1 + DW 21 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv7-linux.S b/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv7-linux.S new file mode 100644 index 0000000000..fab4c124c2 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv7-linux.S @@ -0,0 +1,246 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +#include + +#if __ARM_MAX_ARCH__>=7 +.text +.fpu neon +.code 32 +#undef __thumb2__ +.globl gcm_init_v8 +.hidden gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + AARCH64_VALID_CALL_TARGET + vld1.64 {q9},[r1] @ load input H + vmov.i8 q11,#0xe1 + vshl.i64 q11,q11,#57 @ 0xc2.0 + vext.8 q3,q9,q9,#8 + vshr.u64 q10,q11,#63 + vdup.32 q9,d18[1] + vext.8 q8,q10,q11,#8 @ t0=0xc2....01 + vshr.u64 q10,q3,#63 + vshr.s32 q9,q9,#31 @ broadcast carry bit + vand q10,q10,q8 + vshl.i64 q3,q3,#1 + vext.8 q10,q10,q10,#8 + vand q8,q8,q9 + vorr q3,q3,q10 @ H<<<=1 + veor q12,q3,q8 @ twisted H + vst1.64 {q12},[r0]! @ store Htable[0] + + @ calculate H^2 + vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing +.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12 + veor q8,q8,q12 +.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12 +.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 +.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase +.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q14,q0,q10 + + vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing + veor q9,q9,q14 + vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed + vst1.64 {q13,q14},[r0]! @ store Htable[1..2] + bx lr +.size gcm_init_v8,.-gcm_init_v8 +.globl gcm_gmult_v8 +.hidden gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + AARCH64_VALID_CALL_TARGET + vld1.64 {q9},[r0] @ load Xi + vmov.i8 q11,#0xe1 + vld1.64 {q12,q13},[r1] @ load twisted H, ... + vshl.u64 q11,q11,#57 +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vext.8 q3,q9,q9,#8 + +.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing +.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi +.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 +.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction +.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + bx lr +.size gcm_gmult_v8,.-gcm_gmult_v8 +.globl gcm_ghash_v8 +.hidden gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + AARCH64_VALID_CALL_TARGET + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so + vld1.64 {q0},[r0] @ load [rotated] Xi + @ "[rotated]" means that + @ loaded value would have + @ to be rotated in order to + @ make it appear as in + @ algorithm specification + subs r3,r3,#32 @ see if r3 is 32 or larger + mov r12,#16 @ r12 is used as post- + @ increment for input pointer; + @ as loop is modulo-scheduled + @ r12 is zeroed just in time + @ to preclude overstepping + @ inp[len], which means that + @ last block[s] are actually + @ loaded twice, but last + @ copy is not processed + vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2 + vmov.i8 q11,#0xe1 + vld1.64 {q14},[r1] + moveq r12,#0 @ is it time to zero r12? + vext.8 q0,q0,q0,#8 @ rotate Xi + vld1.64 {q8},[r2]! @ load [rotated] I[0] + vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant +#ifndef __ARMEB__ + vrev64.8 q8,q8 + vrev64.8 q0,q0 +#endif + vext.8 q3,q8,q8,#8 @ rotate I[0] + blo .Lodd_tail_v8 @ r3 was less than 32 + vld1.64 {q9},[r2],r12 @ load [rotated] I[1] +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vext.8 q7,q9,q9,#8 + veor q3,q3,q0 @ I[i]^=Xi +.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 + veor q9,q9,q7 @ Karatsuba pre-processing +.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 + b .Loop_mod2x_v8 + +.align 4 +.Loop_mod2x_v8: + vext.8 q10,q3,q3,#8 + subs r3,r3,#32 @ is there more data? +.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo + movlo r12,#0 @ is it time to zero r12? + +.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9 + veor q10,q10,q3 @ Karatsuba pre-processing +.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi + veor q0,q0,q4 @ accumulate +.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2] + + veor q2,q2,q6 + moveq r12,#0 @ is it time to zero r12? + veor q1,q1,q5 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3] +#ifndef __ARMEB__ + vrev64.8 q8,q8 +#endif + veor q1,q1,q10 +.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + vext.8 q7,q9,q9,#8 + vext.8 q3,q8,q8,#8 + veor q0,q1,q10 +.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 + veor q3,q3,q2 @ accumulate q3 early + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction +.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q3,q3,q10 + veor q9,q9,q7 @ Karatsuba pre-processing + veor q3,q3,q0 +.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 + bhs .Loop_mod2x_v8 @ there was at least 32 more bytes + + veor q2,q2,q10 + vext.8 q3,q8,q8,#8 @ re-construct q3 + adds r3,r3,#32 @ re-construct r3 + veor q0,q0,q2 @ re-construct q0 + beq .Ldone_v8 @ is r3 zero? +.Lodd_tail_v8: + vext.8 q10,q0,q0,#8 + veor q3,q3,q0 @ inp^=Xi + veor q9,q8,q10 @ q9 is rotated inp^Xi + +.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing +.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi +.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 +.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction +.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + +.Ldone_v8: +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so + bx lr +.size gcm_ghash_v8,.-gcm_ghash_v8 +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-apple.S new file mode 100644 index 0000000000..6bc8a4f269 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-apple.S @@ -0,0 +1,565 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +#if __ARM_MAX_ARCH__>=7 +.text + +.globl _gcm_init_v8 +.private_extern _gcm_init_v8 + +.align 4 +_gcm_init_v8: + AARCH64_VALID_CALL_TARGET + ld1 {v17.2d},[x1] //load input H + movi v19.16b,#0xe1 + shl v19.2d,v19.2d,#57 //0xc2.0 + ext v3.16b,v17.16b,v17.16b,#8 + ushr v18.2d,v19.2d,#63 + dup v17.4s,v17.s[1] + ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01 + ushr v18.2d,v3.2d,#63 + sshr v17.4s,v17.4s,#31 //broadcast carry bit + and v18.16b,v18.16b,v16.16b + shl v3.2d,v3.2d,#1 + ext v18.16b,v18.16b,v18.16b,#8 + and v16.16b,v16.16b,v17.16b + orr v3.16b,v3.16b,v18.16b //H<<<=1 + eor v20.16b,v3.16b,v16.16b //twisted H + st1 {v20.2d},[x0],#16 //store Htable[0] + + //calculate H^2 + ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing + pmull v0.1q,v20.1d,v20.1d + eor v16.16b,v16.16b,v20.16b + pmull2 v2.1q,v20.2d,v20.2d + pmull v1.1q,v16.1d,v16.1d + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v22.16b,v0.16b,v18.16b + + ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2] + //calculate H^3 and H^4 + pmull v0.1q,v20.1d, v22.1d + pmull v5.1q,v22.1d,v22.1d + pmull2 v2.1q,v20.2d, v22.2d + pmull2 v7.1q,v22.2d,v22.2d + pmull v1.1q,v16.1d,v17.1d + pmull v6.1q,v17.1d,v17.1d + + ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + ext v17.16b,v5.16b,v7.16b,#8 + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v16.16b + eor v4.16b,v5.16b,v7.16b + eor v6.16b,v6.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + eor v6.16b,v6.16b,v4.16b + pmull v4.1q,v5.1d,v19.1d + + ins v2.d[0],v1.d[1] + ins v7.d[0],v6.d[1] + ins v1.d[1],v0.d[0] + ins v6.d[1],v5.d[0] + eor v0.16b,v1.16b,v18.16b + eor v5.16b,v6.16b,v4.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + ext v4.16b,v5.16b,v5.16b,#8 + pmull v0.1q,v0.1d,v19.1d + pmull v5.1q,v5.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v4.16b,v4.16b,v7.16b + eor v20.16b, v0.16b,v18.16b //H^3 + eor v22.16b,v5.16b,v4.16b //H^4 + + ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing + ext v17.16b,v22.16b,v22.16b,#8 + eor v16.16b,v16.16b,v20.16b + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5] + ret + +.globl _gcm_gmult_v8 +.private_extern _gcm_gmult_v8 + +.align 4 +_gcm_gmult_v8: + AARCH64_VALID_CALL_TARGET + ld1 {v17.2d},[x0] //load Xi + movi v19.16b,#0xe1 + ld1 {v20.2d,v21.2d},[x1] //load twisted H, ... + shl v19.2d,v19.2d,#57 +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ext v3.16b,v17.16b,v17.16b,#8 + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret + +.globl _gcm_ghash_v8 +.private_extern _gcm_ghash_v8 + +.align 4 +_gcm_ghash_v8: + AARCH64_VALID_CALL_TARGET + cmp x3,#64 + b.hs Lgcm_ghash_v8_4x + ld1 {v0.2d},[x0] //load [rotated] Xi + //"[rotated]" means that + //loaded value would have + //to be rotated in order to + //make it appear as in + //algorithm specification + subs x3,x3,#32 //see if x3 is 32 or larger + mov x12,#16 //x12 is used as post- + //increment for input pointer; + //as loop is modulo-scheduled + //x12 is zeroed just in time + //to preclude overstepping + //inp[len], which means that + //last block[s] are actually + //loaded twice, but last + //copy is not processed + ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v22.2d},[x1] + csel x12,xzr,x12,eq //is it time to zero x12? + ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi + ld1 {v16.2d},[x2],#16 //load [rotated] I[0] + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant +#ifndef __AARCH64EB__ + rev64 v16.16b,v16.16b + rev64 v0.16b,v0.16b +#endif + ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0] + b.lo Lodd_tail_v8 //x3 was less than 32 + ld1 {v17.2d},[x2],x12 //load [rotated] I[1] +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ext v7.16b,v17.16b,v17.16b,#8 + eor v3.16b,v3.16b,v0.16b //I[i]^=Xi + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + pmull2 v6.1q,v20.2d,v7.2d + b Loop_mod2x_v8 + +.align 4 +Loop_mod2x_v8: + ext v18.16b,v3.16b,v3.16b,#8 + subs x3,x3,#32 //is there more data? + pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo + csel x12,xzr,x12,lo //is it time to zero x12? + + pmull v5.1q,v21.1d,v17.1d + eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi + eor v0.16b,v0.16b,v4.16b //accumulate + pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2] + + eor v2.16b,v2.16b,v6.16b + csel x12,xzr,x12,eq //is it time to zero x12? + eor v1.16b,v1.16b,v5.16b + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3] +#ifndef __AARCH64EB__ + rev64 v16.16b,v16.16b +#endif + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v7.16b,v17.16b,v17.16b,#8 + ext v3.16b,v16.16b,v16.16b,#8 + eor v0.16b,v1.16b,v18.16b + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v3.16b,v3.16b,v18.16b + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + eor v3.16b,v3.16b,v0.16b + pmull2 v6.1q,v20.2d,v7.2d + b.hs Loop_mod2x_v8 //there was at least 32 more bytes + + eor v2.16b,v2.16b,v18.16b + ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b + adds x3,x3,#32 //re-construct x3 + eor v0.16b,v0.16b,v2.16b //re-construct v0.16b + b.eq Ldone_v8 //is x3 zero? +Lodd_tail_v8: + ext v18.16b,v0.16b,v0.16b,#8 + eor v3.16b,v3.16b,v0.16b //inp^=Xi + eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +Ldone_v8: +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret + + +.align 4 +gcm_ghash_v8_4x: +Lgcm_ghash_v8_4x: + ld1 {v0.2d},[x0] //load [rotated] Xi + ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4 + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant + + ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v7.16b,v7.16b + rev64 v4.16b,v4.16b +#endif + ext v25.16b,v7.16b,v7.16b,#8 + ext v24.16b,v6.16b,v6.16b,#8 + ext v23.16b,v5.16b,v5.16b,#8 + + pmull v29.1q,v20.1d,v25.1d //H·Ii+3 + eor v7.16b,v7.16b,v25.16b + pmull2 v31.1q,v20.2d,v25.2d + pmull v30.1q,v21.1d,v7.1d + + pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 + eor v6.16b,v6.16b,v24.16b + pmull2 v24.1q,v22.2d,v24.2d + pmull2 v6.1q,v21.2d,v6.2d + + eor v29.16b,v29.16b,v16.16b + eor v31.16b,v31.16b,v24.16b + eor v30.16b,v30.16b,v6.16b + + pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 + eor v5.16b,v5.16b,v23.16b + pmull2 v23.1q,v26.2d,v23.2d + pmull v5.1q,v27.1d,v5.1d + + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + eor v30.16b,v30.16b,v5.16b + + subs x3,x3,#128 + b.lo Ltail4x + + b Loop4x + +.align 4 +Loop4x: + eor v16.16b,v4.16b,v0.16b + ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 + ext v3.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v7.16b,v7.16b + rev64 v4.16b,v4.16b +#endif + + pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v28.2d,v3.2d + ext v25.16b,v7.16b,v7.16b,#8 + pmull2 v1.1q,v27.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + ext v24.16b,v6.16b,v6.16b,#8 + eor v1.16b,v1.16b,v30.16b + ext v23.16b,v5.16b,v5.16b,#8 + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + pmull v29.1q,v20.1d,v25.1d //H·Ii+3 + eor v7.16b,v7.16b,v25.16b + eor v1.16b,v1.16b,v17.16b + pmull2 v31.1q,v20.2d,v25.2d + eor v1.16b,v1.16b,v18.16b + pmull v30.1q,v21.1d,v7.1d + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 + eor v6.16b,v6.16b,v24.16b + pmull2 v24.1q,v22.2d,v24.2d + eor v0.16b,v1.16b,v18.16b + pmull2 v6.1q,v21.2d,v6.2d + + eor v29.16b,v29.16b,v16.16b + eor v31.16b,v31.16b,v24.16b + eor v30.16b,v30.16b,v6.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 + eor v5.16b,v5.16b,v23.16b + eor v18.16b,v18.16b,v2.16b + pmull2 v23.1q,v26.2d,v23.2d + pmull v5.1q,v27.1d,v5.1d + + eor v0.16b,v0.16b,v18.16b + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + ext v0.16b,v0.16b,v0.16b,#8 + eor v30.16b,v30.16b,v5.16b + + subs x3,x3,#64 + b.hs Loop4x + +Ltail4x: + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v28.2d,v3.2d + pmull2 v1.1q,v27.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + + adds x3,x3,#64 + b.eq Ldone4x + + cmp x3,#32 + b.lo Lone + b.eq Ltwo +Lthree: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d,v5.2d,v6.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v24.16b,v6.16b,v6.16b,#8 + ext v23.16b,v5.16b,v5.16b,#8 + eor v0.16b,v1.16b,v18.16b + + pmull v29.1q,v20.1d,v24.1d //H·Ii+2 + eor v6.16b,v6.16b,v24.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + pmull2 v31.1q,v20.2d,v24.2d + pmull v30.1q,v21.1d,v6.1d + eor v0.16b,v0.16b,v18.16b + pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1 + eor v5.16b,v5.16b,v23.16b + ext v0.16b,v0.16b,v0.16b,#8 + + pmull2 v23.1q,v22.2d,v23.2d + eor v16.16b,v4.16b,v0.16b + pmull2 v5.1q,v21.2d,v5.2d + ext v3.16b,v16.16b,v16.16b,#8 + + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + eor v30.16b,v30.16b,v5.16b + + pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v26.2d,v3.2d + pmull v1.1q,v27.1d,v16.1d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + b Ldone4x + +.align 4 +Ltwo: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d,v5.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v23.16b,v5.16b,v5.16b,#8 + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + + pmull v29.1q,v20.1d,v23.1d //H·Ii+1 + eor v5.16b,v5.16b,v23.16b + + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull2 v31.1q,v20.2d,v23.2d + pmull v30.1q,v21.1d,v5.1d + + pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v22.2d,v3.2d + pmull2 v1.1q,v21.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + b Ldone4x + +.align 4 +Lone: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull v0.1q,v20.1d,v3.1d + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v20.2d,v3.2d + pmull v1.1q,v21.1d,v16.1d + +Ldone4x: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + st1 {v0.2d},[x0] //write out Xi + + ret + +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-linux.S new file mode 100644 index 0000000000..de6f71294a --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-linux.S @@ -0,0 +1,565 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv8-a+crypto +.globl gcm_init_v8 +.hidden gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + AARCH64_VALID_CALL_TARGET + ld1 {v17.2d},[x1] //load input H + movi v19.16b,#0xe1 + shl v19.2d,v19.2d,#57 //0xc2.0 + ext v3.16b,v17.16b,v17.16b,#8 + ushr v18.2d,v19.2d,#63 + dup v17.4s,v17.s[1] + ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01 + ushr v18.2d,v3.2d,#63 + sshr v17.4s,v17.4s,#31 //broadcast carry bit + and v18.16b,v18.16b,v16.16b + shl v3.2d,v3.2d,#1 + ext v18.16b,v18.16b,v18.16b,#8 + and v16.16b,v16.16b,v17.16b + orr v3.16b,v3.16b,v18.16b //H<<<=1 + eor v20.16b,v3.16b,v16.16b //twisted H + st1 {v20.2d},[x0],#16 //store Htable[0] + + //calculate H^2 + ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing + pmull v0.1q,v20.1d,v20.1d + eor v16.16b,v16.16b,v20.16b + pmull2 v2.1q,v20.2d,v20.2d + pmull v1.1q,v16.1d,v16.1d + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v22.16b,v0.16b,v18.16b + + ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2] + //calculate H^3 and H^4 + pmull v0.1q,v20.1d, v22.1d + pmull v5.1q,v22.1d,v22.1d + pmull2 v2.1q,v20.2d, v22.2d + pmull2 v7.1q,v22.2d,v22.2d + pmull v1.1q,v16.1d,v17.1d + pmull v6.1q,v17.1d,v17.1d + + ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + ext v17.16b,v5.16b,v7.16b,#8 + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v16.16b + eor v4.16b,v5.16b,v7.16b + eor v6.16b,v6.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + eor v6.16b,v6.16b,v4.16b + pmull v4.1q,v5.1d,v19.1d + + ins v2.d[0],v1.d[1] + ins v7.d[0],v6.d[1] + ins v1.d[1],v0.d[0] + ins v6.d[1],v5.d[0] + eor v0.16b,v1.16b,v18.16b + eor v5.16b,v6.16b,v4.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + ext v4.16b,v5.16b,v5.16b,#8 + pmull v0.1q,v0.1d,v19.1d + pmull v5.1q,v5.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v4.16b,v4.16b,v7.16b + eor v20.16b, v0.16b,v18.16b //H^3 + eor v22.16b,v5.16b,v4.16b //H^4 + + ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing + ext v17.16b,v22.16b,v22.16b,#8 + eor v16.16b,v16.16b,v20.16b + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5] + ret +.size gcm_init_v8,.-gcm_init_v8 +.globl gcm_gmult_v8 +.hidden gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + AARCH64_VALID_CALL_TARGET + ld1 {v17.2d},[x0] //load Xi + movi v19.16b,#0xe1 + ld1 {v20.2d,v21.2d},[x1] //load twisted H, ... + shl v19.2d,v19.2d,#57 +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ext v3.16b,v17.16b,v17.16b,#8 + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_gmult_v8,.-gcm_gmult_v8 +.globl gcm_ghash_v8 +.hidden gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + AARCH64_VALID_CALL_TARGET + cmp x3,#64 + b.hs .Lgcm_ghash_v8_4x + ld1 {v0.2d},[x0] //load [rotated] Xi + //"[rotated]" means that + //loaded value would have + //to be rotated in order to + //make it appear as in + //algorithm specification + subs x3,x3,#32 //see if x3 is 32 or larger + mov x12,#16 //x12 is used as post- + //increment for input pointer; + //as loop is modulo-scheduled + //x12 is zeroed just in time + //to preclude overstepping + //inp[len], which means that + //last block[s] are actually + //loaded twice, but last + //copy is not processed + ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v22.2d},[x1] + csel x12,xzr,x12,eq //is it time to zero x12? + ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi + ld1 {v16.2d},[x2],#16 //load [rotated] I[0] + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant +#ifndef __AARCH64EB__ + rev64 v16.16b,v16.16b + rev64 v0.16b,v0.16b +#endif + ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0] + b.lo .Lodd_tail_v8 //x3 was less than 32 + ld1 {v17.2d},[x2],x12 //load [rotated] I[1] +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ext v7.16b,v17.16b,v17.16b,#8 + eor v3.16b,v3.16b,v0.16b //I[i]^=Xi + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + pmull2 v6.1q,v20.2d,v7.2d + b .Loop_mod2x_v8 + +.align 4 +.Loop_mod2x_v8: + ext v18.16b,v3.16b,v3.16b,#8 + subs x3,x3,#32 //is there more data? + pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo + csel x12,xzr,x12,lo //is it time to zero x12? + + pmull v5.1q,v21.1d,v17.1d + eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi + eor v0.16b,v0.16b,v4.16b //accumulate + pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2] + + eor v2.16b,v2.16b,v6.16b + csel x12,xzr,x12,eq //is it time to zero x12? + eor v1.16b,v1.16b,v5.16b + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3] +#ifndef __AARCH64EB__ + rev64 v16.16b,v16.16b +#endif + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v7.16b,v17.16b,v17.16b,#8 + ext v3.16b,v16.16b,v16.16b,#8 + eor v0.16b,v1.16b,v18.16b + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v3.16b,v3.16b,v18.16b + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + eor v3.16b,v3.16b,v0.16b + pmull2 v6.1q,v20.2d,v7.2d + b.hs .Loop_mod2x_v8 //there was at least 32 more bytes + + eor v2.16b,v2.16b,v18.16b + ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b + adds x3,x3,#32 //re-construct x3 + eor v0.16b,v0.16b,v2.16b //re-construct v0.16b + b.eq .Ldone_v8 //is x3 zero? +.Lodd_tail_v8: + ext v18.16b,v0.16b,v0.16b,#8 + eor v3.16b,v3.16b,v0.16b //inp^=Xi + eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +.Ldone_v8: +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +.type gcm_ghash_v8_4x,%function +.align 4 +gcm_ghash_v8_4x: +.Lgcm_ghash_v8_4x: + ld1 {v0.2d},[x0] //load [rotated] Xi + ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4 + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant + + ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v7.16b,v7.16b + rev64 v4.16b,v4.16b +#endif + ext v25.16b,v7.16b,v7.16b,#8 + ext v24.16b,v6.16b,v6.16b,#8 + ext v23.16b,v5.16b,v5.16b,#8 + + pmull v29.1q,v20.1d,v25.1d //H·Ii+3 + eor v7.16b,v7.16b,v25.16b + pmull2 v31.1q,v20.2d,v25.2d + pmull v30.1q,v21.1d,v7.1d + + pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 + eor v6.16b,v6.16b,v24.16b + pmull2 v24.1q,v22.2d,v24.2d + pmull2 v6.1q,v21.2d,v6.2d + + eor v29.16b,v29.16b,v16.16b + eor v31.16b,v31.16b,v24.16b + eor v30.16b,v30.16b,v6.16b + + pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 + eor v5.16b,v5.16b,v23.16b + pmull2 v23.1q,v26.2d,v23.2d + pmull v5.1q,v27.1d,v5.1d + + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + eor v30.16b,v30.16b,v5.16b + + subs x3,x3,#128 + b.lo .Ltail4x + + b .Loop4x + +.align 4 +.Loop4x: + eor v16.16b,v4.16b,v0.16b + ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 + ext v3.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v7.16b,v7.16b + rev64 v4.16b,v4.16b +#endif + + pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v28.2d,v3.2d + ext v25.16b,v7.16b,v7.16b,#8 + pmull2 v1.1q,v27.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + ext v24.16b,v6.16b,v6.16b,#8 + eor v1.16b,v1.16b,v30.16b + ext v23.16b,v5.16b,v5.16b,#8 + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + pmull v29.1q,v20.1d,v25.1d //H·Ii+3 + eor v7.16b,v7.16b,v25.16b + eor v1.16b,v1.16b,v17.16b + pmull2 v31.1q,v20.2d,v25.2d + eor v1.16b,v1.16b,v18.16b + pmull v30.1q,v21.1d,v7.1d + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 + eor v6.16b,v6.16b,v24.16b + pmull2 v24.1q,v22.2d,v24.2d + eor v0.16b,v1.16b,v18.16b + pmull2 v6.1q,v21.2d,v6.2d + + eor v29.16b,v29.16b,v16.16b + eor v31.16b,v31.16b,v24.16b + eor v30.16b,v30.16b,v6.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 + eor v5.16b,v5.16b,v23.16b + eor v18.16b,v18.16b,v2.16b + pmull2 v23.1q,v26.2d,v23.2d + pmull v5.1q,v27.1d,v5.1d + + eor v0.16b,v0.16b,v18.16b + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + ext v0.16b,v0.16b,v0.16b,#8 + eor v30.16b,v30.16b,v5.16b + + subs x3,x3,#64 + b.hs .Loop4x + +.Ltail4x: + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v28.2d,v3.2d + pmull2 v1.1q,v27.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + + adds x3,x3,#64 + b.eq .Ldone4x + + cmp x3,#32 + b.lo .Lone + b.eq .Ltwo +.Lthree: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d,v5.2d,v6.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v24.16b,v6.16b,v6.16b,#8 + ext v23.16b,v5.16b,v5.16b,#8 + eor v0.16b,v1.16b,v18.16b + + pmull v29.1q,v20.1d,v24.1d //H·Ii+2 + eor v6.16b,v6.16b,v24.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + pmull2 v31.1q,v20.2d,v24.2d + pmull v30.1q,v21.1d,v6.1d + eor v0.16b,v0.16b,v18.16b + pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1 + eor v5.16b,v5.16b,v23.16b + ext v0.16b,v0.16b,v0.16b,#8 + + pmull2 v23.1q,v22.2d,v23.2d + eor v16.16b,v4.16b,v0.16b + pmull2 v5.1q,v21.2d,v5.2d + ext v3.16b,v16.16b,v16.16b,#8 + + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + eor v30.16b,v30.16b,v5.16b + + pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v26.2d,v3.2d + pmull v1.1q,v27.1d,v16.1d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + b .Ldone4x + +.align 4 +.Ltwo: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d,v5.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v23.16b,v5.16b,v5.16b,#8 + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + + pmull v29.1q,v20.1d,v23.1d //H·Ii+1 + eor v5.16b,v5.16b,v23.16b + + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull2 v31.1q,v20.2d,v23.2d + pmull v30.1q,v21.1d,v5.1d + + pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v22.2d,v3.2d + pmull2 v1.1q,v21.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + b .Ldone4x + +.align 4 +.Lone: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull v0.1q,v20.1d,v3.1d + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v20.2d,v3.2d + pmull v1.1q,v21.1d,v16.1d + +.Ldone4x: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-win.S new file mode 100644 index 0000000000..0be9ac6768 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/ghashv8-armv8-win.S @@ -0,0 +1,573 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv8-a+crypto +.globl gcm_init_v8 + +.def gcm_init_v8 + .type 32 +.endef +.align 4 +gcm_init_v8: + AARCH64_VALID_CALL_TARGET + ld1 {v17.2d},[x1] //load input H + movi v19.16b,#0xe1 + shl v19.2d,v19.2d,#57 //0xc2.0 + ext v3.16b,v17.16b,v17.16b,#8 + ushr v18.2d,v19.2d,#63 + dup v17.4s,v17.s[1] + ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01 + ushr v18.2d,v3.2d,#63 + sshr v17.4s,v17.4s,#31 //broadcast carry bit + and v18.16b,v18.16b,v16.16b + shl v3.2d,v3.2d,#1 + ext v18.16b,v18.16b,v18.16b,#8 + and v16.16b,v16.16b,v17.16b + orr v3.16b,v3.16b,v18.16b //H<<<=1 + eor v20.16b,v3.16b,v16.16b //twisted H + st1 {v20.2d},[x0],#16 //store Htable[0] + + //calculate H^2 + ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing + pmull v0.1q,v20.1d,v20.1d + eor v16.16b,v16.16b,v20.16b + pmull2 v2.1q,v20.2d,v20.2d + pmull v1.1q,v16.1d,v16.1d + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v22.16b,v0.16b,v18.16b + + ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2] + //calculate H^3 and H^4 + pmull v0.1q,v20.1d, v22.1d + pmull v5.1q,v22.1d,v22.1d + pmull2 v2.1q,v20.2d, v22.2d + pmull2 v7.1q,v22.2d,v22.2d + pmull v1.1q,v16.1d,v17.1d + pmull v6.1q,v17.1d,v17.1d + + ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + ext v17.16b,v5.16b,v7.16b,#8 + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v16.16b + eor v4.16b,v5.16b,v7.16b + eor v6.16b,v6.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + eor v6.16b,v6.16b,v4.16b + pmull v4.1q,v5.1d,v19.1d + + ins v2.d[0],v1.d[1] + ins v7.d[0],v6.d[1] + ins v1.d[1],v0.d[0] + ins v6.d[1],v5.d[0] + eor v0.16b,v1.16b,v18.16b + eor v5.16b,v6.16b,v4.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + ext v4.16b,v5.16b,v5.16b,#8 + pmull v0.1q,v0.1d,v19.1d + pmull v5.1q,v5.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v4.16b,v4.16b,v7.16b + eor v20.16b, v0.16b,v18.16b //H^3 + eor v22.16b,v5.16b,v4.16b //H^4 + + ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing + ext v17.16b,v22.16b,v22.16b,#8 + eor v16.16b,v16.16b,v20.16b + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5] + ret + +.globl gcm_gmult_v8 + +.def gcm_gmult_v8 + .type 32 +.endef +.align 4 +gcm_gmult_v8: + AARCH64_VALID_CALL_TARGET + ld1 {v17.2d},[x0] //load Xi + movi v19.16b,#0xe1 + ld1 {v20.2d,v21.2d},[x1] //load twisted H, ... + shl v19.2d,v19.2d,#57 +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ext v3.16b,v17.16b,v17.16b,#8 + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret + +.globl gcm_ghash_v8 + +.def gcm_ghash_v8 + .type 32 +.endef +.align 4 +gcm_ghash_v8: + AARCH64_VALID_CALL_TARGET + cmp x3,#64 + b.hs Lgcm_ghash_v8_4x + ld1 {v0.2d},[x0] //load [rotated] Xi + //"[rotated]" means that + //loaded value would have + //to be rotated in order to + //make it appear as in + //algorithm specification + subs x3,x3,#32 //see if x3 is 32 or larger + mov x12,#16 //x12 is used as post- + //increment for input pointer; + //as loop is modulo-scheduled + //x12 is zeroed just in time + //to preclude overstepping + //inp[len], which means that + //last block[s] are actually + //loaded twice, but last + //copy is not processed + ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v22.2d},[x1] + csel x12,xzr,x12,eq //is it time to zero x12? + ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi + ld1 {v16.2d},[x2],#16 //load [rotated] I[0] + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant +#ifndef __AARCH64EB__ + rev64 v16.16b,v16.16b + rev64 v0.16b,v0.16b +#endif + ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0] + b.lo Lodd_tail_v8 //x3 was less than 32 + ld1 {v17.2d},[x2],x12 //load [rotated] I[1] +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ext v7.16b,v17.16b,v17.16b,#8 + eor v3.16b,v3.16b,v0.16b //I[i]^=Xi + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + pmull2 v6.1q,v20.2d,v7.2d + b Loop_mod2x_v8 + +.align 4 +Loop_mod2x_v8: + ext v18.16b,v3.16b,v3.16b,#8 + subs x3,x3,#32 //is there more data? + pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo + csel x12,xzr,x12,lo //is it time to zero x12? + + pmull v5.1q,v21.1d,v17.1d + eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi + eor v0.16b,v0.16b,v4.16b //accumulate + pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2] + + eor v2.16b,v2.16b,v6.16b + csel x12,xzr,x12,eq //is it time to zero x12? + eor v1.16b,v1.16b,v5.16b + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3] +#ifndef __AARCH64EB__ + rev64 v16.16b,v16.16b +#endif + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v7.16b,v17.16b,v17.16b,#8 + ext v3.16b,v16.16b,v16.16b,#8 + eor v0.16b,v1.16b,v18.16b + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v3.16b,v3.16b,v18.16b + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + eor v3.16b,v3.16b,v0.16b + pmull2 v6.1q,v20.2d,v7.2d + b.hs Loop_mod2x_v8 //there was at least 32 more bytes + + eor v2.16b,v2.16b,v18.16b + ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b + adds x3,x3,#32 //re-construct x3 + eor v0.16b,v0.16b,v2.16b //re-construct v0.16b + b.eq Ldone_v8 //is x3 zero? +Lodd_tail_v8: + ext v18.16b,v0.16b,v0.16b,#8 + eor v3.16b,v3.16b,v0.16b //inp^=Xi + eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +Ldone_v8: +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret + +.def gcm_ghash_v8_4x + .type 32 +.endef +.align 4 +gcm_ghash_v8_4x: +Lgcm_ghash_v8_4x: + ld1 {v0.2d},[x0] //load [rotated] Xi + ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4 + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant + + ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v7.16b,v7.16b + rev64 v4.16b,v4.16b +#endif + ext v25.16b,v7.16b,v7.16b,#8 + ext v24.16b,v6.16b,v6.16b,#8 + ext v23.16b,v5.16b,v5.16b,#8 + + pmull v29.1q,v20.1d,v25.1d //H·Ii+3 + eor v7.16b,v7.16b,v25.16b + pmull2 v31.1q,v20.2d,v25.2d + pmull v30.1q,v21.1d,v7.1d + + pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 + eor v6.16b,v6.16b,v24.16b + pmull2 v24.1q,v22.2d,v24.2d + pmull2 v6.1q,v21.2d,v6.2d + + eor v29.16b,v29.16b,v16.16b + eor v31.16b,v31.16b,v24.16b + eor v30.16b,v30.16b,v6.16b + + pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 + eor v5.16b,v5.16b,v23.16b + pmull2 v23.1q,v26.2d,v23.2d + pmull v5.1q,v27.1d,v5.1d + + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + eor v30.16b,v30.16b,v5.16b + + subs x3,x3,#128 + b.lo Ltail4x + + b Loop4x + +.align 4 +Loop4x: + eor v16.16b,v4.16b,v0.16b + ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 + ext v3.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v7.16b,v7.16b + rev64 v4.16b,v4.16b +#endif + + pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v28.2d,v3.2d + ext v25.16b,v7.16b,v7.16b,#8 + pmull2 v1.1q,v27.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + ext v24.16b,v6.16b,v6.16b,#8 + eor v1.16b,v1.16b,v30.16b + ext v23.16b,v5.16b,v5.16b,#8 + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + pmull v29.1q,v20.1d,v25.1d //H·Ii+3 + eor v7.16b,v7.16b,v25.16b + eor v1.16b,v1.16b,v17.16b + pmull2 v31.1q,v20.2d,v25.2d + eor v1.16b,v1.16b,v18.16b + pmull v30.1q,v21.1d,v7.1d + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 + eor v6.16b,v6.16b,v24.16b + pmull2 v24.1q,v22.2d,v24.2d + eor v0.16b,v1.16b,v18.16b + pmull2 v6.1q,v21.2d,v6.2d + + eor v29.16b,v29.16b,v16.16b + eor v31.16b,v31.16b,v24.16b + eor v30.16b,v30.16b,v6.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 + eor v5.16b,v5.16b,v23.16b + eor v18.16b,v18.16b,v2.16b + pmull2 v23.1q,v26.2d,v23.2d + pmull v5.1q,v27.1d,v5.1d + + eor v0.16b,v0.16b,v18.16b + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + ext v0.16b,v0.16b,v0.16b,#8 + eor v30.16b,v30.16b,v5.16b + + subs x3,x3,#64 + b.hs Loop4x + +Ltail4x: + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v28.2d,v3.2d + pmull2 v1.1q,v27.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + + adds x3,x3,#64 + b.eq Ldone4x + + cmp x3,#32 + b.lo Lone + b.eq Ltwo +Lthree: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d,v5.2d,v6.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v24.16b,v6.16b,v6.16b,#8 + ext v23.16b,v5.16b,v5.16b,#8 + eor v0.16b,v1.16b,v18.16b + + pmull v29.1q,v20.1d,v24.1d //H·Ii+2 + eor v6.16b,v6.16b,v24.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + pmull2 v31.1q,v20.2d,v24.2d + pmull v30.1q,v21.1d,v6.1d + eor v0.16b,v0.16b,v18.16b + pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1 + eor v5.16b,v5.16b,v23.16b + ext v0.16b,v0.16b,v0.16b,#8 + + pmull2 v23.1q,v22.2d,v23.2d + eor v16.16b,v4.16b,v0.16b + pmull2 v5.1q,v21.2d,v5.2d + ext v3.16b,v16.16b,v16.16b,#8 + + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + eor v30.16b,v30.16b,v5.16b + + pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v26.2d,v3.2d + pmull v1.1q,v27.1d,v16.1d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + b Ldone4x + +.align 4 +Ltwo: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d,v5.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v23.16b,v5.16b,v5.16b,#8 + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + + pmull v29.1q,v20.1d,v23.1d //H·Ii+1 + eor v5.16b,v5.16b,v23.16b + + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull2 v31.1q,v20.2d,v23.2d + pmull v30.1q,v21.1d,v5.1d + + pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v22.2d,v3.2d + pmull2 v1.1q,v21.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + b Ldone4x + +.align 4 +Lone: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull v0.1q,v20.1d,v3.1d + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v20.2d,v3.2d + pmull v1.1q,v21.1d,v16.1d + +Ldone4x: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + st1 {v0.2d},[x0] //write out Xi + + ret + +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/md5-586-apple.S b/yass/third_party/boringssl/src/gen/bcm/md5-586-apple.S new file mode 100644 index 0000000000..986d590097 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/md5-586-apple.S @@ -0,0 +1,684 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _md5_block_asm_data_order +.private_extern _md5_block_asm_data_order +.align 4 +_md5_block_asm_data_order: +L_md5_block_asm_data_order_begin: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx + pushl %ebp + shll $6,%ecx + pushl %ebx + addl %esi,%ecx + subl $64,%ecx + movl (%edi),%eax + pushl %ecx + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx +L000start: + + # R0 section + movl %ecx,%edi + movl (%esi),%ebp + # R0 0 + xorl %edx,%edi + andl %ebx,%edi + leal 3614090360(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 4(%esi),%ebp + addl %ebx,%eax + # R0 1 + xorl %ecx,%edi + andl %eax,%edi + leal 3905402710(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 8(%esi),%ebp + addl %eax,%edx + # R0 2 + xorl %ebx,%edi + andl %edx,%edi + leal 606105819(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 12(%esi),%ebp + addl %edx,%ecx + # R0 3 + xorl %eax,%edi + andl %ecx,%edi + leal 3250441966(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 16(%esi),%ebp + addl %ecx,%ebx + # R0 4 + xorl %edx,%edi + andl %ebx,%edi + leal 4118548399(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 20(%esi),%ebp + addl %ebx,%eax + # R0 5 + xorl %ecx,%edi + andl %eax,%edi + leal 1200080426(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 24(%esi),%ebp + addl %eax,%edx + # R0 6 + xorl %ebx,%edi + andl %edx,%edi + leal 2821735955(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 28(%esi),%ebp + addl %edx,%ecx + # R0 7 + xorl %eax,%edi + andl %ecx,%edi + leal 4249261313(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 32(%esi),%ebp + addl %ecx,%ebx + # R0 8 + xorl %edx,%edi + andl %ebx,%edi + leal 1770035416(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 36(%esi),%ebp + addl %ebx,%eax + # R0 9 + xorl %ecx,%edi + andl %eax,%edi + leal 2336552879(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 40(%esi),%ebp + addl %eax,%edx + # R0 10 + xorl %ebx,%edi + andl %edx,%edi + leal 4294925233(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 44(%esi),%ebp + addl %edx,%ecx + # R0 11 + xorl %eax,%edi + andl %ecx,%edi + leal 2304563134(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 48(%esi),%ebp + addl %ecx,%ebx + # R0 12 + xorl %edx,%edi + andl %ebx,%edi + leal 1804603682(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 52(%esi),%ebp + addl %ebx,%eax + # R0 13 + xorl %ecx,%edi + andl %eax,%edi + leal 4254626195(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 56(%esi),%ebp + addl %eax,%edx + # R0 14 + xorl %ebx,%edi + andl %edx,%edi + leal 2792965006(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 60(%esi),%ebp + addl %edx,%ecx + # R0 15 + xorl %eax,%edi + andl %ecx,%edi + leal 1236535329(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 4(%esi),%ebp + addl %ecx,%ebx + + # R1 section + # R1 16 + leal 4129170786(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 24(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 17 + leal 3225465664(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 44(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 18 + leal 643717713(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl (%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 19 + leal 3921069994(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 20 + leal 3593408605(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 40(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 21 + leal 38016083(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 60(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 22 + leal 3634488961(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 16(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 23 + leal 3889429448(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 36(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 24 + leal 568446438(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 56(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 25 + leal 3275163606(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 12(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 26 + leal 4107603335(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 32(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 27 + leal 1163531501(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 52(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 28 + leal 2850285829(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 8(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 29 + leal 4243563512(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 28(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 30 + leal 1735328473(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 48(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 31 + leal 2368359562(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + # R2 section + # R2 32 + xorl %edx,%edi + xorl %ebx,%edi + leal 4294588738(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 32(%esi),%ebp + movl %ebx,%edi + # R2 33 + leal 2272392833(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 44(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 34 + xorl %ebx,%edi + xorl %edx,%edi + leal 1839030562(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 56(%esi),%ebp + movl %edx,%edi + # R2 35 + leal 4259657740(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 4(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 36 + xorl %edx,%edi + xorl %ebx,%edi + leal 2763975236(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 16(%esi),%ebp + movl %ebx,%edi + # R2 37 + leal 1272893353(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 28(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 38 + xorl %ebx,%edi + xorl %edx,%edi + leal 4139469664(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 40(%esi),%ebp + movl %edx,%edi + # R2 39 + leal 3200236656(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 52(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 40 + xorl %edx,%edi + xorl %ebx,%edi + leal 681279174(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl (%esi),%ebp + movl %ebx,%edi + # R2 41 + leal 3936430074(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 12(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 42 + xorl %ebx,%edi + xorl %edx,%edi + leal 3572445317(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 24(%esi),%ebp + movl %edx,%edi + # R2 43 + leal 76029189(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 36(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 44 + xorl %edx,%edi + xorl %ebx,%edi + leal 3654602809(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 48(%esi),%ebp + movl %ebx,%edi + # R2 45 + leal 3873151461(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 60(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 46 + xorl %ebx,%edi + xorl %edx,%edi + leal 530742520(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 8(%esi),%ebp + movl %edx,%edi + # R2 47 + leal 3299628645(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl (%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $23,%ebx + addl %ecx,%ebx + + # R3 section + # R3 48 + xorl %edx,%edi + orl %ebx,%edi + leal 4096336452(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 28(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 49 + orl %eax,%edi + leal 1126891415(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 56(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 50 + orl %edx,%edi + leal 2878612391(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 20(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 51 + orl %ecx,%edi + leal 4237533241(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 48(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 52 + orl %ebx,%edi + leal 1700485571(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 12(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 53 + orl %eax,%edi + leal 2399980690(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 40(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 54 + orl %edx,%edi + leal 4293915773(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 4(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 55 + orl %ecx,%edi + leal 2240044497(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 32(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 56 + orl %ebx,%edi + leal 1873313359(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 60(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 57 + orl %eax,%edi + leal 4264355552(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 24(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 58 + orl %edx,%edi + leal 2734768916(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 52(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 59 + orl %ecx,%edi + leal 1309151649(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 16(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 60 + orl %ebx,%edi + leal 4149444226(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 44(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 61 + orl %eax,%edi + leal 3174756917(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 8(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 62 + orl %edx,%edi + leal 718787259(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 36(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 63 + orl %ecx,%edi + leal 3951481745(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 24(%esp),%ebp + addl %edi,%ebx + addl $64,%esi + roll $21,%ebx + movl (%ebp),%edi + addl %ecx,%ebx + addl %edi,%eax + movl 4(%ebp),%edi + addl %edi,%ebx + movl 8(%ebp),%edi + addl %edi,%ecx + movl 12(%ebp),%edi + addl %edi,%edx + movl %eax,(%ebp) + movl %ebx,4(%ebp) + movl (%esp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + cmpl %esi,%edi + jae L000start + popl %eax + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/md5-586-linux.S b/yass/third_party/boringssl/src/gen/bcm/md5-586-linux.S new file mode 100644 index 0000000000..a297f2bc14 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/md5-586-linux.S @@ -0,0 +1,686 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl md5_block_asm_data_order +.hidden md5_block_asm_data_order +.type md5_block_asm_data_order,@function +.align 16 +md5_block_asm_data_order: +.L_md5_block_asm_data_order_begin: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx + pushl %ebp + shll $6,%ecx + pushl %ebx + addl %esi,%ecx + subl $64,%ecx + movl (%edi),%eax + pushl %ecx + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx +.L000start: + + + movl %ecx,%edi + movl (%esi),%ebp + + xorl %edx,%edi + andl %ebx,%edi + leal 3614090360(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 4(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 3905402710(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 8(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 606105819(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 12(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 3250441966(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 16(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 4118548399(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 20(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 1200080426(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 24(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 2821735955(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 28(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 4249261313(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 32(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 1770035416(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 36(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 2336552879(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 40(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 4294925233(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 44(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 2304563134(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 48(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 1804603682(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 52(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 4254626195(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 56(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 2792965006(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 60(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 1236535329(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 4(%esi),%ebp + addl %ecx,%ebx + + + + leal 4129170786(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 24(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 3225465664(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 44(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 643717713(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl (%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 3921069994(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 3593408605(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 40(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 38016083(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 60(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 3634488961(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 16(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 3889429448(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 36(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 568446438(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 56(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 3275163606(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 12(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 4107603335(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 32(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 1163531501(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 52(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 2850285829(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 8(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 4243563512(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 28(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 1735328473(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 48(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 2368359562(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + + + xorl %edx,%edi + xorl %ebx,%edi + leal 4294588738(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 32(%esi),%ebp + movl %ebx,%edi + + leal 2272392833(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 44(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 1839030562(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 56(%esi),%ebp + movl %edx,%edi + + leal 4259657740(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 4(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 2763975236(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 16(%esi),%ebp + movl %ebx,%edi + + leal 1272893353(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 28(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 4139469664(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 40(%esi),%ebp + movl %edx,%edi + + leal 3200236656(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 52(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 681279174(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl (%esi),%ebp + movl %ebx,%edi + + leal 3936430074(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 12(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 3572445317(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 24(%esi),%ebp + movl %edx,%edi + + leal 76029189(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 36(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 3654602809(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 48(%esi),%ebp + movl %ebx,%edi + + leal 3873151461(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 60(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 530742520(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 8(%esi),%ebp + movl %edx,%edi + + leal 3299628645(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl (%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $23,%ebx + addl %ecx,%ebx + + + + xorl %edx,%edi + orl %ebx,%edi + leal 4096336452(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 28(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 1126891415(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 56(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 2878612391(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 20(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 4237533241(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 48(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 1700485571(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 12(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 2399980690(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 40(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 4293915773(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 4(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 2240044497(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 32(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 1873313359(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 60(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 4264355552(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 24(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 2734768916(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 52(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 1309151649(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 16(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 4149444226(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 44(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 3174756917(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 8(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 718787259(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 36(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 3951481745(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 24(%esp),%ebp + addl %edi,%ebx + addl $64,%esi + roll $21,%ebx + movl (%ebp),%edi + addl %ecx,%ebx + addl %edi,%eax + movl 4(%ebp),%edi + addl %edi,%ebx + movl 8(%ebp),%edi + addl %edi,%ecx + movl 12(%ebp),%edi + addl %edi,%edx + movl %eax,(%ebp) + movl %ebx,4(%ebp) + movl (%esp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + cmpl %esi,%edi + jae .L000start + popl %eax + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/md5-586-win.asm b/yass/third_party/boringssl/src/gen/bcm/md5-586-win.asm new file mode 100644 index 0000000000..25592b8d46 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/md5-586-win.asm @@ -0,0 +1,694 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _md5_block_asm_data_order +align 16 +_md5_block_asm_data_order: +L$_md5_block_asm_data_order_begin: + push esi + push edi + mov edi,DWORD [12+esp] + mov esi,DWORD [16+esp] + mov ecx,DWORD [20+esp] + push ebp + shl ecx,6 + push ebx + add ecx,esi + sub ecx,64 + mov eax,DWORD [edi] + push ecx + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] +L$000start: + ; + ; R0 section + mov edi,ecx + mov ebp,DWORD [esi] + ; R0 0 + xor edi,edx + and edi,ebx + lea eax,[3614090360+ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD [4+esi] + add eax,ebx + ; R0 1 + xor edi,ecx + and edi,eax + lea edx,[3905402710+ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD [8+esi] + add edx,eax + ; R0 2 + xor edi,ebx + and edi,edx + lea ecx,[606105819+ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD [12+esi] + add ecx,edx + ; R0 3 + xor edi,eax + and edi,ecx + lea ebx,[3250441966+ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD [16+esi] + add ebx,ecx + ; R0 4 + xor edi,edx + and edi,ebx + lea eax,[4118548399+ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD [20+esi] + add eax,ebx + ; R0 5 + xor edi,ecx + and edi,eax + lea edx,[1200080426+ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD [24+esi] + add edx,eax + ; R0 6 + xor edi,ebx + and edi,edx + lea ecx,[2821735955+ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD [28+esi] + add ecx,edx + ; R0 7 + xor edi,eax + and edi,ecx + lea ebx,[4249261313+ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD [32+esi] + add ebx,ecx + ; R0 8 + xor edi,edx + and edi,ebx + lea eax,[1770035416+ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD [36+esi] + add eax,ebx + ; R0 9 + xor edi,ecx + and edi,eax + lea edx,[2336552879+ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD [40+esi] + add edx,eax + ; R0 10 + xor edi,ebx + and edi,edx + lea ecx,[4294925233+ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD [44+esi] + add ecx,edx + ; R0 11 + xor edi,eax + and edi,ecx + lea ebx,[2304563134+ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD [48+esi] + add ebx,ecx + ; R0 12 + xor edi,edx + and edi,ebx + lea eax,[1804603682+ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD [52+esi] + add eax,ebx + ; R0 13 + xor edi,ecx + and edi,eax + lea edx,[4254626195+ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD [56+esi] + add edx,eax + ; R0 14 + xor edi,ebx + and edi,edx + lea ecx,[2792965006+ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD [60+esi] + add ecx,edx + ; R0 15 + xor edi,eax + and edi,ecx + lea ebx,[1236535329+ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD [4+esi] + add ebx,ecx + ; + ; R1 section + ; R1 16 + lea eax,[4129170786+ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD [24+esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 17 + lea edx,[3225465664+ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD [44+esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 18 + lea ecx,[643717713+ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD [esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 19 + lea ebx,[3921069994+ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD [20+esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 20 + lea eax,[3593408605+ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD [40+esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 21 + lea edx,[38016083+ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD [60+esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 22 + lea ecx,[3634488961+ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD [16+esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 23 + lea ebx,[3889429448+ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD [36+esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 24 + lea eax,[568446438+ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD [56+esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 25 + lea edx,[3275163606+ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD [12+esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 26 + lea ecx,[4107603335+ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD [32+esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 27 + lea ebx,[1163531501+ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD [52+esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 28 + lea eax,[2850285829+ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD [8+esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 29 + lea edx,[4243563512+ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD [28+esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 30 + lea ecx,[1735328473+ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD [48+esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 31 + lea ebx,[2368359562+ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD [20+esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; + ; R2 section + ; R2 32 + xor edi,edx + xor edi,ebx + lea eax,[4294588738+ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD [32+esi] + mov edi,ebx + ; R2 33 + lea edx,[2272392833+ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD [44+esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 34 + xor edi,ebx + xor edi,edx + lea ecx,[1839030562+ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD [56+esi] + mov edi,edx + ; R2 35 + lea ebx,[4259657740+ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD [4+esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 36 + xor edi,edx + xor edi,ebx + lea eax,[2763975236+ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD [16+esi] + mov edi,ebx + ; R2 37 + lea edx,[1272893353+ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD [28+esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 38 + xor edi,ebx + xor edi,edx + lea ecx,[4139469664+ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD [40+esi] + mov edi,edx + ; R2 39 + lea ebx,[3200236656+ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD [52+esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 40 + xor edi,edx + xor edi,ebx + lea eax,[681279174+ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD [esi] + mov edi,ebx + ; R2 41 + lea edx,[3936430074+ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD [12+esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 42 + xor edi,ebx + xor edi,edx + lea ecx,[3572445317+ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD [24+esi] + mov edi,edx + ; R2 43 + lea ebx,[76029189+ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD [36+esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 44 + xor edi,edx + xor edi,ebx + lea eax,[3654602809+ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD [48+esi] + mov edi,ebx + ; R2 45 + lea edx,[3873151461+ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD [60+esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 46 + xor edi,ebx + xor edi,edx + lea ecx,[530742520+ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD [8+esi] + mov edi,edx + ; R2 47 + lea ebx,[3299628645+ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD [esi] + add ebx,edi + mov edi,-1 + rol ebx,23 + add ebx,ecx + ; + ; R3 section + ; R3 48 + xor edi,edx + or edi,ebx + lea eax,[4096336452+ebp*1+eax] + xor edi,ecx + mov ebp,DWORD [28+esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 49 + or edi,eax + lea edx,[1126891415+ebp*1+edx] + xor edi,ebx + mov ebp,DWORD [56+esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 50 + or edi,edx + lea ecx,[2878612391+ebp*1+ecx] + xor edi,eax + mov ebp,DWORD [20+esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 51 + or edi,ecx + lea ebx,[4237533241+ebp*1+ebx] + xor edi,edx + mov ebp,DWORD [48+esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 52 + or edi,ebx + lea eax,[1700485571+ebp*1+eax] + xor edi,ecx + mov ebp,DWORD [12+esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 53 + or edi,eax + lea edx,[2399980690+ebp*1+edx] + xor edi,ebx + mov ebp,DWORD [40+esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 54 + or edi,edx + lea ecx,[4293915773+ebp*1+ecx] + xor edi,eax + mov ebp,DWORD [4+esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 55 + or edi,ecx + lea ebx,[2240044497+ebp*1+ebx] + xor edi,edx + mov ebp,DWORD [32+esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 56 + or edi,ebx + lea eax,[1873313359+ebp*1+eax] + xor edi,ecx + mov ebp,DWORD [60+esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 57 + or edi,eax + lea edx,[4264355552+ebp*1+edx] + xor edi,ebx + mov ebp,DWORD [24+esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 58 + or edi,edx + lea ecx,[2734768916+ebp*1+ecx] + xor edi,eax + mov ebp,DWORD [52+esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 59 + or edi,ecx + lea ebx,[1309151649+ebp*1+ebx] + xor edi,edx + mov ebp,DWORD [16+esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 60 + or edi,ebx + lea eax,[4149444226+ebp*1+eax] + xor edi,ecx + mov ebp,DWORD [44+esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 61 + or edi,eax + lea edx,[3174756917+ebp*1+edx] + xor edi,ebx + mov ebp,DWORD [8+esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 62 + or edi,edx + lea ecx,[718787259+ebp*1+ecx] + xor edi,eax + mov ebp,DWORD [36+esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 63 + or edi,ecx + lea ebx,[3951481745+ebp*1+ebx] + xor edi,edx + mov ebp,DWORD [24+esp] + add ebx,edi + add esi,64 + rol ebx,21 + mov edi,DWORD [ebp] + add ebx,ecx + add eax,edi + mov edi,DWORD [4+ebp] + add ebx,edi + mov edi,DWORD [8+ebp] + add ecx,edi + mov edi,DWORD [12+ebp] + add edx,edi + mov DWORD [ebp],eax + mov DWORD [4+ebp],ebx + mov edi,DWORD [esp] + mov DWORD [8+ebp],ecx + mov DWORD [12+ebp],edx + cmp edi,esi + jae NEAR L$000start + pop eax + pop ebx + pop ebp + pop edi + pop esi + ret +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-apple.S new file mode 100644 index 0000000000..e4c02415e2 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-apple.S @@ -0,0 +1,690 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text +.p2align 4 + +.globl _md5_block_asm_data_order +.private_extern _md5_block_asm_data_order + +_md5_block_asm_data_order: + +_CET_ENDBR + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r14 + + pushq %r15 + +L$prologue: + + + + + movq %rdi,%rbp + shlq $6,%rdx + leaq (%rsi,%rdx,1),%rdi + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + + + + + + + + cmpq %rdi,%rsi + je L$end + + +L$loop: + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r14d + movl %edx,%r15d + movl 0(%rsi),%r10d + movl %edx,%r11d + xorl %ecx,%r11d + leal -680876936(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 4(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -389564586(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 8(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal 606105819(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 12(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1044525330(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 16(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal -176418897(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal 1200080426(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 24(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1473231341(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 28(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -45705983(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 32(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1770035416(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 36(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -1958414417(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 40(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -42063(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 44(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1990404162(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 48(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1804603682(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 52(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -40341101(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 56(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1502002290(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 60(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal 1236535329(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 0(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + movl 4(%rsi),%r10d + movl %edx,%r11d + movl %edx,%r12d + notl %r11d + leal -165796510(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 24(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1069501632(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 44(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 643717713(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -373897302(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 20(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -701558691(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 40(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal 38016083(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 60(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -660478335(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 16(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -405537848(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 36(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal 568446438(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 56(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1019803690(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 12(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -187363961(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 32(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal 1163531501(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 52(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -1444681467(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 8(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -51403784(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 28(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 1735328473(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 48(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -1926607734(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + movl 20(%rsi),%r10d + movl %ecx,%r11d + leal -378558(%rax,%r10,1),%eax + movl 32(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -2022574463(%rdx,%r10,1),%edx + movl 44(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 1839030562(%rcx,%r10,1),%ecx + movl 56(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -35309556(%rbx,%r10,1),%ebx + movl 4(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -1530992060(%rax,%r10,1),%eax + movl 16(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal 1272893353(%rdx,%r10,1),%edx + movl 28(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -155497632(%rcx,%r10,1),%ecx + movl 40(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -1094730640(%rbx,%r10,1),%ebx + movl 52(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal 681279174(%rax,%r10,1),%eax + movl 0(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -358537222(%rdx,%r10,1),%edx + movl 12(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -722521979(%rcx,%r10,1),%ecx + movl 24(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal 76029189(%rbx,%r10,1),%ebx + movl 36(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -640364487(%rax,%r10,1),%eax + movl 48(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -421815835(%rdx,%r10,1),%edx + movl 60(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 530742520(%rcx,%r10,1),%ecx + movl 8(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -995338651(%rbx,%r10,1),%ebx + movl 0(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + movl 0(%rsi),%r10d + movl $0xffffffff,%r11d + xorl %edx,%r11d + leal -198630844(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 28(%rsi),%r10d + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal 1126891415(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 56(%rsi),%r10d + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1416354905(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 20(%rsi),%r10d + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -57434055(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 48(%rsi),%r10d + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1700485571(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 12(%rsi),%r10d + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1894986606(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 40(%rsi),%r10d + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1051523(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 4(%rsi),%r10d + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -2054922799(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 32(%rsi),%r10d + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1873313359(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 60(%rsi),%r10d + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -30611744(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 24(%rsi),%r10d + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1560198380(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 52(%rsi),%r10d + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal 1309151649(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 16(%rsi),%r10d + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal -145523070(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 44(%rsi),%r10d + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1120210379(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 8(%rsi),%r10d + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal 718787259(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 36(%rsi),%r10d + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -343485551(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 0(%rsi),%r10d + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + + addl %r8d,%eax + addl %r9d,%ebx + addl %r14d,%ecx + addl %r15d,%edx + + + addq $64,%rsi + cmpq %rdi,%rsi + jb L$loop + + +L$end: + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + movq (%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r12 + + movq 24(%rsp),%rbx + + movq 32(%rsp),%rbp + + addq $40,%rsp + +L$epilogue: + ret + + +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-linux.S new file mode 100644 index 0000000000..7b93662a42 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-linux.S @@ -0,0 +1,695 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text +.align 16 + +.globl md5_block_asm_data_order +.hidden md5_block_asm_data_order +.type md5_block_asm_data_order,@function +md5_block_asm_data_order: +.cfi_startproc +_CET_ENDBR + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12,-32 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14,-40 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15,-48 +.Lprologue: + + + + + movq %rdi,%rbp + shlq $6,%rdx + leaq (%rsi,%rdx,1),%rdi + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + + + + + + + + cmpq %rdi,%rsi + je .Lend + + +.Lloop: + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r14d + movl %edx,%r15d + movl 0(%rsi),%r10d + movl %edx,%r11d + xorl %ecx,%r11d + leal -680876936(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 4(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -389564586(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 8(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal 606105819(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 12(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1044525330(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 16(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal -176418897(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal 1200080426(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 24(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1473231341(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 28(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -45705983(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 32(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1770035416(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 36(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -1958414417(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 40(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -42063(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 44(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1990404162(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 48(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1804603682(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 52(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -40341101(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 56(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1502002290(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 60(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal 1236535329(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 0(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + movl 4(%rsi),%r10d + movl %edx,%r11d + movl %edx,%r12d + notl %r11d + leal -165796510(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 24(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1069501632(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 44(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 643717713(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -373897302(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 20(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -701558691(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 40(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal 38016083(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 60(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -660478335(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 16(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -405537848(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 36(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal 568446438(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 56(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1019803690(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 12(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -187363961(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 32(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal 1163531501(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 52(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -1444681467(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 8(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -51403784(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 28(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 1735328473(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 48(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -1926607734(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + movl 20(%rsi),%r10d + movl %ecx,%r11d + leal -378558(%rax,%r10,1),%eax + movl 32(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -2022574463(%rdx,%r10,1),%edx + movl 44(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 1839030562(%rcx,%r10,1),%ecx + movl 56(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -35309556(%rbx,%r10,1),%ebx + movl 4(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -1530992060(%rax,%r10,1),%eax + movl 16(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal 1272893353(%rdx,%r10,1),%edx + movl 28(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -155497632(%rcx,%r10,1),%ecx + movl 40(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -1094730640(%rbx,%r10,1),%ebx + movl 52(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal 681279174(%rax,%r10,1),%eax + movl 0(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -358537222(%rdx,%r10,1),%edx + movl 12(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -722521979(%rcx,%r10,1),%ecx + movl 24(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal 76029189(%rbx,%r10,1),%ebx + movl 36(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -640364487(%rax,%r10,1),%eax + movl 48(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -421815835(%rdx,%r10,1),%edx + movl 60(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 530742520(%rcx,%r10,1),%ecx + movl 8(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -995338651(%rbx,%r10,1),%ebx + movl 0(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + movl 0(%rsi),%r10d + movl $0xffffffff,%r11d + xorl %edx,%r11d + leal -198630844(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 28(%rsi),%r10d + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal 1126891415(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 56(%rsi),%r10d + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1416354905(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 20(%rsi),%r10d + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -57434055(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 48(%rsi),%r10d + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1700485571(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 12(%rsi),%r10d + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1894986606(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 40(%rsi),%r10d + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1051523(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 4(%rsi),%r10d + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -2054922799(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 32(%rsi),%r10d + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1873313359(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 60(%rsi),%r10d + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -30611744(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 24(%rsi),%r10d + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1560198380(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 52(%rsi),%r10d + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal 1309151649(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 16(%rsi),%r10d + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal -145523070(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 44(%rsi),%r10d + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1120210379(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 8(%rsi),%r10d + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal 718787259(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 36(%rsi),%r10d + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -343485551(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 0(%rsi),%r10d + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + + addl %r8d,%eax + addl %r9d,%ebx + addl %r14d,%ecx + addl %r15d,%edx + + + addq $64,%rsi + cmpq %rdi,%rsi + jb .Lloop + + +.Lend: + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + movq (%rsp),%r15 +.cfi_restore r15 + movq 8(%rsp),%r14 +.cfi_restore r14 + movq 16(%rsp),%r12 +.cfi_restore r12 + movq 24(%rsp),%rbx +.cfi_restore rbx + movq 32(%rsp),%rbp +.cfi_restore rbp + addq $40,%rsp +.cfi_adjust_cfa_offset -40 +.Lepilogue: + ret +.cfi_endproc +.size md5_block_asm_data_order,.-md5_block_asm_data_order +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-win.asm new file mode 100644 index 0000000000..f6c5b6276b --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/md5-x86_64-win.asm @@ -0,0 +1,803 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + +ALIGN 16 + +global md5_block_asm_data_order + +md5_block_asm_data_order: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_md5_block_asm_data_order: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + push rbp + + push rbx + + push r12 + + push r14 + + push r15 + +$L$prologue: + + + + + mov rbp,rdi + shl rdx,6 + lea rdi,[rdx*1+rsi] + mov eax,DWORD[rbp] + mov ebx,DWORD[4+rbp] + mov ecx,DWORD[8+rbp] + mov edx,DWORD[12+rbp] + + + + + + + + cmp rsi,rdi + je NEAR $L$end + + +$L$loop: + mov r8d,eax + mov r9d,ebx + mov r14d,ecx + mov r15d,edx + mov r10d,DWORD[rsi] + mov r11d,edx + xor r11d,ecx + lea eax,[((-680876936))+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD[4+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,[((-389564586))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD[8+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,[606105819+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD[12+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,[((-1044525330))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD[16+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,[((-176418897))+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD[20+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,[1200080426+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD[24+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,[((-1473231341))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD[28+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,[((-45705983))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD[32+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,[1770035416+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD[36+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,[((-1958414417))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD[40+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,[((-42063))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD[44+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,[((-1990404162))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD[48+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,[1804603682+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD[52+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,[((-40341101))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD[56+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,[((-1502002290))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD[60+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,[1236535329+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD[rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + mov r10d,DWORD[4+rsi] + mov r11d,edx + mov r12d,edx + not r11d + lea eax,[((-165796510))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD[24+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,[((-1069501632))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD[44+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,[643717713+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD[rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,[((-373897302))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD[20+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,[((-701558691))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD[40+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,[38016083+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD[60+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,[((-660478335))+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD[16+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,[((-405537848))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD[36+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,[568446438+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD[56+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,[((-1019803690))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD[12+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,[((-187363961))+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD[32+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,[1163531501+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD[52+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,[((-1444681467))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD[8+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,[((-51403784))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD[28+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,[1735328473+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD[48+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,[((-1926607734))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD[rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + mov r10d,DWORD[20+rsi] + mov r11d,ecx + lea eax,[((-378558))+r10*1+rax] + mov r10d,DWORD[32+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,[((-2022574463))+r10*1+rdx] + mov r10d,DWORD[44+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,[1839030562+r10*1+rcx] + mov r10d,DWORD[56+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,[((-35309556))+r10*1+rbx] + mov r10d,DWORD[4+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,[((-1530992060))+r10*1+rax] + mov r10d,DWORD[16+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,[1272893353+r10*1+rdx] + mov r10d,DWORD[28+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,[((-155497632))+r10*1+rcx] + mov r10d,DWORD[40+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,[((-1094730640))+r10*1+rbx] + mov r10d,DWORD[52+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,[681279174+r10*1+rax] + mov r10d,DWORD[rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,[((-358537222))+r10*1+rdx] + mov r10d,DWORD[12+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,[((-722521979))+r10*1+rcx] + mov r10d,DWORD[24+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,[76029189+r10*1+rbx] + mov r10d,DWORD[36+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,[((-640364487))+r10*1+rax] + mov r10d,DWORD[48+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,[((-421815835))+r10*1+rdx] + mov r10d,DWORD[60+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,[530742520+r10*1+rcx] + mov r10d,DWORD[8+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,[((-995338651))+r10*1+rbx] + mov r10d,DWORD[rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + mov r10d,DWORD[rsi] + mov r11d,0xffffffff + xor r11d,edx + lea eax,[((-198630844))+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD[28+rsi] + mov r11d,0xffffffff + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,[1126891415+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD[56+rsi] + mov r11d,0xffffffff + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,[((-1416354905))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD[20+rsi] + mov r11d,0xffffffff + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,[((-57434055))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD[48+rsi] + mov r11d,0xffffffff + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,[1700485571+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD[12+rsi] + mov r11d,0xffffffff + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,[((-1894986606))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD[40+rsi] + mov r11d,0xffffffff + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,[((-1051523))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD[4+rsi] + mov r11d,0xffffffff + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,[((-2054922799))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD[32+rsi] + mov r11d,0xffffffff + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,[1873313359+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD[60+rsi] + mov r11d,0xffffffff + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,[((-30611744))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD[24+rsi] + mov r11d,0xffffffff + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,[((-1560198380))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD[52+rsi] + mov r11d,0xffffffff + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,[1309151649+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD[16+rsi] + mov r11d,0xffffffff + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,[((-145523070))+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD[44+rsi] + mov r11d,0xffffffff + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,[((-1120210379))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD[8+rsi] + mov r11d,0xffffffff + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,[718787259+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD[36+rsi] + mov r11d,0xffffffff + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,[((-343485551))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD[rsi] + mov r11d,0xffffffff + rol ebx,21 + xor r11d,edx + add ebx,ecx + + add eax,r8d + add ebx,r9d + add ecx,r14d + add edx,r15d + + + add rsi,64 + cmp rsi,rdi + jb NEAR $L$loop + + +$L$end: + mov DWORD[rbp],eax + mov DWORD[4+rbp],ebx + mov DWORD[8+rbp],ecx + mov DWORD[12+rbp],edx + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r12,QWORD[16+rsp] + + mov rbx,QWORD[24+rsp] + + mov rbp,QWORD[32+rsp] + + add rsp,40 + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_md5_block_asm_data_order: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rax,[40+rax] + + mov rbp,QWORD[((-8))+rax] + mov rbx,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r14,QWORD[((-32))+rax] + mov r15,QWORD[((-40))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_md5_block_asm_data_order wrt ..imagebase + DD $L$SEH_end_md5_block_asm_data_order wrt ..imagebase + DD $L$SEH_info_md5_block_asm_data_order wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_md5_block_asm_data_order: + DB 9,0,0,0 + DD se_handler wrt ..imagebase +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-apple.S b/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-apple.S new file mode 100644 index 0000000000..c8469e6f80 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-apple.S @@ -0,0 +1,1726 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include "openssl/arm_arch.h" + +.section __TEXT,__const +.align 5 +Lpoly: +.quad 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 +LRR: // 2^512 mod P precomputed for NIST P256 polynomial +.quad 0x0000000000000003,0xfffffffbffffffff,0xfffffffffffffffe,0x00000004fffffffd +Lone_mont: +.quad 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe +Lone: +.quad 1,0,0,0 +Lord: +.quad 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000 +LordK: +.quad 0xccd1c8aaee00bc4f +.byte 69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.text + +// void ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl _ecp_nistz256_mul_mont +.private_extern _ecp_nistz256_mul_mont + +.align 4 +_ecp_nistz256_mul_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + ldr x3,[x2] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_mul_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl _ecp_nistz256_sqr_mont +.private_extern _ecp_nistz256_sqr_mont + +.align 4 +_ecp_nistz256_sqr_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sqr_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl _ecp_nistz256_div_by_2 +.private_extern _ecp_nistz256_div_by_2 + +.align 4 +_ecp_nistz256_div_by_2: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_div_by_2 + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_mul_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl _ecp_nistz256_mul_by_2 +.private_extern _ecp_nistz256_mul_by_2 + +.align 4 +_ecp_nistz256_mul_by_2: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + + bl __ecp_nistz256_add_to // ret = a+a // 2*a + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_mul_by_3(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl _ecp_nistz256_mul_by_3 +.private_extern _ecp_nistz256_mul_by_3 + +.align 4 +_ecp_nistz256_mul_by_3: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + mov x4,x14 + mov x5,x15 + mov x6,x16 + mov x7,x17 + + bl __ecp_nistz256_add_to // ret = a+a // 2*a + + mov x8,x4 + mov x9,x5 + mov x10,x6 + mov x11,x7 + + bl __ecp_nistz256_add_to // ret += a // 2*a+a=3*a + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_sub(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl _ecp_nistz256_sub +.private_extern _ecp_nistz256_sub + +.align 4 +_ecp_nistz256_sub: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sub_from + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl _ecp_nistz256_neg +.private_extern _ecp_nistz256_neg + +.align 4 +_ecp_nistz256_neg: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x2,x1 + mov x14,xzr // a = 0 + mov x15,xzr + mov x16,xzr + mov x17,xzr + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sub_from + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded +// to x4-x7 and b[0] - to x3 + +.align 4 +__ecp_nistz256_mul_mont: + mul x14,x4,x3 // a[0]*b[0] + umulh x8,x4,x3 + + mul x15,x5,x3 // a[1]*b[0] + umulh x9,x5,x3 + + mul x16,x6,x3 // a[2]*b[0] + umulh x10,x6,x3 + + mul x17,x7,x3 // a[3]*b[0] + umulh x11,x7,x3 + ldr x3,[x2,#8] // b[1] + + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adc x19,xzr,x11 + mov x20,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + ldr x3,[x2,#8*(1+1)] // b[1+1] + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + ldr x3,[x2,#8*(2+1)] // b[2+1] + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + // last reduction + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + adcs x17,x19,x11 + adc x19,x20,xzr + + adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x19,xzr // did it borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret + + +// note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded +// to x4-x7 + +.align 4 +__ecp_nistz256_sqr_mont: + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x15,x5,x4 // a[1]*a[0] + umulh x9,x5,x4 + mul x16,x6,x4 // a[2]*a[0] + umulh x10,x6,x4 + mul x17,x7,x4 // a[3]*a[0] + umulh x19,x7,x4 + + adds x16,x16,x9 // accumulate high parts of multiplication + mul x8,x6,x5 // a[2]*a[1] + umulh x9,x6,x5 + adcs x17,x17,x10 + mul x10,x7,x5 // a[3]*a[1] + umulh x11,x7,x5 + adc x19,x19,xzr // can't overflow + + mul x20,x7,x6 // a[3]*a[2] + umulh x1,x7,x6 + + adds x9,x9,x10 // accumulate high parts of multiplication + mul x14,x4,x4 // a[0]*a[0] + adc x10,x11,xzr // can't overflow + + adds x17,x17,x8 // accumulate low parts of multiplication + umulh x4,x4,x4 + adcs x19,x19,x9 + mul x9,x5,x5 // a[1]*a[1] + adcs x20,x20,x10 + umulh x5,x5,x5 + adc x1,x1,xzr // can't overflow + + adds x15,x15,x15 // acc[1-6]*=2 + mul x10,x6,x6 // a[2]*a[2] + adcs x16,x16,x16 + umulh x6,x6,x6 + adcs x17,x17,x17 + mul x11,x7,x7 // a[3]*a[3] + adcs x19,x19,x19 + umulh x7,x7,x7 + adcs x20,x20,x20 + adcs x1,x1,x1 + adc x2,xzr,xzr + + adds x15,x15,x4 // +a[i]*a[i] + adcs x16,x16,x9 + adcs x17,x17,x5 + adcs x19,x19,x10 + adcs x20,x20,x6 + lsl x8,x14,#32 + adcs x1,x1,x11 + lsr x9,x14,#32 + adc x2,x2,x7 + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + adc x17,x11,xzr // can't overflow + + adds x14,x14,x19 // accumulate upper half + adcs x15,x15,x20 + adcs x16,x16,x1 + adcs x17,x17,x2 + adc x19,xzr,xzr + + adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x19,xzr // did it borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret + + +// Note that __ecp_nistz256_add_to expects both input vectors pre-loaded to +// x4-x7 and x8-x11. This is done because it's used in multiple +// contexts, e.g. in multiplication by 2 and 3... + +.align 4 +__ecp_nistz256_add_to: + adds x14,x14,x8 // ret = a+b + adcs x15,x15,x9 + adcs x16,x16,x10 + adcs x17,x17,x11 + adc x1,xzr,xzr // zap x1 + + adds x8,x14,#1 // subs x8,x4,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x1,xzr // did subtraction borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret + + + +.align 4 +__ecp_nistz256_sub_from: + ldp x8,x9,[x2] + ldp x10,x11,[x2,#16] + subs x14,x14,x8 // ret = a-b + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbcs x17,x17,x11 + sbc x1,xzr,xzr // zap x1 + + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adc x11,x17,x13 + cmp x1,xzr // did subtraction borrow? + + csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret + csel x15,x15,x9,eq + csel x16,x16,x10,eq + stp x14,x15,[x0] + csel x17,x17,x11,eq + stp x16,x17,[x0,#16] + + ret + + + +.align 4 +__ecp_nistz256_sub_morf: + ldp x8,x9,[x2] + ldp x10,x11,[x2,#16] + subs x14,x8,x14 // ret = b-a + sbcs x15,x9,x15 + sbcs x16,x10,x16 + sbcs x17,x11,x17 + sbc x1,xzr,xzr // zap x1 + + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adc x11,x17,x13 + cmp x1,xzr // did subtraction borrow? + + csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret + csel x15,x15,x9,eq + csel x16,x16,x10,eq + stp x14,x15,[x0] + csel x17,x17,x11,eq + stp x16,x17,[x0,#16] + + ret + + + +.align 4 +__ecp_nistz256_div_by_2: + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = a+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adcs x11,x17,x13 + adc x1,xzr,xzr // zap x1 + tst x14,#1 // is a even? + + csel x14,x14,x8,eq // ret = even ? a : a+modulus + csel x15,x15,x9,eq + csel x16,x16,x10,eq + csel x17,x17,x11,eq + csel x1,xzr,x1,eq + + lsr x14,x14,#1 // ret >>= 1 + orr x14,x14,x15,lsl#63 + lsr x15,x15,#1 + orr x15,x15,x16,lsl#63 + lsr x16,x16,#1 + orr x16,x16,x17,lsl#63 + lsr x17,x17,#1 + stp x14,x15,[x0] + orr x17,x17,x1,lsl#63 + stp x16,x17,[x0,#16] + + ret + +.globl _ecp_nistz256_point_double +.private_extern _ecp_nistz256_point_double + +.align 5 +_ecp_nistz256_point_double: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + sub sp,sp,#32*4 + +Ldouble_shortcut: + ldp x14,x15,[x1,#32] + mov x21,x0 + ldp x16,x17,[x1,#48] + mov x22,x1 + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + mov x8,x14 + ldr x13,[x13,#24] + mov x9,x15 + ldp x4,x5,[x22,#64] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[x22,#64+16] + add x0,sp,#0 + bl __ecp_nistz256_add_to // p256_mul_by_2(S, in_y); + + add x0,sp,#64 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Zsqr, in_z); + + ldp x8,x9,[x22] + ldp x10,x11,[x22,#16] + mov x4,x14 // put Zsqr aside for p256_sub + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x0,sp,#32 + bl __ecp_nistz256_add_to // p256_add(M, Zsqr, in_x); + + add x2,x22,#0 + mov x14,x4 // restore Zsqr + mov x15,x5 + ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont + mov x16,x6 + mov x17,x7 + ldp x6,x7,[sp,#0+16] + add x0,sp,#64 + bl __ecp_nistz256_sub_morf // p256_sub(Zsqr, in_x, Zsqr); + + add x0,sp,#0 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(S, S); + + ldr x3,[x22,#32] + ldp x4,x5,[x22,#64] + ldp x6,x7,[x22,#64+16] + add x2,x22,#32 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(tmp0, in_z, in_y); + + mov x8,x14 + mov x9,x15 + ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[sp,#0+16] + add x0,x21,#64 + bl __ecp_nistz256_add_to // p256_mul_by_2(res_z, tmp0); + + add x0,sp,#96 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(tmp0, S); + + ldr x3,[sp,#64] // forward load for p256_mul_mont + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x0,x21,#32 + bl __ecp_nistz256_div_by_2 // p256_div_by_2(res_y, tmp0); + + add x2,sp,#64 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(M, M, Zsqr); + + mov x8,x14 // duplicate M + mov x9,x15 + mov x10,x16 + mov x11,x17 + mov x4,x14 // put M aside + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x0,sp,#32 + bl __ecp_nistz256_add_to + mov x8,x4 // restore M + mov x9,x5 + ldr x3,[x22] // forward load for p256_mul_mont + mov x10,x6 + ldp x4,x5,[sp,#0] + mov x11,x7 + ldp x6,x7,[sp,#0+16] + bl __ecp_nistz256_add_to // p256_mul_by_3(M, M); + + add x2,x22,#0 + add x0,sp,#0 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, in_x); + + mov x8,x14 + mov x9,x15 + ldp x4,x5,[sp,#32] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[sp,#32+16] + add x0,sp,#96 + bl __ecp_nistz256_add_to // p256_mul_by_2(tmp0, S); + + add x0,x21,#0 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(res_x, M); + + add x2,sp,#96 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, tmp0); + + add x2,sp,#0 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(S, S, res_x); + + ldr x3,[sp,#32] + mov x4,x14 // copy S + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x2,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, M); + + add x2,x21,#32 + add x0,x21,#32 + bl __ecp_nistz256_sub_from // p256_sub(res_y, S, res_y); + + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl _ecp_nistz256_point_add +.private_extern _ecp_nistz256_point_add + +.align 5 +_ecp_nistz256_point_add: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#32*12 + + ldp x4,x5,[x2,#64] // in2_z + ldp x6,x7,[x2,#64+16] + mov x21,x0 + mov x22,x1 + mov x23,x2 + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + orr x8,x4,x5 + orr x10,x6,x7 + orr x25,x8,x10 + cmp x25,#0 + csetm x25,ne // ~in2infty + add x0,sp,#192 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z); + + ldp x4,x5,[x22,#64] // in1_z + ldp x6,x7,[x22,#64+16] + orr x8,x4,x5 + orr x10,x6,x7 + orr x24,x8,x10 + cmp x24,#0 + csetm x24,ne // ~in1infty + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); + + ldr x3,[x23,#64] + ldp x4,x5,[sp,#192] + ldp x6,x7,[sp,#192+16] + add x2,x23,#64 + add x0,sp,#320 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S1, Z2sqr, in2_z); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,x22,#64 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z); + + ldr x3,[x22,#32] + ldp x4,x5,[sp,#320] + ldp x6,x7,[sp,#320+16] + add x2,x22,#32 + add x0,sp,#320 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S1, S1, in1_y); + + ldr x3,[x23,#32] + ldp x4,x5,[sp,#352] + ldp x6,x7,[sp,#352+16] + add x2,x23,#32 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y); + + add x2,sp,#320 + ldr x3,[sp,#192] // forward load for p256_mul_mont + ldp x4,x5,[x22] + ldp x6,x7,[x22,#16] + add x0,sp,#160 + bl __ecp_nistz256_sub_from // p256_sub(R, S2, S1); + + orr x14,x14,x15 // see if result is zero + orr x16,x16,x17 + orr x26,x14,x16 // ~is_equal(S1,S2) + + add x2,sp,#192 + add x0,sp,#256 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U1, in1_x, Z2sqr); + + ldr x3,[sp,#128] + ldp x4,x5,[x23] + ldp x6,x7,[x23,#16] + add x2,sp,#128 + add x0,sp,#288 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in2_x, Z1sqr); + + add x2,sp,#256 + ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont + ldp x6,x7,[sp,#160+16] + add x0,sp,#96 + bl __ecp_nistz256_sub_from // p256_sub(H, U2, U1); + + orr x14,x14,x15 // see if result is zero + orr x16,x16,x17 + orr x14,x14,x16 // ~is_equal(U1,U2) + + mvn x27,x24 // -1/0 -> 0/-1 + mvn x28,x25 // -1/0 -> 0/-1 + orr x14,x14,x27 + orr x14,x14,x28 + orr x14,x14,x26 + cbnz x14,Ladd_proceed // if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2)) + +Ladd_double: + mov x1,x22 + mov x0,x21 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + add sp,sp,#256 // #256 is from #32*(12-4). difference in stack frames + b Ldouble_shortcut + +.align 4 +Ladd_proceed: + add x0,sp,#192 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#96] + ldp x6,x7,[sp,#96+16] + add x2,x22,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z); + + ldp x4,x5,[sp,#96] + ldp x6,x7,[sp,#96+16] + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H); + + ldr x3,[x23,#64] + ldp x4,x5,[sp,#64] + ldp x6,x7,[sp,#64+16] + add x2,x23,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, res_z, in2_z); + + ldr x3,[sp,#96] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,sp,#96 + add x0,sp,#224 + bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H); + + ldr x3,[sp,#128] + ldp x4,x5,[sp,#256] + ldp x6,x7,[sp,#256+16] + add x2,sp,#128 + add x0,sp,#288 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, U1, Hsqr); + + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + add x0,sp,#128 + bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2); + + add x2,sp,#192 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr); + + add x2,sp,#224 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub); + + add x2,sp,#288 + ldr x3,[sp,#224] // forward load for p256_mul_mont + ldp x4,x5,[sp,#320] + ldp x6,x7,[sp,#320+16] + add x0,sp,#32 + bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x); + + add x2,sp,#224 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S1, Hcub); + + ldr x3,[sp,#160] + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x2,sp,#160 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R); + + add x2,sp,#352 + bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2); + + ldp x4,x5,[sp,#0] // res + ldp x6,x7,[sp,#0+16] + ldp x8,x9,[x23] // in2 + ldp x10,x11,[x23,#16] + ldp x14,x15,[x22,#0] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#0+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+0+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+0+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#0+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#0+48] + stp x14,x15,[x21,#0] + stp x16,x17,[x21,#0+16] + ldp x14,x15,[x22,#32] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#32+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+32+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+32+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#32+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#32+48] + stp x14,x15,[x21,#32] + stp x16,x17,[x21,#32+16] + ldp x14,x15,[x22,#64] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#64+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + csel x14,x8,x14,ne + csel x15,x9,x15,ne + csel x16,x10,x16,ne + csel x17,x11,x17,ne + stp x14,x15,[x21,#64] + stp x16,x17,[x21,#64+16] + +Ladd_done: + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl _ecp_nistz256_point_add_affine +.private_extern _ecp_nistz256_point_add_affine + +.align 5 +_ecp_nistz256_point_add_affine: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + sub sp,sp,#32*10 + + mov x21,x0 + mov x22,x1 + mov x23,x2 + adrp x13,Lpoly@PAGE + add x13,x13,Lpoly@PAGEOFF + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + ldp x4,x5,[x1,#64] // in1_z + ldp x6,x7,[x1,#64+16] + orr x8,x4,x5 + orr x10,x6,x7 + orr x24,x8,x10 + cmp x24,#0 + csetm x24,ne // ~in1infty + + ldp x14,x15,[x2] // in2_x + ldp x16,x17,[x2,#16] + ldp x8,x9,[x2,#32] // in2_y + ldp x10,x11,[x2,#48] + orr x14,x14,x15 + orr x16,x16,x17 + orr x8,x8,x9 + orr x10,x10,x11 + orr x14,x14,x16 + orr x8,x8,x10 + orr x25,x14,x8 + cmp x25,#0 + csetm x25,ne // ~in2infty + + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); + + mov x4,x14 + mov x5,x15 + mov x6,x16 + mov x7,x17 + ldr x3,[x23] + add x2,x23,#0 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, Z1sqr, in2_x); + + add x2,x22,#0 + ldr x3,[x22,#64] // forward load for p256_mul_mont + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x0,sp,#160 + bl __ecp_nistz256_sub_from // p256_sub(H, U2, in1_x); + + add x2,x22,#64 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#160] + ldp x6,x7,[sp,#160+16] + add x2,x22,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z); + + ldr x3,[x23,#32] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,x23,#32 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y); + + add x2,x22,#32 + ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont + ldp x6,x7,[sp,#160+16] + add x0,sp,#192 + bl __ecp_nistz256_sub_from // p256_sub(R, S2, in1_y); + + add x0,sp,#224 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H); + + ldp x4,x5,[sp,#192] + ldp x6,x7,[sp,#192+16] + add x0,sp,#288 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R); + + ldr x3,[sp,#160] + ldp x4,x5,[sp,#224] + ldp x6,x7,[sp,#224+16] + add x2,sp,#160 + add x0,sp,#256 + bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H); + + ldr x3,[x22] + ldp x4,x5,[sp,#224] + ldp x6,x7,[sp,#224+16] + add x2,x22,#0 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in1_x, Hsqr); + + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + add x0,sp,#224 + bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2); + + add x2,sp,#288 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr); + + add x2,sp,#256 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub); + + add x2,sp,#96 + ldr x3,[x22,#32] // forward load for p256_mul_mont + ldp x4,x5,[sp,#256] + ldp x6,x7,[sp,#256+16] + add x0,sp,#32 + bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x); + + add x2,x22,#32 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, in1_y, Hcub); + + ldr x3,[sp,#192] + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x2,sp,#192 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R); + + add x2,sp,#128 + bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2); + + ldp x4,x5,[sp,#0] // res + ldp x6,x7,[sp,#0+16] + ldp x8,x9,[x23] // in2 + ldp x10,x11,[x23,#16] + ldp x14,x15,[x22,#0] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#0+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+0+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+0+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#0+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#0+48] + stp x14,x15,[x21,#0] + stp x16,x17,[x21,#0+16] + adrp x23,Lone_mont@PAGE-64 + add x23,x23,Lone_mont@PAGEOFF-64 + ldp x14,x15,[x22,#32] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#32+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+32+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+32+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#32+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#32+48] + stp x14,x15,[x21,#32] + stp x16,x17,[x21,#32+16] + ldp x14,x15,[x22,#64] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#64+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + csel x14,x8,x14,ne + csel x15,x9,x15,ne + csel x16,x10,x16,ne + csel x17,x11,x17,ne + stp x14,x15,[x21,#64] + stp x16,x17,[x21,#64+16] + + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x29,x30,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_ord_mul_mont(uint64_t res[4], uint64_t a[4], +// uint64_t b[4]); +.globl _ecp_nistz256_ord_mul_mont +.private_extern _ecp_nistz256_ord_mul_mont + +.align 4 +_ecp_nistz256_ord_mul_mont: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + adrp x23,Lord@PAGE + add x23,x23,Lord@PAGEOFF + ldr x3,[x2] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + + ldp x12,x13,[x23,#0] + ldp x21,x22,[x23,#16] + ldr x23,[x23,#32] + + mul x14,x4,x3 // a[0]*b[0] + umulh x8,x4,x3 + + mul x15,x5,x3 // a[1]*b[0] + umulh x9,x5,x3 + + mul x16,x6,x3 // a[2]*b[0] + umulh x10,x6,x3 + + mul x17,x7,x3 // a[3]*b[0] + umulh x19,x7,x3 + + mul x24,x14,x23 + + adds x15,x15,x8 // accumulate high parts of multiplication + adcs x16,x16,x9 + adcs x17,x17,x10 + adc x19,x19,xzr + mov x20,xzr + ldr x3,[x2,#8*1] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + ldr x3,[x2,#8*2] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + ldr x3,[x2,#8*3] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + lsl x8,x24,#32 // last reduction + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + subs x8,x14,x12 // ret -= modulus + sbcs x9,x15,x13 + sbcs x10,x16,x21 + sbcs x11,x17,x22 + sbcs xzr,x19,xzr + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldr x29,[sp],#64 + ret + + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4], +// uint64_t rep); +.globl _ecp_nistz256_ord_sqr_mont +.private_extern _ecp_nistz256_ord_sqr_mont + +.align 4 +_ecp_nistz256_ord_sqr_mont: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + adrp x23,Lord@PAGE + add x23,x23,Lord@PAGEOFF + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + + ldp x12,x13,[x23,#0] + ldp x21,x22,[x23,#16] + ldr x23,[x23,#32] + b Loop_ord_sqr + +.align 4 +Loop_ord_sqr: + sub x2,x2,#1 + //////////////////////////////////////////////////////////////// + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x15,x5,x4 // a[1]*a[0] + umulh x9,x5,x4 + mul x16,x6,x4 // a[2]*a[0] + umulh x10,x6,x4 + mul x17,x7,x4 // a[3]*a[0] + umulh x19,x7,x4 + + adds x16,x16,x9 // accumulate high parts of multiplication + mul x8,x6,x5 // a[2]*a[1] + umulh x9,x6,x5 + adcs x17,x17,x10 + mul x10,x7,x5 // a[3]*a[1] + umulh x11,x7,x5 + adc x19,x19,xzr // can't overflow + + mul x20,x7,x6 // a[3]*a[2] + umulh x1,x7,x6 + + adds x9,x9,x10 // accumulate high parts of multiplication + mul x14,x4,x4 // a[0]*a[0] + adc x10,x11,xzr // can't overflow + + adds x17,x17,x8 // accumulate low parts of multiplication + umulh x4,x4,x4 + adcs x19,x19,x9 + mul x9,x5,x5 // a[1]*a[1] + adcs x20,x20,x10 + umulh x5,x5,x5 + adc x1,x1,xzr // can't overflow + + adds x15,x15,x15 // acc[1-6]*=2 + mul x10,x6,x6 // a[2]*a[2] + adcs x16,x16,x16 + umulh x6,x6,x6 + adcs x17,x17,x17 + mul x11,x7,x7 // a[3]*a[3] + adcs x19,x19,x19 + umulh x7,x7,x7 + adcs x20,x20,x20 + adcs x1,x1,x1 + adc x3,xzr,xzr + + adds x15,x15,x4 // +a[i]*a[i] + mul x24,x14,x23 + adcs x16,x16,x9 + adcs x17,x17,x5 + adcs x19,x19,x10 + adcs x20,x20,x6 + adcs x1,x1,x11 + adc x3,x3,x7 + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adc x17,xzr,x24 // can't overflow + mul x11,x14,x23 + lsl x8,x24,#32 + subs x15,x15,x24 + lsr x9,x24,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x11 + mul x10,x13,x11 + umulh x24,x13,x11 + + adcs x10,x10,x9 + adc x24,x24,xzr + + adds x14,x15,x10 + adcs x15,x16,x24 + adcs x16,x17,x11 + adc x17,xzr,x11 // can't overflow + mul x24,x14,x23 + lsl x8,x11,#32 + subs x15,x15,x11 + lsr x9,x11,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adc x17,xzr,x24 // can't overflow + mul x11,x14,x23 + lsl x8,x24,#32 + subs x15,x15,x24 + lsr x9,x24,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x11 + mul x10,x13,x11 + umulh x24,x13,x11 + + adcs x10,x10,x9 + adc x24,x24,xzr + + adds x14,x15,x10 + adcs x15,x16,x24 + adcs x16,x17,x11 + adc x17,xzr,x11 // can't overflow + lsl x8,x11,#32 + subs x15,x15,x11 + lsr x9,x11,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + adds x14,x14,x19 // accumulate upper half + adcs x15,x15,x20 + adcs x16,x16,x1 + adcs x17,x17,x3 + adc x19,xzr,xzr + + subs x8,x14,x12 // ret -= modulus + sbcs x9,x15,x13 + sbcs x10,x16,x21 + sbcs x11,x17,x22 + sbcs xzr,x19,xzr + + csel x4,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x5,x15,x9,lo + csel x6,x16,x10,lo + csel x7,x17,x11,lo + + cbnz x2,Loop_ord_sqr + + stp x4,x5,[x0] + stp x6,x7,[x0,#16] + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldr x29,[sp],#64 + ret + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index); +.globl _ecp_nistz256_select_w5 +.private_extern _ecp_nistz256_select_w5 + +.align 4 +_ecp_nistz256_select_w5: + AARCH64_VALID_CALL_TARGET + + // x10 := x0 + // w9 := 0; loop counter and incremented internal index + mov x10, x0 + mov w9, #0 + + // [v16-v21] := 0 + movi v16.16b, #0 + movi v17.16b, #0 + movi v18.16b, #0 + movi v19.16b, #0 + movi v20.16b, #0 + movi v21.16b, #0 + +Lselect_w5_loop: + // Loop 16 times. + + // Increment index (loop counter); tested at the end of the loop + add w9, w9, #1 + + // [v22-v27] := Load a (3*256-bit = 6*128-bit) table entry starting at x1 + // and advance x1 to point to the next entry + ld1 {v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64 + + // x11 := (w9 == w2)? All 1s : All 0s + cmp w9, w2 + csetm x11, eq + + // continue loading ... + ld1 {v26.2d, v27.2d}, [x1],#32 + + // duplicate mask_64 into Mask (all 0s or all 1s) + dup v3.2d, x11 + + // [v16-v19] := (Mask == all 1s)? [v22-v25] : [v16-v19] + // i.e., values in output registers will remain the same if w9 != w2 + bit v16.16b, v22.16b, v3.16b + bit v17.16b, v23.16b, v3.16b + + bit v18.16b, v24.16b, v3.16b + bit v19.16b, v25.16b, v3.16b + + bit v20.16b, v26.16b, v3.16b + bit v21.16b, v27.16b, v3.16b + + // If bit #4 is not 0 (i.e. idx_ctr < 16) loop back + tbz w9, #4, Lselect_w5_loop + + // Write [v16-v21] to memory at the output pointer + st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x10],#64 + st1 {v20.2d, v21.2d}, [x10] + + ret + + + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index); +.globl _ecp_nistz256_select_w7 +.private_extern _ecp_nistz256_select_w7 + +.align 4 +_ecp_nistz256_select_w7: + AARCH64_VALID_CALL_TARGET + + // w9 := 0; loop counter and incremented internal index + mov w9, #0 + + // [v16-v21] := 0 + movi v16.16b, #0 + movi v17.16b, #0 + movi v18.16b, #0 + movi v19.16b, #0 + +Lselect_w7_loop: + // Loop 64 times. + + // Increment index (loop counter); tested at the end of the loop + add w9, w9, #1 + + // [v22-v25] := Load a (2*256-bit = 4*128-bit) table entry starting at x1 + // and advance x1 to point to the next entry + ld1 {v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64 + + // x11 := (w9 == w2)? All 1s : All 0s + cmp w9, w2 + csetm x11, eq + + // duplicate mask_64 into Mask (all 0s or all 1s) + dup v3.2d, x11 + + // [v16-v19] := (Mask == all 1s)? [v22-v25] : [v16-v19] + // i.e., values in output registers will remain the same if w9 != w2 + bit v16.16b, v22.16b, v3.16b + bit v17.16b, v23.16b, v3.16b + + bit v18.16b, v24.16b, v3.16b + bit v19.16b, v25.16b, v3.16b + + // If bit #6 is not 0 (i.e. idx_ctr < 64) loop back + tbz w9, #6, Lselect_w7_loop + + // Write [v16-v19] to memory at the output pointer + st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x0] + + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-linux.S b/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-linux.S new file mode 100644 index 0000000000..28d9ac9760 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-linux.S @@ -0,0 +1,1726 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include "openssl/arm_arch.h" + +.section .rodata +.align 5 +.Lpoly: +.quad 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 +.LRR: // 2^512 mod P precomputed for NIST P256 polynomial +.quad 0x0000000000000003,0xfffffffbffffffff,0xfffffffffffffffe,0x00000004fffffffd +.Lone_mont: +.quad 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe +.Lone: +.quad 1,0,0,0 +.Lord: +.quad 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000 +.LordK: +.quad 0xccd1c8aaee00bc4f +.byte 69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.text + +// void ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl ecp_nistz256_mul_mont +.hidden ecp_nistz256_mul_mont +.type ecp_nistz256_mul_mont,%function +.align 4 +ecp_nistz256_mul_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + ldr x3,[x2] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_mul_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont + +// void ecp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_sqr_mont +.hidden ecp_nistz256_sqr_mont +.type ecp_nistz256_sqr_mont,%function +.align 4 +ecp_nistz256_sqr_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sqr_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont + +// void ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_div_by_2 +.hidden ecp_nistz256_div_by_2 +.type ecp_nistz256_div_by_2,%function +.align 4 +ecp_nistz256_div_by_2: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_div_by_2 + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 + +// void ecp_nistz256_mul_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_mul_by_2 +.hidden ecp_nistz256_mul_by_2 +.type ecp_nistz256_mul_by_2,%function +.align 4 +ecp_nistz256_mul_by_2: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + + bl __ecp_nistz256_add_to // ret = a+a // 2*a + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 + +// void ecp_nistz256_mul_by_3(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_mul_by_3 +.hidden ecp_nistz256_mul_by_3 +.type ecp_nistz256_mul_by_3,%function +.align 4 +ecp_nistz256_mul_by_3: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + mov x4,x14 + mov x5,x15 + mov x6,x16 + mov x7,x17 + + bl __ecp_nistz256_add_to // ret = a+a // 2*a + + mov x8,x4 + mov x9,x5 + mov x10,x6 + mov x11,x7 + + bl __ecp_nistz256_add_to // ret += a // 2*a+a=3*a + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 + +// void ecp_nistz256_sub(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl ecp_nistz256_sub +.hidden ecp_nistz256_sub +.type ecp_nistz256_sub,%function +.align 4 +ecp_nistz256_sub: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sub_from + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_sub,.-ecp_nistz256_sub + +// void ecp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_neg +.hidden ecp_nistz256_neg +.type ecp_nistz256_neg,%function +.align 4 +ecp_nistz256_neg: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x2,x1 + mov x14,xzr // a = 0 + mov x15,xzr + mov x16,xzr + mov x17,xzr + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sub_from + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_neg,.-ecp_nistz256_neg + +// note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded +// to x4-x7 and b[0] - to x3 +.type __ecp_nistz256_mul_mont,%function +.align 4 +__ecp_nistz256_mul_mont: + mul x14,x4,x3 // a[0]*b[0] + umulh x8,x4,x3 + + mul x15,x5,x3 // a[1]*b[0] + umulh x9,x5,x3 + + mul x16,x6,x3 // a[2]*b[0] + umulh x10,x6,x3 + + mul x17,x7,x3 // a[3]*b[0] + umulh x11,x7,x3 + ldr x3,[x2,#8] // b[1] + + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adc x19,xzr,x11 + mov x20,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + ldr x3,[x2,#8*(1+1)] // b[1+1] + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + ldr x3,[x2,#8*(2+1)] // b[2+1] + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + // last reduction + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + adcs x17,x19,x11 + adc x19,x20,xzr + + adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x19,xzr // did it borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_mul_mont,.-__ecp_nistz256_mul_mont + +// note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded +// to x4-x7 +.type __ecp_nistz256_sqr_mont,%function +.align 4 +__ecp_nistz256_sqr_mont: + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x15,x5,x4 // a[1]*a[0] + umulh x9,x5,x4 + mul x16,x6,x4 // a[2]*a[0] + umulh x10,x6,x4 + mul x17,x7,x4 // a[3]*a[0] + umulh x19,x7,x4 + + adds x16,x16,x9 // accumulate high parts of multiplication + mul x8,x6,x5 // a[2]*a[1] + umulh x9,x6,x5 + adcs x17,x17,x10 + mul x10,x7,x5 // a[3]*a[1] + umulh x11,x7,x5 + adc x19,x19,xzr // can't overflow + + mul x20,x7,x6 // a[3]*a[2] + umulh x1,x7,x6 + + adds x9,x9,x10 // accumulate high parts of multiplication + mul x14,x4,x4 // a[0]*a[0] + adc x10,x11,xzr // can't overflow + + adds x17,x17,x8 // accumulate low parts of multiplication + umulh x4,x4,x4 + adcs x19,x19,x9 + mul x9,x5,x5 // a[1]*a[1] + adcs x20,x20,x10 + umulh x5,x5,x5 + adc x1,x1,xzr // can't overflow + + adds x15,x15,x15 // acc[1-6]*=2 + mul x10,x6,x6 // a[2]*a[2] + adcs x16,x16,x16 + umulh x6,x6,x6 + adcs x17,x17,x17 + mul x11,x7,x7 // a[3]*a[3] + adcs x19,x19,x19 + umulh x7,x7,x7 + adcs x20,x20,x20 + adcs x1,x1,x1 + adc x2,xzr,xzr + + adds x15,x15,x4 // +a[i]*a[i] + adcs x16,x16,x9 + adcs x17,x17,x5 + adcs x19,x19,x10 + adcs x20,x20,x6 + lsl x8,x14,#32 + adcs x1,x1,x11 + lsr x9,x14,#32 + adc x2,x2,x7 + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + adc x17,x11,xzr // can't overflow + + adds x14,x14,x19 // accumulate upper half + adcs x15,x15,x20 + adcs x16,x16,x1 + adcs x17,x17,x2 + adc x19,xzr,xzr + + adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x19,xzr // did it borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_sqr_mont,.-__ecp_nistz256_sqr_mont + +// Note that __ecp_nistz256_add_to expects both input vectors pre-loaded to +// x4-x7 and x8-x11. This is done because it's used in multiple +// contexts, e.g. in multiplication by 2 and 3... +.type __ecp_nistz256_add_to,%function +.align 4 +__ecp_nistz256_add_to: + adds x14,x14,x8 // ret = a+b + adcs x15,x15,x9 + adcs x16,x16,x10 + adcs x17,x17,x11 + adc x1,xzr,xzr // zap x1 + + adds x8,x14,#1 // subs x8,x4,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x1,xzr // did subtraction borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_add_to,.-__ecp_nistz256_add_to + +.type __ecp_nistz256_sub_from,%function +.align 4 +__ecp_nistz256_sub_from: + ldp x8,x9,[x2] + ldp x10,x11,[x2,#16] + subs x14,x14,x8 // ret = a-b + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbcs x17,x17,x11 + sbc x1,xzr,xzr // zap x1 + + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adc x11,x17,x13 + cmp x1,xzr // did subtraction borrow? + + csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret + csel x15,x15,x9,eq + csel x16,x16,x10,eq + stp x14,x15,[x0] + csel x17,x17,x11,eq + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_sub_from,.-__ecp_nistz256_sub_from + +.type __ecp_nistz256_sub_morf,%function +.align 4 +__ecp_nistz256_sub_morf: + ldp x8,x9,[x2] + ldp x10,x11,[x2,#16] + subs x14,x8,x14 // ret = b-a + sbcs x15,x9,x15 + sbcs x16,x10,x16 + sbcs x17,x11,x17 + sbc x1,xzr,xzr // zap x1 + + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adc x11,x17,x13 + cmp x1,xzr // did subtraction borrow? + + csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret + csel x15,x15,x9,eq + csel x16,x16,x10,eq + stp x14,x15,[x0] + csel x17,x17,x11,eq + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_sub_morf,.-__ecp_nistz256_sub_morf + +.type __ecp_nistz256_div_by_2,%function +.align 4 +__ecp_nistz256_div_by_2: + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = a+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adcs x11,x17,x13 + adc x1,xzr,xzr // zap x1 + tst x14,#1 // is a even? + + csel x14,x14,x8,eq // ret = even ? a : a+modulus + csel x15,x15,x9,eq + csel x16,x16,x10,eq + csel x17,x17,x11,eq + csel x1,xzr,x1,eq + + lsr x14,x14,#1 // ret >>= 1 + orr x14,x14,x15,lsl#63 + lsr x15,x15,#1 + orr x15,x15,x16,lsl#63 + lsr x16,x16,#1 + orr x16,x16,x17,lsl#63 + lsr x17,x17,#1 + stp x14,x15,[x0] + orr x17,x17,x1,lsl#63 + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_div_by_2,.-__ecp_nistz256_div_by_2 +.globl ecp_nistz256_point_double +.hidden ecp_nistz256_point_double +.type ecp_nistz256_point_double,%function +.align 5 +ecp_nistz256_point_double: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + sub sp,sp,#32*4 + +.Ldouble_shortcut: + ldp x14,x15,[x1,#32] + mov x21,x0 + ldp x16,x17,[x1,#48] + mov x22,x1 + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + mov x8,x14 + ldr x13,[x13,#24] + mov x9,x15 + ldp x4,x5,[x22,#64] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[x22,#64+16] + add x0,sp,#0 + bl __ecp_nistz256_add_to // p256_mul_by_2(S, in_y); + + add x0,sp,#64 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Zsqr, in_z); + + ldp x8,x9,[x22] + ldp x10,x11,[x22,#16] + mov x4,x14 // put Zsqr aside for p256_sub + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x0,sp,#32 + bl __ecp_nistz256_add_to // p256_add(M, Zsqr, in_x); + + add x2,x22,#0 + mov x14,x4 // restore Zsqr + mov x15,x5 + ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont + mov x16,x6 + mov x17,x7 + ldp x6,x7,[sp,#0+16] + add x0,sp,#64 + bl __ecp_nistz256_sub_morf // p256_sub(Zsqr, in_x, Zsqr); + + add x0,sp,#0 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(S, S); + + ldr x3,[x22,#32] + ldp x4,x5,[x22,#64] + ldp x6,x7,[x22,#64+16] + add x2,x22,#32 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(tmp0, in_z, in_y); + + mov x8,x14 + mov x9,x15 + ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[sp,#0+16] + add x0,x21,#64 + bl __ecp_nistz256_add_to // p256_mul_by_2(res_z, tmp0); + + add x0,sp,#96 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(tmp0, S); + + ldr x3,[sp,#64] // forward load for p256_mul_mont + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x0,x21,#32 + bl __ecp_nistz256_div_by_2 // p256_div_by_2(res_y, tmp0); + + add x2,sp,#64 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(M, M, Zsqr); + + mov x8,x14 // duplicate M + mov x9,x15 + mov x10,x16 + mov x11,x17 + mov x4,x14 // put M aside + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x0,sp,#32 + bl __ecp_nistz256_add_to + mov x8,x4 // restore M + mov x9,x5 + ldr x3,[x22] // forward load for p256_mul_mont + mov x10,x6 + ldp x4,x5,[sp,#0] + mov x11,x7 + ldp x6,x7,[sp,#0+16] + bl __ecp_nistz256_add_to // p256_mul_by_3(M, M); + + add x2,x22,#0 + add x0,sp,#0 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, in_x); + + mov x8,x14 + mov x9,x15 + ldp x4,x5,[sp,#32] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[sp,#32+16] + add x0,sp,#96 + bl __ecp_nistz256_add_to // p256_mul_by_2(tmp0, S); + + add x0,x21,#0 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(res_x, M); + + add x2,sp,#96 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, tmp0); + + add x2,sp,#0 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(S, S, res_x); + + ldr x3,[sp,#32] + mov x4,x14 // copy S + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x2,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, M); + + add x2,x21,#32 + add x0,x21,#32 + bl __ecp_nistz256_sub_from // p256_sub(res_y, S, res_y); + + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_point_double,.-ecp_nistz256_point_double +.globl ecp_nistz256_point_add +.hidden ecp_nistz256_point_add +.type ecp_nistz256_point_add,%function +.align 5 +ecp_nistz256_point_add: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#32*12 + + ldp x4,x5,[x2,#64] // in2_z + ldp x6,x7,[x2,#64+16] + mov x21,x0 + mov x22,x1 + mov x23,x2 + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + orr x8,x4,x5 + orr x10,x6,x7 + orr x25,x8,x10 + cmp x25,#0 + csetm x25,ne // ~in2infty + add x0,sp,#192 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z); + + ldp x4,x5,[x22,#64] // in1_z + ldp x6,x7,[x22,#64+16] + orr x8,x4,x5 + orr x10,x6,x7 + orr x24,x8,x10 + cmp x24,#0 + csetm x24,ne // ~in1infty + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); + + ldr x3,[x23,#64] + ldp x4,x5,[sp,#192] + ldp x6,x7,[sp,#192+16] + add x2,x23,#64 + add x0,sp,#320 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S1, Z2sqr, in2_z); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,x22,#64 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z); + + ldr x3,[x22,#32] + ldp x4,x5,[sp,#320] + ldp x6,x7,[sp,#320+16] + add x2,x22,#32 + add x0,sp,#320 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S1, S1, in1_y); + + ldr x3,[x23,#32] + ldp x4,x5,[sp,#352] + ldp x6,x7,[sp,#352+16] + add x2,x23,#32 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y); + + add x2,sp,#320 + ldr x3,[sp,#192] // forward load for p256_mul_mont + ldp x4,x5,[x22] + ldp x6,x7,[x22,#16] + add x0,sp,#160 + bl __ecp_nistz256_sub_from // p256_sub(R, S2, S1); + + orr x14,x14,x15 // see if result is zero + orr x16,x16,x17 + orr x26,x14,x16 // ~is_equal(S1,S2) + + add x2,sp,#192 + add x0,sp,#256 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U1, in1_x, Z2sqr); + + ldr x3,[sp,#128] + ldp x4,x5,[x23] + ldp x6,x7,[x23,#16] + add x2,sp,#128 + add x0,sp,#288 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in2_x, Z1sqr); + + add x2,sp,#256 + ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont + ldp x6,x7,[sp,#160+16] + add x0,sp,#96 + bl __ecp_nistz256_sub_from // p256_sub(H, U2, U1); + + orr x14,x14,x15 // see if result is zero + orr x16,x16,x17 + orr x14,x14,x16 // ~is_equal(U1,U2) + + mvn x27,x24 // -1/0 -> 0/-1 + mvn x28,x25 // -1/0 -> 0/-1 + orr x14,x14,x27 + orr x14,x14,x28 + orr x14,x14,x26 + cbnz x14,.Ladd_proceed // if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2)) + +.Ladd_double: + mov x1,x22 + mov x0,x21 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + add sp,sp,#256 // #256 is from #32*(12-4). difference in stack frames + b .Ldouble_shortcut + +.align 4 +.Ladd_proceed: + add x0,sp,#192 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#96] + ldp x6,x7,[sp,#96+16] + add x2,x22,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z); + + ldp x4,x5,[sp,#96] + ldp x6,x7,[sp,#96+16] + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H); + + ldr x3,[x23,#64] + ldp x4,x5,[sp,#64] + ldp x6,x7,[sp,#64+16] + add x2,x23,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, res_z, in2_z); + + ldr x3,[sp,#96] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,sp,#96 + add x0,sp,#224 + bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H); + + ldr x3,[sp,#128] + ldp x4,x5,[sp,#256] + ldp x6,x7,[sp,#256+16] + add x2,sp,#128 + add x0,sp,#288 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, U1, Hsqr); + + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + add x0,sp,#128 + bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2); + + add x2,sp,#192 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr); + + add x2,sp,#224 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub); + + add x2,sp,#288 + ldr x3,[sp,#224] // forward load for p256_mul_mont + ldp x4,x5,[sp,#320] + ldp x6,x7,[sp,#320+16] + add x0,sp,#32 + bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x); + + add x2,sp,#224 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S1, Hcub); + + ldr x3,[sp,#160] + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x2,sp,#160 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R); + + add x2,sp,#352 + bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2); + + ldp x4,x5,[sp,#0] // res + ldp x6,x7,[sp,#0+16] + ldp x8,x9,[x23] // in2 + ldp x10,x11,[x23,#16] + ldp x14,x15,[x22,#0] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#0+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+0+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+0+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#0+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#0+48] + stp x14,x15,[x21,#0] + stp x16,x17,[x21,#0+16] + ldp x14,x15,[x22,#32] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#32+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+32+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+32+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#32+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#32+48] + stp x14,x15,[x21,#32] + stp x16,x17,[x21,#32+16] + ldp x14,x15,[x22,#64] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#64+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + csel x14,x8,x14,ne + csel x15,x9,x15,ne + csel x16,x10,x16,ne + csel x17,x11,x17,ne + stp x14,x15,[x21,#64] + stp x16,x17,[x21,#64+16] + +.Ladd_done: + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_point_add,.-ecp_nistz256_point_add +.globl ecp_nistz256_point_add_affine +.hidden ecp_nistz256_point_add_affine +.type ecp_nistz256_point_add_affine,%function +.align 5 +ecp_nistz256_point_add_affine: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + sub sp,sp,#32*10 + + mov x21,x0 + mov x22,x1 + mov x23,x2 + adrp x13,.Lpoly + add x13,x13,:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + ldp x4,x5,[x1,#64] // in1_z + ldp x6,x7,[x1,#64+16] + orr x8,x4,x5 + orr x10,x6,x7 + orr x24,x8,x10 + cmp x24,#0 + csetm x24,ne // ~in1infty + + ldp x14,x15,[x2] // in2_x + ldp x16,x17,[x2,#16] + ldp x8,x9,[x2,#32] // in2_y + ldp x10,x11,[x2,#48] + orr x14,x14,x15 + orr x16,x16,x17 + orr x8,x8,x9 + orr x10,x10,x11 + orr x14,x14,x16 + orr x8,x8,x10 + orr x25,x14,x8 + cmp x25,#0 + csetm x25,ne // ~in2infty + + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); + + mov x4,x14 + mov x5,x15 + mov x6,x16 + mov x7,x17 + ldr x3,[x23] + add x2,x23,#0 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, Z1sqr, in2_x); + + add x2,x22,#0 + ldr x3,[x22,#64] // forward load for p256_mul_mont + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x0,sp,#160 + bl __ecp_nistz256_sub_from // p256_sub(H, U2, in1_x); + + add x2,x22,#64 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#160] + ldp x6,x7,[sp,#160+16] + add x2,x22,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z); + + ldr x3,[x23,#32] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,x23,#32 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y); + + add x2,x22,#32 + ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont + ldp x6,x7,[sp,#160+16] + add x0,sp,#192 + bl __ecp_nistz256_sub_from // p256_sub(R, S2, in1_y); + + add x0,sp,#224 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H); + + ldp x4,x5,[sp,#192] + ldp x6,x7,[sp,#192+16] + add x0,sp,#288 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R); + + ldr x3,[sp,#160] + ldp x4,x5,[sp,#224] + ldp x6,x7,[sp,#224+16] + add x2,sp,#160 + add x0,sp,#256 + bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H); + + ldr x3,[x22] + ldp x4,x5,[sp,#224] + ldp x6,x7,[sp,#224+16] + add x2,x22,#0 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in1_x, Hsqr); + + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + add x0,sp,#224 + bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2); + + add x2,sp,#288 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr); + + add x2,sp,#256 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub); + + add x2,sp,#96 + ldr x3,[x22,#32] // forward load for p256_mul_mont + ldp x4,x5,[sp,#256] + ldp x6,x7,[sp,#256+16] + add x0,sp,#32 + bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x); + + add x2,x22,#32 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, in1_y, Hcub); + + ldr x3,[sp,#192] + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x2,sp,#192 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R); + + add x2,sp,#128 + bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2); + + ldp x4,x5,[sp,#0] // res + ldp x6,x7,[sp,#0+16] + ldp x8,x9,[x23] // in2 + ldp x10,x11,[x23,#16] + ldp x14,x15,[x22,#0] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#0+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+0+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+0+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#0+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#0+48] + stp x14,x15,[x21,#0] + stp x16,x17,[x21,#0+16] + adrp x23,.Lone_mont-64 + add x23,x23,:lo12:.Lone_mont-64 + ldp x14,x15,[x22,#32] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#32+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+32+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+32+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#32+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#32+48] + stp x14,x15,[x21,#32] + stp x16,x17,[x21,#32+16] + ldp x14,x15,[x22,#64] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#64+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + csel x14,x8,x14,ne + csel x15,x9,x15,ne + csel x16,x10,x16,ne + csel x17,x11,x17,ne + stp x14,x15,[x21,#64] + stp x16,x17,[x21,#64+16] + + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x29,x30,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_ord_mul_mont(uint64_t res[4], uint64_t a[4], +// uint64_t b[4]); +.globl ecp_nistz256_ord_mul_mont +.hidden ecp_nistz256_ord_mul_mont +.type ecp_nistz256_ord_mul_mont,%function +.align 4 +ecp_nistz256_ord_mul_mont: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + adrp x23,.Lord + add x23,x23,:lo12:.Lord + ldr x3,[x2] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + + ldp x12,x13,[x23,#0] + ldp x21,x22,[x23,#16] + ldr x23,[x23,#32] + + mul x14,x4,x3 // a[0]*b[0] + umulh x8,x4,x3 + + mul x15,x5,x3 // a[1]*b[0] + umulh x9,x5,x3 + + mul x16,x6,x3 // a[2]*b[0] + umulh x10,x6,x3 + + mul x17,x7,x3 // a[3]*b[0] + umulh x19,x7,x3 + + mul x24,x14,x23 + + adds x15,x15,x8 // accumulate high parts of multiplication + adcs x16,x16,x9 + adcs x17,x17,x10 + adc x19,x19,xzr + mov x20,xzr + ldr x3,[x2,#8*1] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + ldr x3,[x2,#8*2] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + ldr x3,[x2,#8*3] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + lsl x8,x24,#32 // last reduction + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + subs x8,x14,x12 // ret -= modulus + sbcs x9,x15,x13 + sbcs x10,x16,x21 + sbcs x11,x17,x22 + sbcs xzr,x19,xzr + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldr x29,[sp],#64 + ret +.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4], +// uint64_t rep); +.globl ecp_nistz256_ord_sqr_mont +.hidden ecp_nistz256_ord_sqr_mont +.type ecp_nistz256_ord_sqr_mont,%function +.align 4 +ecp_nistz256_ord_sqr_mont: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + adrp x23,.Lord + add x23,x23,:lo12:.Lord + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + + ldp x12,x13,[x23,#0] + ldp x21,x22,[x23,#16] + ldr x23,[x23,#32] + b .Loop_ord_sqr + +.align 4 +.Loop_ord_sqr: + sub x2,x2,#1 + //////////////////////////////////////////////////////////////// + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x15,x5,x4 // a[1]*a[0] + umulh x9,x5,x4 + mul x16,x6,x4 // a[2]*a[0] + umulh x10,x6,x4 + mul x17,x7,x4 // a[3]*a[0] + umulh x19,x7,x4 + + adds x16,x16,x9 // accumulate high parts of multiplication + mul x8,x6,x5 // a[2]*a[1] + umulh x9,x6,x5 + adcs x17,x17,x10 + mul x10,x7,x5 // a[3]*a[1] + umulh x11,x7,x5 + adc x19,x19,xzr // can't overflow + + mul x20,x7,x6 // a[3]*a[2] + umulh x1,x7,x6 + + adds x9,x9,x10 // accumulate high parts of multiplication + mul x14,x4,x4 // a[0]*a[0] + adc x10,x11,xzr // can't overflow + + adds x17,x17,x8 // accumulate low parts of multiplication + umulh x4,x4,x4 + adcs x19,x19,x9 + mul x9,x5,x5 // a[1]*a[1] + adcs x20,x20,x10 + umulh x5,x5,x5 + adc x1,x1,xzr // can't overflow + + adds x15,x15,x15 // acc[1-6]*=2 + mul x10,x6,x6 // a[2]*a[2] + adcs x16,x16,x16 + umulh x6,x6,x6 + adcs x17,x17,x17 + mul x11,x7,x7 // a[3]*a[3] + adcs x19,x19,x19 + umulh x7,x7,x7 + adcs x20,x20,x20 + adcs x1,x1,x1 + adc x3,xzr,xzr + + adds x15,x15,x4 // +a[i]*a[i] + mul x24,x14,x23 + adcs x16,x16,x9 + adcs x17,x17,x5 + adcs x19,x19,x10 + adcs x20,x20,x6 + adcs x1,x1,x11 + adc x3,x3,x7 + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adc x17,xzr,x24 // can't overflow + mul x11,x14,x23 + lsl x8,x24,#32 + subs x15,x15,x24 + lsr x9,x24,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x11 + mul x10,x13,x11 + umulh x24,x13,x11 + + adcs x10,x10,x9 + adc x24,x24,xzr + + adds x14,x15,x10 + adcs x15,x16,x24 + adcs x16,x17,x11 + adc x17,xzr,x11 // can't overflow + mul x24,x14,x23 + lsl x8,x11,#32 + subs x15,x15,x11 + lsr x9,x11,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adc x17,xzr,x24 // can't overflow + mul x11,x14,x23 + lsl x8,x24,#32 + subs x15,x15,x24 + lsr x9,x24,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x11 + mul x10,x13,x11 + umulh x24,x13,x11 + + adcs x10,x10,x9 + adc x24,x24,xzr + + adds x14,x15,x10 + adcs x15,x16,x24 + adcs x16,x17,x11 + adc x17,xzr,x11 // can't overflow + lsl x8,x11,#32 + subs x15,x15,x11 + lsr x9,x11,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + adds x14,x14,x19 // accumulate upper half + adcs x15,x15,x20 + adcs x16,x16,x1 + adcs x17,x17,x3 + adc x19,xzr,xzr + + subs x8,x14,x12 // ret -= modulus + sbcs x9,x15,x13 + sbcs x10,x16,x21 + sbcs x11,x17,x22 + sbcs xzr,x19,xzr + + csel x4,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x5,x15,x9,lo + csel x6,x16,x10,lo + csel x7,x17,x11,lo + + cbnz x2,.Loop_ord_sqr + + stp x4,x5,[x0] + stp x6,x7,[x0,#16] + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldr x29,[sp],#64 + ret +.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_select_w5 +.hidden ecp_nistz256_select_w5 +.type ecp_nistz256_select_w5,%function +.align 4 +ecp_nistz256_select_w5: + AARCH64_VALID_CALL_TARGET + + // x10 := x0 + // w9 := 0; loop counter and incremented internal index + mov x10, x0 + mov w9, #0 + + // [v16-v21] := 0 + movi v16.16b, #0 + movi v17.16b, #0 + movi v18.16b, #0 + movi v19.16b, #0 + movi v20.16b, #0 + movi v21.16b, #0 + +.Lselect_w5_loop: + // Loop 16 times. + + // Increment index (loop counter); tested at the end of the loop + add w9, w9, #1 + + // [v22-v27] := Load a (3*256-bit = 6*128-bit) table entry starting at x1 + // and advance x1 to point to the next entry + ld1 {v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64 + + // x11 := (w9 == w2)? All 1s : All 0s + cmp w9, w2 + csetm x11, eq + + // continue loading ... + ld1 {v26.2d, v27.2d}, [x1],#32 + + // duplicate mask_64 into Mask (all 0s or all 1s) + dup v3.2d, x11 + + // [v16-v19] := (Mask == all 1s)? [v22-v25] : [v16-v19] + // i.e., values in output registers will remain the same if w9 != w2 + bit v16.16b, v22.16b, v3.16b + bit v17.16b, v23.16b, v3.16b + + bit v18.16b, v24.16b, v3.16b + bit v19.16b, v25.16b, v3.16b + + bit v20.16b, v26.16b, v3.16b + bit v21.16b, v27.16b, v3.16b + + // If bit #4 is not 0 (i.e. idx_ctr < 16) loop back + tbz w9, #4, .Lselect_w5_loop + + // Write [v16-v21] to memory at the output pointer + st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x10],#64 + st1 {v20.2d, v21.2d}, [x10] + + ret +.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 + + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_select_w7 +.hidden ecp_nistz256_select_w7 +.type ecp_nistz256_select_w7,%function +.align 4 +ecp_nistz256_select_w7: + AARCH64_VALID_CALL_TARGET + + // w9 := 0; loop counter and incremented internal index + mov w9, #0 + + // [v16-v21] := 0 + movi v16.16b, #0 + movi v17.16b, #0 + movi v18.16b, #0 + movi v19.16b, #0 + +.Lselect_w7_loop: + // Loop 64 times. + + // Increment index (loop counter); tested at the end of the loop + add w9, w9, #1 + + // [v22-v25] := Load a (2*256-bit = 4*128-bit) table entry starting at x1 + // and advance x1 to point to the next entry + ld1 {v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64 + + // x11 := (w9 == w2)? All 1s : All 0s + cmp w9, w2 + csetm x11, eq + + // duplicate mask_64 into Mask (all 0s or all 1s) + dup v3.2d, x11 + + // [v16-v19] := (Mask == all 1s)? [v22-v25] : [v16-v19] + // i.e., values in output registers will remain the same if w9 != w2 + bit v16.16b, v22.16b, v3.16b + bit v17.16b, v23.16b, v3.16b + + bit v18.16b, v24.16b, v3.16b + bit v19.16b, v25.16b, v3.16b + + // If bit #6 is not 0 (i.e. idx_ctr < 64) loop back + tbz w9, #6, .Lselect_w7_loop + + // Write [v16-v19] to memory at the output pointer + st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x0] + + ret +.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-win.S b/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-win.S new file mode 100644 index 0000000000..a55d20d27b --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256-armv8-asm-win.S @@ -0,0 +1,1766 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include "openssl/arm_arch.h" + +.section .rodata +.align 5 +Lpoly: +.quad 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 +LRR: // 2^512 mod P precomputed for NIST P256 polynomial +.quad 0x0000000000000003,0xfffffffbffffffff,0xfffffffffffffffe,0x00000004fffffffd +Lone_mont: +.quad 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe +Lone: +.quad 1,0,0,0 +Lord: +.quad 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000 +LordK: +.quad 0xccd1c8aaee00bc4f +.byte 69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.text + +// void ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl ecp_nistz256_mul_mont + +.def ecp_nistz256_mul_mont + .type 32 +.endef +.align 4 +ecp_nistz256_mul_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + ldr x3,[x2] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_mul_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_sqr_mont + +.def ecp_nistz256_sqr_mont + .type 32 +.endef +.align 4 +ecp_nistz256_sqr_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sqr_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_div_by_2 + +.def ecp_nistz256_div_by_2 + .type 32 +.endef +.align 4 +ecp_nistz256_div_by_2: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_div_by_2 + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_mul_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_mul_by_2 + +.def ecp_nistz256_mul_by_2 + .type 32 +.endef +.align 4 +ecp_nistz256_mul_by_2: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + + bl __ecp_nistz256_add_to // ret = a+a // 2*a + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_mul_by_3(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_mul_by_3 + +.def ecp_nistz256_mul_by_3 + .type 32 +.endef +.align 4 +ecp_nistz256_mul_by_3: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + mov x4,x14 + mov x5,x15 + mov x6,x16 + mov x7,x17 + + bl __ecp_nistz256_add_to // ret = a+a // 2*a + + mov x8,x4 + mov x9,x5 + mov x10,x6 + mov x11,x7 + + bl __ecp_nistz256_add_to // ret += a // 2*a+a=3*a + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_sub(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl ecp_nistz256_sub + +.def ecp_nistz256_sub + .type 32 +.endef +.align 4 +ecp_nistz256_sub: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sub_from + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// void ecp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_neg + +.def ecp_nistz256_neg + .type 32 +.endef +.align 4 +ecp_nistz256_neg: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x2,x1 + mov x14,xzr // a = 0 + mov x15,xzr + mov x16,xzr + mov x17,xzr + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sub_from + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded +// to x4-x7 and b[0] - to x3 +.def __ecp_nistz256_mul_mont + .type 32 +.endef +.align 4 +__ecp_nistz256_mul_mont: + mul x14,x4,x3 // a[0]*b[0] + umulh x8,x4,x3 + + mul x15,x5,x3 // a[1]*b[0] + umulh x9,x5,x3 + + mul x16,x6,x3 // a[2]*b[0] + umulh x10,x6,x3 + + mul x17,x7,x3 // a[3]*b[0] + umulh x11,x7,x3 + ldr x3,[x2,#8] // b[1] + + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adc x19,xzr,x11 + mov x20,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + ldr x3,[x2,#8*(1+1)] // b[1+1] + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + ldr x3,[x2,#8*(2+1)] // b[2+1] + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + // last reduction + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + adcs x17,x19,x11 + adc x19,x20,xzr + + adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x19,xzr // did it borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret + + +// note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded +// to x4-x7 +.def __ecp_nistz256_sqr_mont + .type 32 +.endef +.align 4 +__ecp_nistz256_sqr_mont: + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x15,x5,x4 // a[1]*a[0] + umulh x9,x5,x4 + mul x16,x6,x4 // a[2]*a[0] + umulh x10,x6,x4 + mul x17,x7,x4 // a[3]*a[0] + umulh x19,x7,x4 + + adds x16,x16,x9 // accumulate high parts of multiplication + mul x8,x6,x5 // a[2]*a[1] + umulh x9,x6,x5 + adcs x17,x17,x10 + mul x10,x7,x5 // a[3]*a[1] + umulh x11,x7,x5 + adc x19,x19,xzr // can't overflow + + mul x20,x7,x6 // a[3]*a[2] + umulh x1,x7,x6 + + adds x9,x9,x10 // accumulate high parts of multiplication + mul x14,x4,x4 // a[0]*a[0] + adc x10,x11,xzr // can't overflow + + adds x17,x17,x8 // accumulate low parts of multiplication + umulh x4,x4,x4 + adcs x19,x19,x9 + mul x9,x5,x5 // a[1]*a[1] + adcs x20,x20,x10 + umulh x5,x5,x5 + adc x1,x1,xzr // can't overflow + + adds x15,x15,x15 // acc[1-6]*=2 + mul x10,x6,x6 // a[2]*a[2] + adcs x16,x16,x16 + umulh x6,x6,x6 + adcs x17,x17,x17 + mul x11,x7,x7 // a[3]*a[3] + adcs x19,x19,x19 + umulh x7,x7,x7 + adcs x20,x20,x20 + adcs x1,x1,x1 + adc x2,xzr,xzr + + adds x15,x15,x4 // +a[i]*a[i] + adcs x16,x16,x9 + adcs x17,x17,x5 + adcs x19,x19,x10 + adcs x20,x20,x6 + lsl x8,x14,#32 + adcs x1,x1,x11 + lsr x9,x14,#32 + adc x2,x2,x7 + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + adc x17,x11,xzr // can't overflow + + adds x14,x14,x19 // accumulate upper half + adcs x15,x15,x20 + adcs x16,x16,x1 + adcs x17,x17,x2 + adc x19,xzr,xzr + + adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x19,xzr // did it borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret + + +// Note that __ecp_nistz256_add_to expects both input vectors pre-loaded to +// x4-x7 and x8-x11. This is done because it's used in multiple +// contexts, e.g. in multiplication by 2 and 3... +.def __ecp_nistz256_add_to + .type 32 +.endef +.align 4 +__ecp_nistz256_add_to: + adds x14,x14,x8 // ret = a+b + adcs x15,x15,x9 + adcs x16,x16,x10 + adcs x17,x17,x11 + adc x1,xzr,xzr // zap x1 + + adds x8,x14,#1 // subs x8,x4,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x1,xzr // did subtraction borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret + + +.def __ecp_nistz256_sub_from + .type 32 +.endef +.align 4 +__ecp_nistz256_sub_from: + ldp x8,x9,[x2] + ldp x10,x11,[x2,#16] + subs x14,x14,x8 // ret = a-b + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbcs x17,x17,x11 + sbc x1,xzr,xzr // zap x1 + + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adc x11,x17,x13 + cmp x1,xzr // did subtraction borrow? + + csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret + csel x15,x15,x9,eq + csel x16,x16,x10,eq + stp x14,x15,[x0] + csel x17,x17,x11,eq + stp x16,x17,[x0,#16] + + ret + + +.def __ecp_nistz256_sub_morf + .type 32 +.endef +.align 4 +__ecp_nistz256_sub_morf: + ldp x8,x9,[x2] + ldp x10,x11,[x2,#16] + subs x14,x8,x14 // ret = b-a + sbcs x15,x9,x15 + sbcs x16,x10,x16 + sbcs x17,x11,x17 + sbc x1,xzr,xzr // zap x1 + + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adc x11,x17,x13 + cmp x1,xzr // did subtraction borrow? + + csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret + csel x15,x15,x9,eq + csel x16,x16,x10,eq + stp x14,x15,[x0] + csel x17,x17,x11,eq + stp x16,x17,[x0,#16] + + ret + + +.def __ecp_nistz256_div_by_2 + .type 32 +.endef +.align 4 +__ecp_nistz256_div_by_2: + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = a+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adcs x11,x17,x13 + adc x1,xzr,xzr // zap x1 + tst x14,#1 // is a even? + + csel x14,x14,x8,eq // ret = even ? a : a+modulus + csel x15,x15,x9,eq + csel x16,x16,x10,eq + csel x17,x17,x11,eq + csel x1,xzr,x1,eq + + lsr x14,x14,#1 // ret >>= 1 + orr x14,x14,x15,lsl#63 + lsr x15,x15,#1 + orr x15,x15,x16,lsl#63 + lsr x16,x16,#1 + orr x16,x16,x17,lsl#63 + lsr x17,x17,#1 + stp x14,x15,[x0] + orr x17,x17,x1,lsl#63 + stp x16,x17,[x0,#16] + + ret + +.globl ecp_nistz256_point_double + +.def ecp_nistz256_point_double + .type 32 +.endef +.align 5 +ecp_nistz256_point_double: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + sub sp,sp,#32*4 + +Ldouble_shortcut: + ldp x14,x15,[x1,#32] + mov x21,x0 + ldp x16,x17,[x1,#48] + mov x22,x1 + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + mov x8,x14 + ldr x13,[x13,#24] + mov x9,x15 + ldp x4,x5,[x22,#64] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[x22,#64+16] + add x0,sp,#0 + bl __ecp_nistz256_add_to // p256_mul_by_2(S, in_y); + + add x0,sp,#64 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Zsqr, in_z); + + ldp x8,x9,[x22] + ldp x10,x11,[x22,#16] + mov x4,x14 // put Zsqr aside for p256_sub + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x0,sp,#32 + bl __ecp_nistz256_add_to // p256_add(M, Zsqr, in_x); + + add x2,x22,#0 + mov x14,x4 // restore Zsqr + mov x15,x5 + ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont + mov x16,x6 + mov x17,x7 + ldp x6,x7,[sp,#0+16] + add x0,sp,#64 + bl __ecp_nistz256_sub_morf // p256_sub(Zsqr, in_x, Zsqr); + + add x0,sp,#0 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(S, S); + + ldr x3,[x22,#32] + ldp x4,x5,[x22,#64] + ldp x6,x7,[x22,#64+16] + add x2,x22,#32 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(tmp0, in_z, in_y); + + mov x8,x14 + mov x9,x15 + ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[sp,#0+16] + add x0,x21,#64 + bl __ecp_nistz256_add_to // p256_mul_by_2(res_z, tmp0); + + add x0,sp,#96 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(tmp0, S); + + ldr x3,[sp,#64] // forward load for p256_mul_mont + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x0,x21,#32 + bl __ecp_nistz256_div_by_2 // p256_div_by_2(res_y, tmp0); + + add x2,sp,#64 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(M, M, Zsqr); + + mov x8,x14 // duplicate M + mov x9,x15 + mov x10,x16 + mov x11,x17 + mov x4,x14 // put M aside + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x0,sp,#32 + bl __ecp_nistz256_add_to + mov x8,x4 // restore M + mov x9,x5 + ldr x3,[x22] // forward load for p256_mul_mont + mov x10,x6 + ldp x4,x5,[sp,#0] + mov x11,x7 + ldp x6,x7,[sp,#0+16] + bl __ecp_nistz256_add_to // p256_mul_by_3(M, M); + + add x2,x22,#0 + add x0,sp,#0 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, in_x); + + mov x8,x14 + mov x9,x15 + ldp x4,x5,[sp,#32] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[sp,#32+16] + add x0,sp,#96 + bl __ecp_nistz256_add_to // p256_mul_by_2(tmp0, S); + + add x0,x21,#0 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(res_x, M); + + add x2,sp,#96 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, tmp0); + + add x2,sp,#0 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(S, S, res_x); + + ldr x3,[sp,#32] + mov x4,x14 // copy S + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x2,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, M); + + add x2,x21,#32 + add x0,x21,#32 + bl __ecp_nistz256_sub_from // p256_sub(res_y, S, res_y); + + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl ecp_nistz256_point_add + +.def ecp_nistz256_point_add + .type 32 +.endef +.align 5 +ecp_nistz256_point_add: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#32*12 + + ldp x4,x5,[x2,#64] // in2_z + ldp x6,x7,[x2,#64+16] + mov x21,x0 + mov x22,x1 + mov x23,x2 + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + orr x8,x4,x5 + orr x10,x6,x7 + orr x25,x8,x10 + cmp x25,#0 + csetm x25,ne // ~in2infty + add x0,sp,#192 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z); + + ldp x4,x5,[x22,#64] // in1_z + ldp x6,x7,[x22,#64+16] + orr x8,x4,x5 + orr x10,x6,x7 + orr x24,x8,x10 + cmp x24,#0 + csetm x24,ne // ~in1infty + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); + + ldr x3,[x23,#64] + ldp x4,x5,[sp,#192] + ldp x6,x7,[sp,#192+16] + add x2,x23,#64 + add x0,sp,#320 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S1, Z2sqr, in2_z); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,x22,#64 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z); + + ldr x3,[x22,#32] + ldp x4,x5,[sp,#320] + ldp x6,x7,[sp,#320+16] + add x2,x22,#32 + add x0,sp,#320 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S1, S1, in1_y); + + ldr x3,[x23,#32] + ldp x4,x5,[sp,#352] + ldp x6,x7,[sp,#352+16] + add x2,x23,#32 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y); + + add x2,sp,#320 + ldr x3,[sp,#192] // forward load for p256_mul_mont + ldp x4,x5,[x22] + ldp x6,x7,[x22,#16] + add x0,sp,#160 + bl __ecp_nistz256_sub_from // p256_sub(R, S2, S1); + + orr x14,x14,x15 // see if result is zero + orr x16,x16,x17 + orr x26,x14,x16 // ~is_equal(S1,S2) + + add x2,sp,#192 + add x0,sp,#256 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U1, in1_x, Z2sqr); + + ldr x3,[sp,#128] + ldp x4,x5,[x23] + ldp x6,x7,[x23,#16] + add x2,sp,#128 + add x0,sp,#288 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in2_x, Z1sqr); + + add x2,sp,#256 + ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont + ldp x6,x7,[sp,#160+16] + add x0,sp,#96 + bl __ecp_nistz256_sub_from // p256_sub(H, U2, U1); + + orr x14,x14,x15 // see if result is zero + orr x16,x16,x17 + orr x14,x14,x16 // ~is_equal(U1,U2) + + mvn x27,x24 // -1/0 -> 0/-1 + mvn x28,x25 // -1/0 -> 0/-1 + orr x14,x14,x27 + orr x14,x14,x28 + orr x14,x14,x26 + cbnz x14,Ladd_proceed // if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2)) + +Ladd_double: + mov x1,x22 + mov x0,x21 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + add sp,sp,#256 // #256 is from #32*(12-4). difference in stack frames + b Ldouble_shortcut + +.align 4 +Ladd_proceed: + add x0,sp,#192 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#96] + ldp x6,x7,[sp,#96+16] + add x2,x22,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z); + + ldp x4,x5,[sp,#96] + ldp x6,x7,[sp,#96+16] + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H); + + ldr x3,[x23,#64] + ldp x4,x5,[sp,#64] + ldp x6,x7,[sp,#64+16] + add x2,x23,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, res_z, in2_z); + + ldr x3,[sp,#96] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,sp,#96 + add x0,sp,#224 + bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H); + + ldr x3,[sp,#128] + ldp x4,x5,[sp,#256] + ldp x6,x7,[sp,#256+16] + add x2,sp,#128 + add x0,sp,#288 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, U1, Hsqr); + + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + add x0,sp,#128 + bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2); + + add x2,sp,#192 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr); + + add x2,sp,#224 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub); + + add x2,sp,#288 + ldr x3,[sp,#224] // forward load for p256_mul_mont + ldp x4,x5,[sp,#320] + ldp x6,x7,[sp,#320+16] + add x0,sp,#32 + bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x); + + add x2,sp,#224 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S1, Hcub); + + ldr x3,[sp,#160] + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x2,sp,#160 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R); + + add x2,sp,#352 + bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2); + + ldp x4,x5,[sp,#0] // res + ldp x6,x7,[sp,#0+16] + ldp x8,x9,[x23] // in2 + ldp x10,x11,[x23,#16] + ldp x14,x15,[x22,#0] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#0+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+0+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+0+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#0+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#0+48] + stp x14,x15,[x21,#0] + stp x16,x17,[x21,#0+16] + ldp x14,x15,[x22,#32] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#32+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+32+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+32+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#32+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#32+48] + stp x14,x15,[x21,#32] + stp x16,x17,[x21,#32+16] + ldp x14,x15,[x22,#64] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#64+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + csel x14,x8,x14,ne + csel x15,x9,x15,ne + csel x16,x10,x16,ne + csel x17,x11,x17,ne + stp x14,x15,[x21,#64] + stp x16,x17,[x21,#64+16] + +Ladd_done: + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl ecp_nistz256_point_add_affine + +.def ecp_nistz256_point_add_affine + .type 32 +.endef +.align 5 +ecp_nistz256_point_add_affine: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + sub sp,sp,#32*10 + + mov x21,x0 + mov x22,x1 + mov x23,x2 + adrp x13,Lpoly + add x13,x13,:lo12:Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + ldp x4,x5,[x1,#64] // in1_z + ldp x6,x7,[x1,#64+16] + orr x8,x4,x5 + orr x10,x6,x7 + orr x24,x8,x10 + cmp x24,#0 + csetm x24,ne // ~in1infty + + ldp x14,x15,[x2] // in2_x + ldp x16,x17,[x2,#16] + ldp x8,x9,[x2,#32] // in2_y + ldp x10,x11,[x2,#48] + orr x14,x14,x15 + orr x16,x16,x17 + orr x8,x8,x9 + orr x10,x10,x11 + orr x14,x14,x16 + orr x8,x8,x10 + orr x25,x14,x8 + cmp x25,#0 + csetm x25,ne // ~in2infty + + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); + + mov x4,x14 + mov x5,x15 + mov x6,x16 + mov x7,x17 + ldr x3,[x23] + add x2,x23,#0 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, Z1sqr, in2_x); + + add x2,x22,#0 + ldr x3,[x22,#64] // forward load for p256_mul_mont + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x0,sp,#160 + bl __ecp_nistz256_sub_from // p256_sub(H, U2, in1_x); + + add x2,x22,#64 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#160] + ldp x6,x7,[sp,#160+16] + add x2,x22,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z); + + ldr x3,[x23,#32] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,x23,#32 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y); + + add x2,x22,#32 + ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont + ldp x6,x7,[sp,#160+16] + add x0,sp,#192 + bl __ecp_nistz256_sub_from // p256_sub(R, S2, in1_y); + + add x0,sp,#224 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H); + + ldp x4,x5,[sp,#192] + ldp x6,x7,[sp,#192+16] + add x0,sp,#288 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R); + + ldr x3,[sp,#160] + ldp x4,x5,[sp,#224] + ldp x6,x7,[sp,#224+16] + add x2,sp,#160 + add x0,sp,#256 + bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H); + + ldr x3,[x22] + ldp x4,x5,[sp,#224] + ldp x6,x7,[sp,#224+16] + add x2,x22,#0 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in1_x, Hsqr); + + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + add x0,sp,#224 + bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2); + + add x2,sp,#288 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr); + + add x2,sp,#256 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub); + + add x2,sp,#96 + ldr x3,[x22,#32] // forward load for p256_mul_mont + ldp x4,x5,[sp,#256] + ldp x6,x7,[sp,#256+16] + add x0,sp,#32 + bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x); + + add x2,x22,#32 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, in1_y, Hcub); + + ldr x3,[sp,#192] + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x2,sp,#192 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R); + + add x2,sp,#128 + bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2); + + ldp x4,x5,[sp,#0] // res + ldp x6,x7,[sp,#0+16] + ldp x8,x9,[x23] // in2 + ldp x10,x11,[x23,#16] + ldp x14,x15,[x22,#0] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#0+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+0+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+0+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#0+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#0+48] + stp x14,x15,[x21,#0] + stp x16,x17,[x21,#0+16] + adrp x23,Lone_mont-64 + add x23,x23,:lo12:Lone_mont-64 + ldp x14,x15,[x22,#32] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#32+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+32+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+32+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#32+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#32+48] + stp x14,x15,[x21,#32] + stp x16,x17,[x21,#32+16] + ldp x14,x15,[x22,#64] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#64+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + csel x14,x8,x14,ne + csel x15,x9,x15,ne + csel x16,x10,x16,ne + csel x17,x11,x17,ne + stp x14,x15,[x21,#64] + stp x16,x17,[x21,#64+16] + + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x29,x30,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_ord_mul_mont(uint64_t res[4], uint64_t a[4], +// uint64_t b[4]); +.globl ecp_nistz256_ord_mul_mont + +.def ecp_nistz256_ord_mul_mont + .type 32 +.endef +.align 4 +ecp_nistz256_ord_mul_mont: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + adrp x23,Lord + add x23,x23,:lo12:Lord + ldr x3,[x2] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + + ldp x12,x13,[x23,#0] + ldp x21,x22,[x23,#16] + ldr x23,[x23,#32] + + mul x14,x4,x3 // a[0]*b[0] + umulh x8,x4,x3 + + mul x15,x5,x3 // a[1]*b[0] + umulh x9,x5,x3 + + mul x16,x6,x3 // a[2]*b[0] + umulh x10,x6,x3 + + mul x17,x7,x3 // a[3]*b[0] + umulh x19,x7,x3 + + mul x24,x14,x23 + + adds x15,x15,x8 // accumulate high parts of multiplication + adcs x16,x16,x9 + adcs x17,x17,x10 + adc x19,x19,xzr + mov x20,xzr + ldr x3,[x2,#8*1] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + ldr x3,[x2,#8*2] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + ldr x3,[x2,#8*3] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + lsl x8,x24,#32 // last reduction + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + subs x8,x14,x12 // ret -= modulus + sbcs x9,x15,x13 + sbcs x10,x16,x21 + sbcs x11,x17,x22 + sbcs xzr,x19,xzr + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldr x29,[sp],#64 + ret + + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4], +// uint64_t rep); +.globl ecp_nistz256_ord_sqr_mont + +.def ecp_nistz256_ord_sqr_mont + .type 32 +.endef +.align 4 +ecp_nistz256_ord_sqr_mont: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + adrp x23,Lord + add x23,x23,:lo12:Lord + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + + ldp x12,x13,[x23,#0] + ldp x21,x22,[x23,#16] + ldr x23,[x23,#32] + b Loop_ord_sqr + +.align 4 +Loop_ord_sqr: + sub x2,x2,#1 + //////////////////////////////////////////////////////////////// + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x15,x5,x4 // a[1]*a[0] + umulh x9,x5,x4 + mul x16,x6,x4 // a[2]*a[0] + umulh x10,x6,x4 + mul x17,x7,x4 // a[3]*a[0] + umulh x19,x7,x4 + + adds x16,x16,x9 // accumulate high parts of multiplication + mul x8,x6,x5 // a[2]*a[1] + umulh x9,x6,x5 + adcs x17,x17,x10 + mul x10,x7,x5 // a[3]*a[1] + umulh x11,x7,x5 + adc x19,x19,xzr // can't overflow + + mul x20,x7,x6 // a[3]*a[2] + umulh x1,x7,x6 + + adds x9,x9,x10 // accumulate high parts of multiplication + mul x14,x4,x4 // a[0]*a[0] + adc x10,x11,xzr // can't overflow + + adds x17,x17,x8 // accumulate low parts of multiplication + umulh x4,x4,x4 + adcs x19,x19,x9 + mul x9,x5,x5 // a[1]*a[1] + adcs x20,x20,x10 + umulh x5,x5,x5 + adc x1,x1,xzr // can't overflow + + adds x15,x15,x15 // acc[1-6]*=2 + mul x10,x6,x6 // a[2]*a[2] + adcs x16,x16,x16 + umulh x6,x6,x6 + adcs x17,x17,x17 + mul x11,x7,x7 // a[3]*a[3] + adcs x19,x19,x19 + umulh x7,x7,x7 + adcs x20,x20,x20 + adcs x1,x1,x1 + adc x3,xzr,xzr + + adds x15,x15,x4 // +a[i]*a[i] + mul x24,x14,x23 + adcs x16,x16,x9 + adcs x17,x17,x5 + adcs x19,x19,x10 + adcs x20,x20,x6 + adcs x1,x1,x11 + adc x3,x3,x7 + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adc x17,xzr,x24 // can't overflow + mul x11,x14,x23 + lsl x8,x24,#32 + subs x15,x15,x24 + lsr x9,x24,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x11 + mul x10,x13,x11 + umulh x24,x13,x11 + + adcs x10,x10,x9 + adc x24,x24,xzr + + adds x14,x15,x10 + adcs x15,x16,x24 + adcs x16,x17,x11 + adc x17,xzr,x11 // can't overflow + mul x24,x14,x23 + lsl x8,x11,#32 + subs x15,x15,x11 + lsr x9,x11,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adc x17,xzr,x24 // can't overflow + mul x11,x14,x23 + lsl x8,x24,#32 + subs x15,x15,x24 + lsr x9,x24,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x11 + mul x10,x13,x11 + umulh x24,x13,x11 + + adcs x10,x10,x9 + adc x24,x24,xzr + + adds x14,x15,x10 + adcs x15,x16,x24 + adcs x16,x17,x11 + adc x17,xzr,x11 // can't overflow + lsl x8,x11,#32 + subs x15,x15,x11 + lsr x9,x11,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + adds x14,x14,x19 // accumulate upper half + adcs x15,x15,x20 + adcs x16,x16,x1 + adcs x17,x17,x3 + adc x19,xzr,xzr + + subs x8,x14,x12 // ret -= modulus + sbcs x9,x15,x13 + sbcs x10,x16,x21 + sbcs x11,x17,x22 + sbcs xzr,x19,xzr + + csel x4,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x5,x15,x9,lo + csel x6,x16,x10,lo + csel x7,x17,x11,lo + + cbnz x2,Loop_ord_sqr + + stp x4,x5,[x0] + stp x6,x7,[x0,#16] + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldr x29,[sp],#64 + ret + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_select_w5 + +.def ecp_nistz256_select_w5 + .type 32 +.endef +.align 4 +ecp_nistz256_select_w5: + AARCH64_VALID_CALL_TARGET + + // x10 := x0 + // w9 := 0; loop counter and incremented internal index + mov x10, x0 + mov w9, #0 + + // [v16-v21] := 0 + movi v16.16b, #0 + movi v17.16b, #0 + movi v18.16b, #0 + movi v19.16b, #0 + movi v20.16b, #0 + movi v21.16b, #0 + +Lselect_w5_loop: + // Loop 16 times. + + // Increment index (loop counter); tested at the end of the loop + add w9, w9, #1 + + // [v22-v27] := Load a (3*256-bit = 6*128-bit) table entry starting at x1 + // and advance x1 to point to the next entry + ld1 {v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64 + + // x11 := (w9 == w2)? All 1s : All 0s + cmp w9, w2 + csetm x11, eq + + // continue loading ... + ld1 {v26.2d, v27.2d}, [x1],#32 + + // duplicate mask_64 into Mask (all 0s or all 1s) + dup v3.2d, x11 + + // [v16-v19] := (Mask == all 1s)? [v22-v25] : [v16-v19] + // i.e., values in output registers will remain the same if w9 != w2 + bit v16.16b, v22.16b, v3.16b + bit v17.16b, v23.16b, v3.16b + + bit v18.16b, v24.16b, v3.16b + bit v19.16b, v25.16b, v3.16b + + bit v20.16b, v26.16b, v3.16b + bit v21.16b, v27.16b, v3.16b + + // If bit #4 is not 0 (i.e. idx_ctr < 16) loop back + tbz w9, #4, Lselect_w5_loop + + // Write [v16-v21] to memory at the output pointer + st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x10],#64 + st1 {v20.2d, v21.2d}, [x10] + + ret + + + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_select_w7 + +.def ecp_nistz256_select_w7 + .type 32 +.endef +.align 4 +ecp_nistz256_select_w7: + AARCH64_VALID_CALL_TARGET + + // w9 := 0; loop counter and incremented internal index + mov w9, #0 + + // [v16-v21] := 0 + movi v16.16b, #0 + movi v17.16b, #0 + movi v18.16b, #0 + movi v19.16b, #0 + +Lselect_w7_loop: + // Loop 64 times. + + // Increment index (loop counter); tested at the end of the loop + add w9, w9, #1 + + // [v22-v25] := Load a (2*256-bit = 4*128-bit) table entry starting at x1 + // and advance x1 to point to the next entry + ld1 {v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64 + + // x11 := (w9 == w2)? All 1s : All 0s + cmp w9, w2 + csetm x11, eq + + // duplicate mask_64 into Mask (all 0s or all 1s) + dup v3.2d, x11 + + // [v16-v19] := (Mask == all 1s)? [v22-v25] : [v16-v19] + // i.e., values in output registers will remain the same if w9 != w2 + bit v16.16b, v22.16b, v3.16b + bit v17.16b, v23.16b, v3.16b + + bit v18.16b, v24.16b, v3.16b + bit v19.16b, v25.16b, v3.16b + + // If bit #6 is not 0 (i.e. idx_ctr < 64) loop back + tbz w9, #6, Lselect_w7_loop + + // Write [v16-v19] to memory at the output pointer + st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x0] + + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-apple.S b/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-apple.S new file mode 100644 index 0000000000..81cb582fe5 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-apple.S @@ -0,0 +1,4473 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + + +.section __DATA,__const +.p2align 6 +L$poly: +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 + +L$One: +.long 1,1,1,1,1,1,1,1 +L$Two: +.long 2,2,2,2,2,2,2,2 +L$Three: +.long 3,3,3,3,3,3,3,3 +L$ONE_mont: +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + + +L$ord: +.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 +L$ordK: +.quad 0xccd1c8aaee00bc4f +.text + + + +.globl _ecp_nistz256_neg +.private_extern _ecp_nistz256_neg + +.p2align 5 +_ecp_nistz256_neg: + +_CET_ENDBR + pushq %r12 + + pushq %r13 + +L$neg_body: + + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r13,%r13 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r8,%rax + sbbq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$neg_epilogue: + ret + + + + + + + + +.globl _ecp_nistz256_ord_mul_mont +.private_extern _ecp_nistz256_ord_mul_mont + +.p2align 5 +_ecp_nistz256_ord_mul_mont: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je L$ecp_nistz256_ord_mul_montx + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_mul_body: + + movq 0(%rdx),%rax + movq %rdx,%rbx + leaq L$ord(%rip),%r14 + movq L$ordK(%rip),%r15 + + + movq %rax,%rcx + mulq 0(%rsi) + movq %rax,%r8 + movq %rcx,%rax + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + + movq %r8,%r13 + imulq %r15,%r8 + + movq %rdx,%r11 + mulq 24(%rsi) + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq 0(%r14) + movq %r8,%rbp + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%r8 + + mulq 8(%r14) + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %rbp,%rax + adcq %rdx,%r10 + movq %rbp,%rdx + adcq $0,%r8 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 8(%rbx),%rax + sbbq %rdx,%rbp + + addq %r8,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + + movq %r9,%rcx + imulq %r15,%r9 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + xorq %r8,%r8 + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + mulq 0(%r14) + movq %r9,%rbp + addq %rax,%rcx + movq %r9,%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%r9 + + mulq 8(%r14) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq %rdx,%r11 + movq %rbp,%rdx + adcq $0,%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r12 + movq 16(%rbx),%rax + sbbq %rdx,%rbp + + addq %r9,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + + movq %r10,%rcx + imulq %r15,%r10 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + xorq %r9,%r9 + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + mulq 0(%r14) + movq %r10,%rbp + addq %rax,%rcx + movq %r10,%rax + adcq %rdx,%rcx + + subq %r10,%r12 + sbbq $0,%r10 + + mulq 8(%r14) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq %rdx,%r12 + movq %rbp,%rdx + adcq $0,%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r13 + movq 24(%rbx),%rax + sbbq %rdx,%rbp + + addq %r10,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rcx,%rax + adcq $0,%rdx + + movq %r11,%rcx + imulq %r15,%r11 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r8 + adcq $0,%rdx + xorq %r10,%r10 + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + mulq 0(%r14) + movq %r11,%rbp + addq %rax,%rcx + movq %r11,%rax + adcq %rdx,%rcx + + subq %r11,%r13 + sbbq $0,%r11 + + mulq 8(%r14) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq %rdx,%r13 + movq %rbp,%rdx + adcq $0,%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + sbbq %rdx,%rbp + + addq %r11,%r8 + adcq %rbp,%r9 + adcq $0,%r10 + + + movq %r12,%rsi + subq 0(%r14),%r12 + movq %r13,%r11 + sbbq 8(%r14),%r13 + movq %r8,%rcx + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rsi,%r12 + cmovcq %r11,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_mul_epilogue: + ret + + + + + + + + + +.globl _ecp_nistz256_ord_sqr_mont +.private_extern _ecp_nistz256_ord_sqr_mont + +.p2align 5 +_ecp_nistz256_ord_sqr_mont: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je L$ecp_nistz256_ord_sqr_montx + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_sqr_body: + + movq 0(%rsi),%r8 + movq 8(%rsi),%rax + movq 16(%rsi),%r14 + movq 24(%rsi),%r15 + leaq L$ord(%rip),%rsi + movq %rdx,%rbx + jmp L$oop_ord_sqr + +.p2align 5 +L$oop_ord_sqr: + + movq %rax,%rbp + mulq %r8 + movq %rax,%r9 +.byte 102,72,15,110,205 + movq %r14,%rax + movq %rdx,%r10 + + mulq %r8 + addq %rax,%r10 + movq %r15,%rax +.byte 102,73,15,110,214 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r8 + addq %rax,%r11 + movq %r15,%rax +.byte 102,73,15,110,223 + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + movq %rax,%r13 + movq %r14,%rax + movq %rdx,%r14 + + + mulq %rbp + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + + addq %r15,%r12 + adcq %rdx,%r13 + adcq $0,%r14 + + + xorq %r15,%r15 + movq %r8,%rax + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + + mulq %rax + movq %rax,%r8 +.byte 102,72,15,126,200 + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r9 + adcq %rax,%r10 +.byte 102,72,15,126,208 + adcq $0,%rdx + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r11 + adcq %rax,%r12 +.byte 102,72,15,126,216 + adcq $0,%rdx + movq %rdx,%rbp + + movq %r8,%rcx + imulq 32(%rsi),%r8 + + mulq %rax + addq %rbp,%r13 + adcq %rax,%r14 + movq 0(%rsi),%rax + adcq %rdx,%r15 + + + mulq %r8 + movq %r8,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%rbp + + mulq %r8 + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %r8,%rax + adcq %rdx,%r10 + movq %r8,%rdx + adcq $0,%rbp + + movq %r9,%rcx + imulq 32(%rsi),%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 0(%rsi),%rax + sbbq %rdx,%r8 + + addq %rbp,%r11 + adcq $0,%r8 + + + mulq %r9 + movq %r9,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%rbp + + mulq %r9 + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + movq %r9,%rdx + adcq $0,%rbp + + movq %r10,%rcx + imulq 32(%rsi),%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + movq 0(%rsi),%rax + sbbq %rdx,%r9 + + addq %rbp,%r8 + adcq $0,%r9 + + + mulq %r10 + movq %r10,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r10,%r8 + sbbq $0,%rbp + + mulq %r10 + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r10,%rax + adcq %rdx,%r8 + movq %r10,%rdx + adcq $0,%rbp + + movq %r11,%rcx + imulq 32(%rsi),%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r9 + movq 0(%rsi),%rax + sbbq %rdx,%r10 + + addq %rbp,%r9 + adcq $0,%r10 + + + mulq %r11 + movq %r11,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r11,%r9 + sbbq $0,%rbp + + mulq %r11 + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + movq %r11,%rdx + adcq $0,%rbp + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r10 + sbbq %rdx,%r11 + + addq %rbp,%r10 + adcq $0,%r11 + + + xorq %rdx,%rdx + addq %r12,%r8 + adcq %r13,%r9 + movq %r8,%r12 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%rax + adcq $0,%rdx + + + subq 0(%rsi),%r8 + movq %r10,%r14 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r15 + sbbq 24(%rsi),%r11 + sbbq $0,%rdx + + cmovcq %r12,%r8 + cmovncq %r9,%rax + cmovncq %r10,%r14 + cmovncq %r11,%r15 + + decq %rbx + jnz L$oop_ord_sqr + + movq %r8,0(%rdi) + movq %rax,8(%rdi) + pxor %xmm1,%xmm1 + movq %r14,16(%rdi) + pxor %xmm2,%xmm2 + movq %r15,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_sqr_epilogue: + ret + + + + +.p2align 5 +ecp_nistz256_ord_mul_montx: + +L$ecp_nistz256_ord_mul_montx: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_mulx_body: + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + leaq L$ord-128(%rip),%r14 + movq L$ordK(%rip),%r15 + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + mulxq %r11,%rbp,%r11 + addq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + mulxq %r15,%rdx,%rax + adcq %rbp,%r10 + adcq %rcx,%r11 + adcq $0,%r12 + + + xorq %r13,%r13 + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%r14),%rcx,%rbp + movq 8(%rbx),%rdx + adcxq %rcx,%r11 + adoxq %rbp,%r12 + adcxq %r8,%r12 + adoxq %r8,%r13 + adcq $0,%r13 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%r14),%rcx,%rbp + movq 16(%rbx),%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcxq %r9,%r13 + adoxq %r9,%r8 + adcq $0,%r8 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%r14),%rcx,%rbp + movq 24(%rbx),%rdx + adcxq %rcx,%r13 + adoxq %rbp,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcq $0,%r9 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%r14),%rcx,%rbp + leaq 128(%r14),%r14 + movq %r12,%rbx + adcxq %rcx,%r8 + adoxq %rbp,%r9 + movq %r13,%rdx + adcxq %r11,%r9 + adoxq %r11,%r10 + adcq $0,%r10 + + + + movq %r8,%rcx + subq 0(%r14),%r12 + sbbq 8(%r14),%r13 + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_mulx_epilogue: + ret + + + + +.p2align 5 +ecp_nistz256_ord_sqr_montx: + +L$ecp_nistz256_ord_sqr_montx: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_sqrx_body: + + movq %rdx,%rbx + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq L$ord(%rip),%rsi + jmp L$oop_ord_sqrx + +.p2align 5 +L$oop_ord_sqrx: + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + movq %rdx,%rax +.byte 102,73,15,110,206 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + addq %rcx,%r10 +.byte 102,73,15,110,215 + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + mulxq %r8,%rcx,%r14 + movq %rax,%rdx +.byte 102,73,15,110,216 + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + + mulxq %rdx,%r8,%rbp +.byte 102,72,15,126,202 + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax +.byte 102,72,15,126,210 + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 + mulxq %rdx,%rcx,%rbp +.byte 0x67 +.byte 102,72,15,126,218 + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + adoxq %rbp,%r13 + mulxq %rdx,%rcx,%rax + adoxq %rcx,%r14 + adoxq %rax,%r15 + + + movq %r8,%rdx + mulxq 32(%rsi),%rdx,%rcx + + xorq %rax,%rax + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + adcxq %rax,%r8 + + + movq %r9,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + adoxq %rax,%r9 + + + movq %r10,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + adcxq %rax,%r10 + + + movq %r11,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + adoxq %rax,%r11 + + + addq %r8,%r12 + adcq %r13,%r9 + movq %r12,%rdx + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%r14 + adcq $0,%rax + + + subq 0(%rsi),%r12 + movq %r10,%r15 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r8 + sbbq 24(%rsi),%r11 + sbbq $0,%rax + + cmovncq %r12,%rdx + cmovncq %r9,%r14 + cmovncq %r10,%r15 + cmovncq %r11,%r8 + + decq %rbx + jnz L$oop_ord_sqrx + + movq %rdx,0(%rdi) + movq %r14,8(%rdi) + pxor %xmm1,%xmm1 + movq %r15,16(%rdi) + pxor %xmm2,%xmm2 + movq %r8,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_sqrx_epilogue: + ret + + + + + + + + +.globl _ecp_nistz256_mul_mont +.private_extern _ecp_nistz256_mul_mont + +.p2align 5 +_ecp_nistz256_mul_mont: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx +L$mul_mont: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$mul_body: + cmpl $0x80100,%ecx + je L$mul_montx + movq %rdx,%rbx + movq 0(%rdx),%rax + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + call __ecp_nistz256_mul_montq + jmp L$mul_mont_done + +.p2align 5 +L$mul_montx: + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_mul_montx +L$mul_mont_done: + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$mul_epilogue: + ret + + + + +.p2align 5 +__ecp_nistz256_mul_montq: + + + + movq %rax,%rbp + mulq %r9 + movq L$poly+8(%rip),%r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r10 + movq L$poly+24(%rip),%r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r11 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + xorq %r13,%r13 + movq %rdx,%r12 + + + + + + + + + + + movq %r8,%rbp + shlq $32,%r8 + mulq %r15 + shrq $32,%rbp + addq %r8,%r9 + adcq %rbp,%r10 + adcq %rax,%r11 + movq 8(%rbx),%rax + adcq %rdx,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + + movq %r9,%rbp + shlq $32,%r9 + mulq %r15 + shrq $32,%rbp + addq %r9,%r10 + adcq %rbp,%r11 + adcq %rax,%r12 + movq 16(%rbx),%rax + adcq %rdx,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + + movq %r10,%rbp + shlq $32,%r10 + mulq %r15 + shrq $32,%rbp + addq %r10,%r11 + adcq %rbp,%r12 + adcq %rax,%r13 + movq 24(%rbx),%rax + adcq %rdx,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + + movq %r11,%rbp + shlq $32,%r11 + mulq %r15 + shrq $32,%rbp + addq %r11,%r12 + adcq %rbp,%r13 + movq %r12,%rcx + adcq %rax,%r8 + adcq %rdx,%r9 + movq %r13,%rbp + adcq $0,%r10 + + + + subq $-1,%r12 + movq %r8,%rbx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rdx + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rcx,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rbx,%r8 + movq %r13,8(%rdi) + cmovcq %rdx,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret + + + + + + + + + + +.globl _ecp_nistz256_sqr_mont +.private_extern _ecp_nistz256_sqr_mont + +.p2align 5 +_ecp_nistz256_sqr_mont: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$sqr_body: + cmpl $0x80100,%ecx + je L$sqr_montx + movq 0(%rsi),%rax + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + + call __ecp_nistz256_sqr_montq + jmp L$sqr_mont_done + +.p2align 5 +L$sqr_montx: + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_sqr_montx +L$sqr_mont_done: + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$sqr_epilogue: + ret + + + + +.p2align 5 +__ecp_nistz256_sqr_montq: + + movq %rax,%r13 + mulq %r14 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + + mulq %r13 + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r13 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %r14 + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + + mulq %r15 + xorq %r15,%r15 + addq %rax,%r13 + movq 0(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + mulq %rax + movq %rax,%r8 + movq 8(%rsi),%rax + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r9 + adcq %rax,%r10 + movq 16(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r11 + adcq %rax,%r12 + movq 24(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r13 + adcq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r15 + + movq L$poly+8(%rip),%rsi + movq L$poly+24(%rip),%rbp + + + + + movq %r8,%rcx + shlq $32,%r8 + mulq %rbp + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %rbp + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %rbp + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %rbp + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + adcq %rax,%r10 + adcq $0,%rdx + xorq %r11,%r11 + + + + addq %r8,%r12 + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %rdx,%r15 + movq %r13,%r9 + adcq $0,%r11 + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%rcx + sbbq %rbp,%r15 + sbbq $0,%r11 + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %rcx,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + ret + + + +.p2align 5 +__ecp_nistz256_mul_montx: + + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + movq $32,%r14 + xorq %r13,%r13 + mulxq %r11,%rbp,%r11 + movq L$poly+24(%rip),%r15 + adcq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + adcq %rbp,%r10 + shlxq %r14,%r8,%rbp + adcq %rcx,%r11 + shrxq %r14,%r8,%rcx + adcq $0,%r12 + + + + addq %rbp,%r9 + adcq %rcx,%r10 + + mulxq %r15,%rcx,%rbp + movq 8(%rbx),%rdx + adcq %rcx,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + adcxq %rcx,%r12 + shlxq %r14,%r9,%rcx + adoxq %rbp,%r13 + shrxq %r14,%r9,%rbp + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + + addq %rcx,%r10 + adcq %rbp,%r11 + + mulxq %r15,%rcx,%rbp + movq 16(%rbx),%rdx + adcq %rcx,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + adcxq %rcx,%r13 + shlxq %r14,%r10,%rcx + adoxq %rbp,%r8 + shrxq %r14,%r10,%rbp + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + + addq %rcx,%r11 + adcq %rbp,%r12 + + mulxq %r15,%rcx,%rbp + movq 24(%rbx),%rdx + adcq %rcx,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + adcxq %rcx,%r8 + shlxq %r14,%r11,%rcx + adoxq %rbp,%r9 + shrxq %r14,%r11,%rbp + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + + addq %rcx,%r12 + adcq %rbp,%r13 + + mulxq %r15,%rcx,%rbp + movq %r12,%rbx + movq L$poly+8(%rip),%r14 + adcq %rcx,%r8 + movq %r13,%rdx + adcq %rbp,%r9 + adcq $0,%r10 + + + + xorl %eax,%eax + movq %r8,%rcx + sbbq $-1,%r12 + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rbp + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %rbp,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret + + + + +.p2align 5 +__ecp_nistz256_sqr_montx: + + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + xorl %eax,%eax + adcq %rcx,%r10 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + + mulxq %r8,%rcx,%r14 + movq 0+128(%rsi),%rdx + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + mulxq %rdx,%r8,%rbp + movq 8+128(%rsi),%rdx + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax + movq 16+128(%rsi),%rdx + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 +.byte 0x67 + mulxq %rdx,%rcx,%rbp + movq 24+128(%rsi),%rdx + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + movq $32,%rsi + adoxq %rbp,%r13 +.byte 0x67,0x67 + mulxq %rdx,%rcx,%rax + movq L$poly+24(%rip),%rdx + adoxq %rcx,%r14 + shlxq %rsi,%r8,%rcx + adoxq %rax,%r15 + shrxq %rsi,%r8,%rax + movq %rdx,%rbp + + + addq %rcx,%r9 + adcq %rax,%r10 + + mulxq %r8,%rcx,%r8 + adcq %rcx,%r11 + shlxq %rsi,%r9,%rcx + adcq $0,%r8 + shrxq %rsi,%r9,%rax + + + addq %rcx,%r10 + adcq %rax,%r11 + + mulxq %r9,%rcx,%r9 + adcq %rcx,%r8 + shlxq %rsi,%r10,%rcx + adcq $0,%r9 + shrxq %rsi,%r10,%rax + + + addq %rcx,%r11 + adcq %rax,%r8 + + mulxq %r10,%rcx,%r10 + adcq %rcx,%r9 + shlxq %rsi,%r11,%rcx + adcq $0,%r10 + shrxq %rsi,%r11,%rax + + + addq %rcx,%r8 + adcq %rax,%r9 + + mulxq %r11,%rcx,%r11 + adcq %rcx,%r10 + adcq $0,%r11 + + xorq %rdx,%rdx + addq %r8,%r12 + movq L$poly+8(%rip),%rsi + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %r11,%r15 + movq %r13,%r9 + adcq $0,%rdx + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%r11 + sbbq %rbp,%r15 + sbbq $0,%rdx + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %r11,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + ret + + + + +.globl _ecp_nistz256_select_w5 +.private_extern _ecp_nistz256_select_w5 + +.p2align 5 +_ecp_nistz256_select_w5: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rax + movq 8(%rax),%rax + testl $32,%eax + jnz L$avx2_select_w5 + movdqa L$One(%rip),%xmm0 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + + movdqa %xmm0,%xmm8 + pshufd $0,%xmm1,%xmm1 + + movq $16,%rax +L$select_loop_sse_w5: + + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + pcmpeqd %xmm1,%xmm15 + + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + movdqa 64(%rsi),%xmm13 + movdqa 80(%rsi),%xmm14 + leaq 96(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + pand %xmm15,%xmm13 + por %xmm12,%xmm5 + pand %xmm15,%xmm14 + por %xmm13,%xmm6 + por %xmm14,%xmm7 + + decq %rax + jnz L$select_loop_sse_w5 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + movdqu %xmm6,64(%rdi) + movdqu %xmm7,80(%rdi) + ret + +L$SEH_end_ecp_nistz256_select_w5: + + + + +.globl _ecp_nistz256_select_w7 +.private_extern _ecp_nistz256_select_w7 + +.p2align 5 +_ecp_nistz256_select_w7: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rax + movq 8(%rax),%rax + testl $32,%eax + jnz L$avx2_select_w7 + movdqa L$One(%rip),%xmm8 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + movdqa %xmm8,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq $64,%rax + +L$select_loop_sse_w7: + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + pcmpeqd %xmm1,%xmm15 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + leaq 64(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + prefetcht0 255(%rsi) + por %xmm12,%xmm5 + + decq %rax + jnz L$select_loop_sse_w7 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + ret + +L$SEH_end_ecp_nistz256_select_w7: + + + + +.p2align 5 +ecp_nistz256_avx2_select_w5: + +L$avx2_select_w5: + vzeroupper + vmovdqa L$Two(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + vpxor %ymm4,%ymm4,%ymm4 + + vmovdqa L$One(%rip),%ymm5 + vmovdqa L$Two(%rip),%ymm10 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + movq $8,%rax +L$select_loop_avx2_w5: + + vmovdqa 0(%rsi),%ymm6 + vmovdqa 32(%rsi),%ymm7 + vmovdqa 64(%rsi),%ymm8 + + vmovdqa 96(%rsi),%ymm11 + vmovdqa 128(%rsi),%ymm12 + vmovdqa 160(%rsi),%ymm13 + + vpcmpeqd %ymm1,%ymm5,%ymm9 + vpcmpeqd %ymm1,%ymm10,%ymm14 + + vpaddd %ymm0,%ymm5,%ymm5 + vpaddd %ymm0,%ymm10,%ymm10 + leaq 192(%rsi),%rsi + + vpand %ymm9,%ymm6,%ymm6 + vpand %ymm9,%ymm7,%ymm7 + vpand %ymm9,%ymm8,%ymm8 + vpand %ymm14,%ymm11,%ymm11 + vpand %ymm14,%ymm12,%ymm12 + vpand %ymm14,%ymm13,%ymm13 + + vpxor %ymm6,%ymm2,%ymm2 + vpxor %ymm7,%ymm3,%ymm3 + vpxor %ymm8,%ymm4,%ymm4 + vpxor %ymm11,%ymm2,%ymm2 + vpxor %ymm12,%ymm3,%ymm3 + vpxor %ymm13,%ymm4,%ymm4 + + decq %rax + jnz L$select_loop_avx2_w5 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,64(%rdi) + vzeroupper + ret + +L$SEH_end_ecp_nistz256_avx2_select_w5: + + + + +.globl _ecp_nistz256_avx2_select_w7 +.private_extern _ecp_nistz256_avx2_select_w7 + +.p2align 5 +_ecp_nistz256_avx2_select_w7: + +L$avx2_select_w7: +_CET_ENDBR + vzeroupper + vmovdqa L$Three(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + + vmovdqa L$One(%rip),%ymm4 + vmovdqa L$Two(%rip),%ymm8 + vmovdqa L$Three(%rip),%ymm12 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + + movq $21,%rax +L$select_loop_avx2_w7: + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vmovdqa 64(%rsi),%ymm9 + vmovdqa 96(%rsi),%ymm10 + + vmovdqa 128(%rsi),%ymm13 + vmovdqa 160(%rsi),%ymm14 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + vpcmpeqd %ymm1,%ymm8,%ymm11 + vpcmpeqd %ymm1,%ymm12,%ymm15 + + vpaddd %ymm0,%ymm4,%ymm4 + vpaddd %ymm0,%ymm8,%ymm8 + vpaddd %ymm0,%ymm12,%ymm12 + leaq 192(%rsi),%rsi + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + vpand %ymm11,%ymm9,%ymm9 + vpand %ymm11,%ymm10,%ymm10 + vpand %ymm15,%ymm13,%ymm13 + vpand %ymm15,%ymm14,%ymm14 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + vpxor %ymm9,%ymm2,%ymm2 + vpxor %ymm10,%ymm3,%ymm3 + vpxor %ymm13,%ymm2,%ymm2 + vpxor %ymm14,%ymm3,%ymm3 + + decq %rax + jnz L$select_loop_avx2_w7 + + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vzeroupper + ret + +L$SEH_end_ecp_nistz256_avx2_select_w7: + + +.p2align 5 +__ecp_nistz256_add_toq: + + xorq %r11,%r11 + addq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret + + + + +.p2align 5 +__ecp_nistz256_sub_fromq: + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + addq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret + + + + +.p2align 5 +__ecp_nistz256_subq: + + subq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq %r11,%r11 + + addq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + testq %r11,%r11 + + cmovnzq %rax,%r12 + cmovnzq %rbp,%r13 + cmovnzq %rcx,%r8 + cmovnzq %r10,%r9 + + ret + + + + +.p2align 5 +__ecp_nistz256_mul_by_2q: + + xorq %r11,%r11 + addq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret + + +.globl _ecp_nistz256_point_double +.private_extern _ecp_nistz256_point_double + +.p2align 5 +_ecp_nistz256_point_double: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je L$point_doublex + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $160+8,%rsp + +L$point_doubleq_body: + +L$point_double_shortcutq: + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq L$poly+8(%rip),%r14 + movq L$poly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-0(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 32(%rbx),%rax + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-0(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rax + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 0+32(%rsp),%rax + movq 8+32(%rsp),%r14 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subq + + movq 32(%rsp),%rax + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-0(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + leaq 160+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_doubleq_epilogue: + ret + + +.globl _ecp_nistz256_point_add +.private_extern _ecp_nistz256_point_add + +.p2align 5 +_ecp_nistz256_point_add: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je L$point_addx + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $576+8,%rsp + +L$point_addq_body: + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rsi),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + + leaq 64-0(%rsi),%rsi + movq %rax,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rax + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 +.byte 102,72,15,110,203 + + leaq 64-0(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 416(%rsp),%rax + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 512(%rsp),%rax + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq 0+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 480(%rsp),%rax + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + orq %r8,%r12 +.byte 0x3e + jnz L$add_proceedq + + + + testq %r9,%r9 + jz L$add_doubleq + + + + + + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp L$add_doneq + +.p2align 5 +L$add_doubleq: +.byte 102,72,15,126,206 +.byte 102,72,15,126,199 + addq $416,%rsp + + jmp L$point_double_shortcutq + + +.p2align 5 +L$add_proceedq: + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq 0+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0(%rsp),%rax + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 160(%rsp),%rax + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +L$add_doneq: + leaq 576+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_addq_epilogue: + ret + + +.globl _ecp_nistz256_point_add_affine +.private_extern _ecp_nistz256_point_add_affine + +.p2align 5 +_ecp_nistz256_point_add_affine: + +_CET_ENDBR + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je L$point_add_affinex + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $480+8,%rsp + +L$add_affineq_body: + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rbx),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm3,%xmm4 + movq 0(%rbx),%rax + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-0(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+96(%rsp),%rax + movq 8+96(%rsp),%r14 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq 0+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rax + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq 0+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand L$ONE_mont(%rip),%xmm2 + pand L$ONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + leaq 480+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$add_affineq_epilogue: + ret + + + +.p2align 5 +__ecp_nistz256_add_tox: + + xorq %r11,%r11 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret + + + + +.p2align 5 +__ecp_nistz256_sub_fromx: + + xorq %r11,%r11 + sbbq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq $0,%r11 + + xorq %r10,%r10 + adcq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret + + + + +.p2align 5 +__ecp_nistz256_subx: + + xorq %r11,%r11 + sbbq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq $0,%r11 + + xorq %r9,%r9 + adcq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + + btq $0,%r11 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + cmovcq %rcx,%r8 + cmovcq %r10,%r9 + + ret + + + + +.p2align 5 +__ecp_nistz256_mul_by_2x: + + xorq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret + + + +.p2align 5 +ecp_nistz256_point_doublex: + +L$point_doublex: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $160+8,%rsp + +L$point_doublex_body: + +L$point_double_shortcutx: + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq L$poly+8(%rip),%r14 + movq L$poly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-128(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 32(%rbx),%rdx + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-128(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montx + call __ecp_nistz256_mul_by_2x + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montx + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rdx + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 0+32(%rsp),%rdx + movq 8+32(%rsp),%r14 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montx + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subx + + movq 32(%rsp),%rdx + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-128(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromx + + leaq 160+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_doublex_epilogue: + ret + + + +.p2align 5 +ecp_nistz256_point_addx: + +L$point_addx: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $576+8,%rsp + +L$point_addx_body: + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rsi),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + + leaq 64-128(%rsi),%rsi + movq %rdx,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rdx + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 +.byte 102,72,15,110,203 + + leaq 64-128(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 416(%rsp),%rdx + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 512(%rsp),%rdx + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq -128+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 480(%rsp),%rdx + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + orq %r8,%r12 +.byte 0x3e + jnz L$add_proceedx + + + + testq %r9,%r9 + jz L$add_doublex + + + + + + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp L$add_donex + +.p2align 5 +L$add_doublex: +.byte 102,72,15,126,206 +.byte 102,72,15,126,199 + addq $416,%rsp + + jmp L$point_double_shortcutx + + +.p2align 5 +L$add_proceedx: + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq -128+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0(%rsp),%rdx + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 160(%rsp),%rdx + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +L$add_donex: + leaq 576+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_addx_epilogue: + ret + + + +.p2align 5 +ecp_nistz256_point_add_affinex: + +L$point_add_affinex: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $480+8,%rsp + +L$add_affinex_body: + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rbx),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm3,%xmm4 + movq 0(%rbx),%rdx + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-128(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+96(%rsp),%rdx + movq 8+96(%rsp),%r14 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq -128+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rdx + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq -128+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand L$ONE_mont(%rip),%xmm2 + pand L$ONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + leaq 480+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$add_affinex_epilogue: + ret + + +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-linux.S b/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-linux.S new file mode 100644 index 0000000000..b28554332a --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-linux.S @@ -0,0 +1,4548 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + + +.section .rodata +.align 64 +.Lpoly: +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 + +.LOne: +.long 1,1,1,1,1,1,1,1 +.LTwo: +.long 2,2,2,2,2,2,2,2 +.LThree: +.long 3,3,3,3,3,3,3,3 +.LONE_mont: +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + + +.Lord: +.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 +.LordK: +.quad 0xccd1c8aaee00bc4f +.text + + + +.globl ecp_nistz256_neg +.hidden ecp_nistz256_neg +.type ecp_nistz256_neg,@function +.align 32 +ecp_nistz256_neg: +.cfi_startproc +_CET_ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 +.Lneg_body: + + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r13,%r13 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r8,%rax + sbbq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + leaq 16(%rsp),%rsp +.cfi_adjust_cfa_offset -16 +.Lneg_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_neg,.-ecp_nistz256_neg + + + + + + +.globl ecp_nistz256_ord_mul_mont +.hidden ecp_nistz256_ord_mul_mont +.type ecp_nistz256_ord_mul_mont,@function +.align 32 +ecp_nistz256_ord_mul_mont: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je .Lecp_nistz256_ord_mul_montx + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lord_mul_body: + + movq 0(%rdx),%rax + movq %rdx,%rbx + leaq .Lord(%rip),%r14 + movq .LordK(%rip),%r15 + + + movq %rax,%rcx + mulq 0(%rsi) + movq %rax,%r8 + movq %rcx,%rax + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + + movq %r8,%r13 + imulq %r15,%r8 + + movq %rdx,%r11 + mulq 24(%rsi) + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq 0(%r14) + movq %r8,%rbp + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%r8 + + mulq 8(%r14) + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %rbp,%rax + adcq %rdx,%r10 + movq %rbp,%rdx + adcq $0,%r8 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 8(%rbx),%rax + sbbq %rdx,%rbp + + addq %r8,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + + movq %r9,%rcx + imulq %r15,%r9 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + xorq %r8,%r8 + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + mulq 0(%r14) + movq %r9,%rbp + addq %rax,%rcx + movq %r9,%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%r9 + + mulq 8(%r14) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq %rdx,%r11 + movq %rbp,%rdx + adcq $0,%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r12 + movq 16(%rbx),%rax + sbbq %rdx,%rbp + + addq %r9,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + + movq %r10,%rcx + imulq %r15,%r10 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + xorq %r9,%r9 + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + mulq 0(%r14) + movq %r10,%rbp + addq %rax,%rcx + movq %r10,%rax + adcq %rdx,%rcx + + subq %r10,%r12 + sbbq $0,%r10 + + mulq 8(%r14) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq %rdx,%r12 + movq %rbp,%rdx + adcq $0,%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r13 + movq 24(%rbx),%rax + sbbq %rdx,%rbp + + addq %r10,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rcx,%rax + adcq $0,%rdx + + movq %r11,%rcx + imulq %r15,%r11 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r8 + adcq $0,%rdx + xorq %r10,%r10 + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + mulq 0(%r14) + movq %r11,%rbp + addq %rax,%rcx + movq %r11,%rax + adcq %rdx,%rcx + + subq %r11,%r13 + sbbq $0,%r11 + + mulq 8(%r14) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq %rdx,%r13 + movq %rbp,%rdx + adcq $0,%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + sbbq %rdx,%rbp + + addq %r11,%r8 + adcq %rbp,%r9 + adcq $0,%r10 + + + movq %r12,%rsi + subq 0(%r14),%r12 + movq %r13,%r11 + sbbq 8(%r14),%r13 + movq %r8,%rcx + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rsi,%r12 + cmovcq %r11,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lord_mul_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont + + + + + + + +.globl ecp_nistz256_ord_sqr_mont +.hidden ecp_nistz256_ord_sqr_mont +.type ecp_nistz256_ord_sqr_mont,@function +.align 32 +ecp_nistz256_ord_sqr_mont: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je .Lecp_nistz256_ord_sqr_montx + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lord_sqr_body: + + movq 0(%rsi),%r8 + movq 8(%rsi),%rax + movq 16(%rsi),%r14 + movq 24(%rsi),%r15 + leaq .Lord(%rip),%rsi + movq %rdx,%rbx + jmp .Loop_ord_sqr + +.align 32 +.Loop_ord_sqr: + + movq %rax,%rbp + mulq %r8 + movq %rax,%r9 +.byte 102,72,15,110,205 + movq %r14,%rax + movq %rdx,%r10 + + mulq %r8 + addq %rax,%r10 + movq %r15,%rax +.byte 102,73,15,110,214 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r8 + addq %rax,%r11 + movq %r15,%rax +.byte 102,73,15,110,223 + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + movq %rax,%r13 + movq %r14,%rax + movq %rdx,%r14 + + + mulq %rbp + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + + addq %r15,%r12 + adcq %rdx,%r13 + adcq $0,%r14 + + + xorq %r15,%r15 + movq %r8,%rax + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + + mulq %rax + movq %rax,%r8 +.byte 102,72,15,126,200 + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r9 + adcq %rax,%r10 +.byte 102,72,15,126,208 + adcq $0,%rdx + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r11 + adcq %rax,%r12 +.byte 102,72,15,126,216 + adcq $0,%rdx + movq %rdx,%rbp + + movq %r8,%rcx + imulq 32(%rsi),%r8 + + mulq %rax + addq %rbp,%r13 + adcq %rax,%r14 + movq 0(%rsi),%rax + adcq %rdx,%r15 + + + mulq %r8 + movq %r8,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%rbp + + mulq %r8 + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %r8,%rax + adcq %rdx,%r10 + movq %r8,%rdx + adcq $0,%rbp + + movq %r9,%rcx + imulq 32(%rsi),%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 0(%rsi),%rax + sbbq %rdx,%r8 + + addq %rbp,%r11 + adcq $0,%r8 + + + mulq %r9 + movq %r9,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%rbp + + mulq %r9 + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + movq %r9,%rdx + adcq $0,%rbp + + movq %r10,%rcx + imulq 32(%rsi),%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + movq 0(%rsi),%rax + sbbq %rdx,%r9 + + addq %rbp,%r8 + adcq $0,%r9 + + + mulq %r10 + movq %r10,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r10,%r8 + sbbq $0,%rbp + + mulq %r10 + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r10,%rax + adcq %rdx,%r8 + movq %r10,%rdx + adcq $0,%rbp + + movq %r11,%rcx + imulq 32(%rsi),%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r9 + movq 0(%rsi),%rax + sbbq %rdx,%r10 + + addq %rbp,%r9 + adcq $0,%r10 + + + mulq %r11 + movq %r11,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r11,%r9 + sbbq $0,%rbp + + mulq %r11 + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + movq %r11,%rdx + adcq $0,%rbp + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r10 + sbbq %rdx,%r11 + + addq %rbp,%r10 + adcq $0,%r11 + + + xorq %rdx,%rdx + addq %r12,%r8 + adcq %r13,%r9 + movq %r8,%r12 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%rax + adcq $0,%rdx + + + subq 0(%rsi),%r8 + movq %r10,%r14 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r15 + sbbq 24(%rsi),%r11 + sbbq $0,%rdx + + cmovcq %r12,%r8 + cmovncq %r9,%rax + cmovncq %r10,%r14 + cmovncq %r11,%r15 + + decq %rbx + jnz .Loop_ord_sqr + + movq %r8,0(%rdi) + movq %rax,8(%rdi) + pxor %xmm1,%xmm1 + movq %r14,16(%rdi) + pxor %xmm2,%xmm2 + movq %r15,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lord_sqr_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont + +.type ecp_nistz256_ord_mul_montx,@function +.align 32 +ecp_nistz256_ord_mul_montx: +.cfi_startproc +.Lecp_nistz256_ord_mul_montx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lord_mulx_body: + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + leaq .Lord-128(%rip),%r14 + movq .LordK(%rip),%r15 + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + mulxq %r11,%rbp,%r11 + addq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + mulxq %r15,%rdx,%rax + adcq %rbp,%r10 + adcq %rcx,%r11 + adcq $0,%r12 + + + xorq %r13,%r13 + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%r14),%rcx,%rbp + movq 8(%rbx),%rdx + adcxq %rcx,%r11 + adoxq %rbp,%r12 + adcxq %r8,%r12 + adoxq %r8,%r13 + adcq $0,%r13 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%r14),%rcx,%rbp + movq 16(%rbx),%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcxq %r9,%r13 + adoxq %r9,%r8 + adcq $0,%r8 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%r14),%rcx,%rbp + movq 24(%rbx),%rdx + adcxq %rcx,%r13 + adoxq %rbp,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcq $0,%r9 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%r14),%rcx,%rbp + leaq 128(%r14),%r14 + movq %r12,%rbx + adcxq %rcx,%r8 + adoxq %rbp,%r9 + movq %r13,%rdx + adcxq %r11,%r9 + adoxq %r11,%r10 + adcq $0,%r10 + + + + movq %r8,%rcx + subq 0(%r14),%r12 + sbbq 8(%r14),%r13 + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lord_mulx_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx + +.type ecp_nistz256_ord_sqr_montx,@function +.align 32 +ecp_nistz256_ord_sqr_montx: +.cfi_startproc +.Lecp_nistz256_ord_sqr_montx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lord_sqrx_body: + + movq %rdx,%rbx + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq .Lord(%rip),%rsi + jmp .Loop_ord_sqrx + +.align 32 +.Loop_ord_sqrx: + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + movq %rdx,%rax +.byte 102,73,15,110,206 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + addq %rcx,%r10 +.byte 102,73,15,110,215 + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + mulxq %r8,%rcx,%r14 + movq %rax,%rdx +.byte 102,73,15,110,216 + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + + mulxq %rdx,%r8,%rbp +.byte 102,72,15,126,202 + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax +.byte 102,72,15,126,210 + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 + mulxq %rdx,%rcx,%rbp +.byte 0x67 +.byte 102,72,15,126,218 + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + adoxq %rbp,%r13 + mulxq %rdx,%rcx,%rax + adoxq %rcx,%r14 + adoxq %rax,%r15 + + + movq %r8,%rdx + mulxq 32(%rsi),%rdx,%rcx + + xorq %rax,%rax + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + adcxq %rax,%r8 + + + movq %r9,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + adoxq %rax,%r9 + + + movq %r10,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + adcxq %rax,%r10 + + + movq %r11,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + adoxq %rax,%r11 + + + addq %r8,%r12 + adcq %r13,%r9 + movq %r12,%rdx + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%r14 + adcq $0,%rax + + + subq 0(%rsi),%r12 + movq %r10,%r15 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r8 + sbbq 24(%rsi),%r11 + sbbq $0,%rax + + cmovncq %r12,%rdx + cmovncq %r9,%r14 + cmovncq %r10,%r15 + cmovncq %r11,%r8 + + decq %rbx + jnz .Loop_ord_sqrx + + movq %rdx,0(%rdi) + movq %r14,8(%rdi) + pxor %xmm1,%xmm1 + movq %r15,16(%rdi) + pxor %xmm2,%xmm2 + movq %r8,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lord_sqrx_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx + + + + + + +.globl ecp_nistz256_mul_mont +.hidden ecp_nistz256_mul_mont +.type ecp_nistz256_mul_mont,@function +.align 32 +ecp_nistz256_mul_mont: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx +.Lmul_mont: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lmul_body: + cmpl $0x80100,%ecx + je .Lmul_montx + movq %rdx,%rbx + movq 0(%rdx),%rax + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + call __ecp_nistz256_mul_montq + jmp .Lmul_mont_done + +.align 32 +.Lmul_montx: + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_mul_montx +.Lmul_mont_done: + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lmul_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont + +.type __ecp_nistz256_mul_montq,@function +.align 32 +__ecp_nistz256_mul_montq: +.cfi_startproc + + + movq %rax,%rbp + mulq %r9 + movq .Lpoly+8(%rip),%r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r10 + movq .Lpoly+24(%rip),%r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r11 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + xorq %r13,%r13 + movq %rdx,%r12 + + + + + + + + + + + movq %r8,%rbp + shlq $32,%r8 + mulq %r15 + shrq $32,%rbp + addq %r8,%r9 + adcq %rbp,%r10 + adcq %rax,%r11 + movq 8(%rbx),%rax + adcq %rdx,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + + movq %r9,%rbp + shlq $32,%r9 + mulq %r15 + shrq $32,%rbp + addq %r9,%r10 + adcq %rbp,%r11 + adcq %rax,%r12 + movq 16(%rbx),%rax + adcq %rdx,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + + movq %r10,%rbp + shlq $32,%r10 + mulq %r15 + shrq $32,%rbp + addq %r10,%r11 + adcq %rbp,%r12 + adcq %rax,%r13 + movq 24(%rbx),%rax + adcq %rdx,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + + movq %r11,%rbp + shlq $32,%r11 + mulq %r15 + shrq $32,%rbp + addq %r11,%r12 + adcq %rbp,%r13 + movq %r12,%rcx + adcq %rax,%r8 + adcq %rdx,%r9 + movq %r13,%rbp + adcq $0,%r10 + + + + subq $-1,%r12 + movq %r8,%rbx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rdx + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rcx,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rbx,%r8 + movq %r13,8(%rdi) + cmovcq %rdx,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq + + + + + + + + +.globl ecp_nistz256_sqr_mont +.hidden ecp_nistz256_sqr_mont +.type ecp_nistz256_sqr_mont,@function +.align 32 +ecp_nistz256_sqr_mont: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lsqr_body: + cmpl $0x80100,%ecx + je .Lsqr_montx + movq 0(%rsi),%rax + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + + call __ecp_nistz256_sqr_montq + jmp .Lsqr_mont_done + +.align 32 +.Lsqr_montx: + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_sqr_montx +.Lsqr_mont_done: + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lsqr_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont + +.type __ecp_nistz256_sqr_montq,@function +.align 32 +__ecp_nistz256_sqr_montq: +.cfi_startproc + movq %rax,%r13 + mulq %r14 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + + mulq %r13 + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r13 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %r14 + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + + mulq %r15 + xorq %r15,%r15 + addq %rax,%r13 + movq 0(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + mulq %rax + movq %rax,%r8 + movq 8(%rsi),%rax + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r9 + adcq %rax,%r10 + movq 16(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r11 + adcq %rax,%r12 + movq 24(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r13 + adcq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r15 + + movq .Lpoly+8(%rip),%rsi + movq .Lpoly+24(%rip),%rbp + + + + + movq %r8,%rcx + shlq $32,%r8 + mulq %rbp + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %rbp + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %rbp + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %rbp + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + adcq %rax,%r10 + adcq $0,%rdx + xorq %r11,%r11 + + + + addq %r8,%r12 + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %rdx,%r15 + movq %r13,%r9 + adcq $0,%r11 + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%rcx + sbbq %rbp,%r15 + sbbq $0,%r11 + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %rcx,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq +.type __ecp_nistz256_mul_montx,@function +.align 32 +__ecp_nistz256_mul_montx: +.cfi_startproc + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + movq $32,%r14 + xorq %r13,%r13 + mulxq %r11,%rbp,%r11 + movq .Lpoly+24(%rip),%r15 + adcq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + adcq %rbp,%r10 + shlxq %r14,%r8,%rbp + adcq %rcx,%r11 + shrxq %r14,%r8,%rcx + adcq $0,%r12 + + + + addq %rbp,%r9 + adcq %rcx,%r10 + + mulxq %r15,%rcx,%rbp + movq 8(%rbx),%rdx + adcq %rcx,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + adcxq %rcx,%r12 + shlxq %r14,%r9,%rcx + adoxq %rbp,%r13 + shrxq %r14,%r9,%rbp + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + + addq %rcx,%r10 + adcq %rbp,%r11 + + mulxq %r15,%rcx,%rbp + movq 16(%rbx),%rdx + adcq %rcx,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + adcxq %rcx,%r13 + shlxq %r14,%r10,%rcx + adoxq %rbp,%r8 + shrxq %r14,%r10,%rbp + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + + addq %rcx,%r11 + adcq %rbp,%r12 + + mulxq %r15,%rcx,%rbp + movq 24(%rbx),%rdx + adcq %rcx,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + adcxq %rcx,%r8 + shlxq %r14,%r11,%rcx + adoxq %rbp,%r9 + shrxq %r14,%r11,%rbp + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + + addq %rcx,%r12 + adcq %rbp,%r13 + + mulxq %r15,%rcx,%rbp + movq %r12,%rbx + movq .Lpoly+8(%rip),%r14 + adcq %rcx,%r8 + movq %r13,%rdx + adcq %rbp,%r9 + adcq $0,%r10 + + + + xorl %eax,%eax + movq %r8,%rcx + sbbq $-1,%r12 + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rbp + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %rbp,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx + +.type __ecp_nistz256_sqr_montx,@function +.align 32 +__ecp_nistz256_sqr_montx: +.cfi_startproc + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + xorl %eax,%eax + adcq %rcx,%r10 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + + mulxq %r8,%rcx,%r14 + movq 0+128(%rsi),%rdx + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + mulxq %rdx,%r8,%rbp + movq 8+128(%rsi),%rdx + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax + movq 16+128(%rsi),%rdx + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 +.byte 0x67 + mulxq %rdx,%rcx,%rbp + movq 24+128(%rsi),%rdx + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + movq $32,%rsi + adoxq %rbp,%r13 +.byte 0x67,0x67 + mulxq %rdx,%rcx,%rax + movq .Lpoly+24(%rip),%rdx + adoxq %rcx,%r14 + shlxq %rsi,%r8,%rcx + adoxq %rax,%r15 + shrxq %rsi,%r8,%rax + movq %rdx,%rbp + + + addq %rcx,%r9 + adcq %rax,%r10 + + mulxq %r8,%rcx,%r8 + adcq %rcx,%r11 + shlxq %rsi,%r9,%rcx + adcq $0,%r8 + shrxq %rsi,%r9,%rax + + + addq %rcx,%r10 + adcq %rax,%r11 + + mulxq %r9,%rcx,%r9 + adcq %rcx,%r8 + shlxq %rsi,%r10,%rcx + adcq $0,%r9 + shrxq %rsi,%r10,%rax + + + addq %rcx,%r11 + adcq %rax,%r8 + + mulxq %r10,%rcx,%r10 + adcq %rcx,%r9 + shlxq %rsi,%r11,%rcx + adcq $0,%r10 + shrxq %rsi,%r11,%rax + + + addq %rcx,%r8 + adcq %rax,%r9 + + mulxq %r11,%rcx,%r11 + adcq %rcx,%r10 + adcq $0,%r11 + + xorq %rdx,%rdx + addq %r8,%r12 + movq .Lpoly+8(%rip),%rsi + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %r11,%r15 + movq %r13,%r9 + adcq $0,%rdx + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%r11 + sbbq %rbp,%r15 + sbbq $0,%rdx + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %r11,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx + + +.globl ecp_nistz256_select_w5 +.hidden ecp_nistz256_select_w5 +.type ecp_nistz256_select_w5,@function +.align 32 +ecp_nistz256_select_w5: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rax + movq 8(%rax),%rax + testl $32,%eax + jnz .Lavx2_select_w5 + movdqa .LOne(%rip),%xmm0 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + + movdqa %xmm0,%xmm8 + pshufd $0,%xmm1,%xmm1 + + movq $16,%rax +.Lselect_loop_sse_w5: + + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + pcmpeqd %xmm1,%xmm15 + + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + movdqa 64(%rsi),%xmm13 + movdqa 80(%rsi),%xmm14 + leaq 96(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + pand %xmm15,%xmm13 + por %xmm12,%xmm5 + pand %xmm15,%xmm14 + por %xmm13,%xmm6 + por %xmm14,%xmm7 + + decq %rax + jnz .Lselect_loop_sse_w5 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + movdqu %xmm6,64(%rdi) + movdqu %xmm7,80(%rdi) + ret +.cfi_endproc +.LSEH_end_ecp_nistz256_select_w5: +.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 + + + +.globl ecp_nistz256_select_w7 +.hidden ecp_nistz256_select_w7 +.type ecp_nistz256_select_w7,@function +.align 32 +ecp_nistz256_select_w7: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rax + movq 8(%rax),%rax + testl $32,%eax + jnz .Lavx2_select_w7 + movdqa .LOne(%rip),%xmm8 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + movdqa %xmm8,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq $64,%rax + +.Lselect_loop_sse_w7: + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + pcmpeqd %xmm1,%xmm15 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + leaq 64(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + prefetcht0 255(%rsi) + por %xmm12,%xmm5 + + decq %rax + jnz .Lselect_loop_sse_w7 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + ret +.cfi_endproc +.LSEH_end_ecp_nistz256_select_w7: +.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 + + +.type ecp_nistz256_avx2_select_w5,@function +.align 32 +ecp_nistz256_avx2_select_w5: +.cfi_startproc +.Lavx2_select_w5: + vzeroupper + vmovdqa .LTwo(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + vpxor %ymm4,%ymm4,%ymm4 + + vmovdqa .LOne(%rip),%ymm5 + vmovdqa .LTwo(%rip),%ymm10 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + movq $8,%rax +.Lselect_loop_avx2_w5: + + vmovdqa 0(%rsi),%ymm6 + vmovdqa 32(%rsi),%ymm7 + vmovdqa 64(%rsi),%ymm8 + + vmovdqa 96(%rsi),%ymm11 + vmovdqa 128(%rsi),%ymm12 + vmovdqa 160(%rsi),%ymm13 + + vpcmpeqd %ymm1,%ymm5,%ymm9 + vpcmpeqd %ymm1,%ymm10,%ymm14 + + vpaddd %ymm0,%ymm5,%ymm5 + vpaddd %ymm0,%ymm10,%ymm10 + leaq 192(%rsi),%rsi + + vpand %ymm9,%ymm6,%ymm6 + vpand %ymm9,%ymm7,%ymm7 + vpand %ymm9,%ymm8,%ymm8 + vpand %ymm14,%ymm11,%ymm11 + vpand %ymm14,%ymm12,%ymm12 + vpand %ymm14,%ymm13,%ymm13 + + vpxor %ymm6,%ymm2,%ymm2 + vpxor %ymm7,%ymm3,%ymm3 + vpxor %ymm8,%ymm4,%ymm4 + vpxor %ymm11,%ymm2,%ymm2 + vpxor %ymm12,%ymm3,%ymm3 + vpxor %ymm13,%ymm4,%ymm4 + + decq %rax + jnz .Lselect_loop_avx2_w5 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,64(%rdi) + vzeroupper + ret +.cfi_endproc +.LSEH_end_ecp_nistz256_avx2_select_w5: +.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5 + + + +.globl ecp_nistz256_avx2_select_w7 +.hidden ecp_nistz256_avx2_select_w7 +.type ecp_nistz256_avx2_select_w7,@function +.align 32 +ecp_nistz256_avx2_select_w7: +.cfi_startproc +.Lavx2_select_w7: +_CET_ENDBR + vzeroupper + vmovdqa .LThree(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + + vmovdqa .LOne(%rip),%ymm4 + vmovdqa .LTwo(%rip),%ymm8 + vmovdqa .LThree(%rip),%ymm12 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + + movq $21,%rax +.Lselect_loop_avx2_w7: + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vmovdqa 64(%rsi),%ymm9 + vmovdqa 96(%rsi),%ymm10 + + vmovdqa 128(%rsi),%ymm13 + vmovdqa 160(%rsi),%ymm14 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + vpcmpeqd %ymm1,%ymm8,%ymm11 + vpcmpeqd %ymm1,%ymm12,%ymm15 + + vpaddd %ymm0,%ymm4,%ymm4 + vpaddd %ymm0,%ymm8,%ymm8 + vpaddd %ymm0,%ymm12,%ymm12 + leaq 192(%rsi),%rsi + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + vpand %ymm11,%ymm9,%ymm9 + vpand %ymm11,%ymm10,%ymm10 + vpand %ymm15,%ymm13,%ymm13 + vpand %ymm15,%ymm14,%ymm14 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + vpxor %ymm9,%ymm2,%ymm2 + vpxor %ymm10,%ymm3,%ymm3 + vpxor %ymm13,%ymm2,%ymm2 + vpxor %ymm14,%ymm3,%ymm3 + + decq %rax + jnz .Lselect_loop_avx2_w7 + + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vzeroupper + ret +.cfi_endproc +.LSEH_end_ecp_nistz256_avx2_select_w7: +.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 +.type __ecp_nistz256_add_toq,@function +.align 32 +__ecp_nistz256_add_toq: +.cfi_startproc + xorq %r11,%r11 + addq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq + +.type __ecp_nistz256_sub_fromq,@function +.align 32 +__ecp_nistz256_sub_fromq: +.cfi_startproc + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + addq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq + +.type __ecp_nistz256_subq,@function +.align 32 +__ecp_nistz256_subq: +.cfi_startproc + subq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq %r11,%r11 + + addq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + testq %r11,%r11 + + cmovnzq %rax,%r12 + cmovnzq %rbp,%r13 + cmovnzq %rcx,%r8 + cmovnzq %r10,%r9 + + ret +.cfi_endproc +.size __ecp_nistz256_subq,.-__ecp_nistz256_subq + +.type __ecp_nistz256_mul_by_2q,@function +.align 32 +__ecp_nistz256_mul_by_2q: +.cfi_startproc + xorq %r11,%r11 + addq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q +.globl ecp_nistz256_point_double +.hidden ecp_nistz256_point_double +.type ecp_nistz256_point_double,@function +.align 32 +ecp_nistz256_point_double: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je .Lpoint_doublex + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $160+8,%rsp +.cfi_adjust_cfa_offset 32*5+8 +.Lpoint_doubleq_body: + +.Lpoint_double_shortcutq: + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq .Lpoly+8(%rip),%r14 + movq .Lpoly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-0(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 32(%rbx),%rax + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-0(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rax + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 0+32(%rsp),%rax + movq 8+32(%rsp),%r14 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subq + + movq 32(%rsp),%rax + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-0(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + leaq 160+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpoint_doubleq_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_point_double,.-ecp_nistz256_point_double +.globl ecp_nistz256_point_add +.hidden ecp_nistz256_point_add +.type ecp_nistz256_point_add,@function +.align 32 +ecp_nistz256_point_add: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je .Lpoint_addx + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $576+8,%rsp +.cfi_adjust_cfa_offset 32*18+8 +.Lpoint_addq_body: + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rsi),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + + leaq 64-0(%rsi),%rsi + movq %rax,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rax + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 +.byte 102,72,15,110,203 + + leaq 64-0(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 416(%rsp),%rax + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 512(%rsp),%rax + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq 0+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 480(%rsp),%rax + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + orq %r8,%r12 +.byte 0x3e + jnz .Ladd_proceedq + + + + testq %r9,%r9 + jz .Ladd_doubleq + + + + + + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp .Ladd_doneq + +.align 32 +.Ladd_doubleq: +.byte 102,72,15,126,206 +.byte 102,72,15,126,199 + addq $416,%rsp +.cfi_adjust_cfa_offset -416 + jmp .Lpoint_double_shortcutq +.cfi_adjust_cfa_offset 416 + +.align 32 +.Ladd_proceedq: + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq 0+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0(%rsp),%rax + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 160(%rsp),%rax + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +.Ladd_doneq: + leaq 576+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpoint_addq_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_point_add,.-ecp_nistz256_point_add +.globl ecp_nistz256_point_add_affine +.hidden ecp_nistz256_point_add_affine +.type ecp_nistz256_point_add_affine,@function +.align 32 +ecp_nistz256_point_add_affine: +.cfi_startproc +_CET_ENDBR + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je .Lpoint_add_affinex + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $480+8,%rsp +.cfi_adjust_cfa_offset 32*15+8 +.Ladd_affineq_body: + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rbx),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm3,%xmm4 + movq 0(%rbx),%rax + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-0(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+96(%rsp),%rax + movq 8+96(%rsp),%r14 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq 0+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rax + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq 0+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand .LONE_mont(%rip),%xmm2 + pand .LONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + leaq 480+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Ladd_affineq_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine +.type __ecp_nistz256_add_tox,@function +.align 32 +__ecp_nistz256_add_tox: +.cfi_startproc + xorq %r11,%r11 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox + +.type __ecp_nistz256_sub_fromx,@function +.align 32 +__ecp_nistz256_sub_fromx: +.cfi_startproc + xorq %r11,%r11 + sbbq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq $0,%r11 + + xorq %r10,%r10 + adcq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx + +.type __ecp_nistz256_subx,@function +.align 32 +__ecp_nistz256_subx: +.cfi_startproc + xorq %r11,%r11 + sbbq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq $0,%r11 + + xorq %r9,%r9 + adcq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + + btq $0,%r11 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + cmovcq %rcx,%r8 + cmovcq %r10,%r9 + + ret +.cfi_endproc +.size __ecp_nistz256_subx,.-__ecp_nistz256_subx + +.type __ecp_nistz256_mul_by_2x,@function +.align 32 +__ecp_nistz256_mul_by_2x: +.cfi_startproc + xorq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + ret +.cfi_endproc +.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x +.type ecp_nistz256_point_doublex,@function +.align 32 +ecp_nistz256_point_doublex: +.cfi_startproc +.Lpoint_doublex: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $160+8,%rsp +.cfi_adjust_cfa_offset 32*5+8 +.Lpoint_doublex_body: + +.Lpoint_double_shortcutx: + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq .Lpoly+8(%rip),%r14 + movq .Lpoly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-128(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 32(%rbx),%rdx + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-128(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montx + call __ecp_nistz256_mul_by_2x + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montx + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rdx + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 0+32(%rsp),%rdx + movq 8+32(%rsp),%r14 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montx + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subx + + movq 32(%rsp),%rdx + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-128(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromx + + leaq 160+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpoint_doublex_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex +.type ecp_nistz256_point_addx,@function +.align 32 +ecp_nistz256_point_addx: +.cfi_startproc +.Lpoint_addx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $576+8,%rsp +.cfi_adjust_cfa_offset 32*18+8 +.Lpoint_addx_body: + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rsi),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + + leaq 64-128(%rsi),%rsi + movq %rdx,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rdx + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 +.byte 102,72,15,110,203 + + leaq 64-128(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 416(%rsp),%rdx + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 512(%rsp),%rdx + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq -128+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 480(%rsp),%rdx + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + orq %r8,%r12 +.byte 0x3e + jnz .Ladd_proceedx + + + + testq %r9,%r9 + jz .Ladd_doublex + + + + + + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp .Ladd_donex + +.align 32 +.Ladd_doublex: +.byte 102,72,15,126,206 +.byte 102,72,15,126,199 + addq $416,%rsp +.cfi_adjust_cfa_offset -416 + jmp .Lpoint_double_shortcutx +.cfi_adjust_cfa_offset 416 + +.align 32 +.Ladd_proceedx: + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq -128+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0(%rsp),%rdx + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 160(%rsp),%rdx + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +.Ladd_donex: + leaq 576+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpoint_addx_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx +.type ecp_nistz256_point_add_affinex,@function +.align 32 +ecp_nistz256_point_add_affinex: +.cfi_startproc +.Lpoint_add_affinex: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $480+8,%rsp +.cfi_adjust_cfa_offset 32*15+8 +.Ladd_affinex_body: + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rbx),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm3,%xmm4 + movq 0(%rbx),%rdx + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-128(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+96(%rsp),%rdx + movq 8+96(%rsp),%r14 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq -128+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rdx + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq -128+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand .LONE_mont(%rip),%xmm2 + pand .LONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + leaq 480+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Ladd_affinex_epilogue: + ret +.cfi_endproc +.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-win.asm b/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-win.asm new file mode 100644 index 0000000000..c25cac3343 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256-x86_64-asm-win.asm @@ -0,0 +1,5004 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P + + +section .rdata rdata align=8 +ALIGN 64 +$L$poly: + DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 + +$L$One: + DD 1,1,1,1,1,1,1,1 +$L$Two: + DD 2,2,2,2,2,2,2,2 +$L$Three: + DD 3,3,3,3,3,3,3,3 +$L$ONE_mont: + DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe + + +$L$ord: + DQ 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000 +$L$ordK: + DQ 0xccd1c8aaee00bc4f +section .text + + + + +global ecp_nistz256_neg + +ALIGN 32 +ecp_nistz256_neg: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_neg: + mov rdi,rcx + mov rsi,rdx + + + +_CET_ENDBR + push r12 + + push r13 + +$L$neg_body: + + xor r8,r8 + xor r9,r9 + xor r10,r10 + xor r11,r11 + xor r13,r13 + + sub r8,QWORD[rsi] + sbb r9,QWORD[8+rsi] + sbb r10,QWORD[16+rsi] + mov rax,r8 + sbb r11,QWORD[24+rsi] + lea rsi,[$L$poly] + mov rdx,r9 + sbb r13,0 + + add r8,QWORD[rsi] + mov rcx,r10 + adc r9,QWORD[8+rsi] + adc r10,QWORD[16+rsi] + mov r12,r11 + adc r11,QWORD[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD[rdi],r8 + cmovz r10,rcx + mov QWORD[8+rdi],r9 + cmovz r11,r12 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + lea rsp,[16+rsp] + +$L$neg_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_neg: + + + + + + +global ecp_nistz256_ord_mul_mont + +ALIGN 32 +ecp_nistz256_ord_mul_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_ord_mul_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rcx,[OPENSSL_ia32cap_P] + mov rcx,QWORD[8+rcx] + and ecx,0x80100 + cmp ecx,0x80100 + je NEAR $L$ecp_nistz256_ord_mul_montx + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ord_mul_body: + + mov rax,QWORD[rdx] + mov rbx,rdx + lea r14,[$L$ord] + mov r15,QWORD[$L$ordK] + + + mov rcx,rax + mul QWORD[rsi] + mov r8,rax + mov rax,rcx + mov r9,rdx + + mul QWORD[8+rsi] + add r9,rax + mov rax,rcx + adc rdx,0 + mov r10,rdx + + mul QWORD[16+rsi] + add r10,rax + mov rax,rcx + adc rdx,0 + + mov r13,r8 + imul r8,r15 + + mov r11,rdx + mul QWORD[24+rsi] + add r11,rax + mov rax,r8 + adc rdx,0 + mov r12,rdx + + + mul QWORD[r14] + mov rbp,r8 + add r13,rax + mov rax,r8 + adc rdx,0 + mov rcx,rdx + + sub r10,r8 + sbb r8,0 + + mul QWORD[8+r14] + add r9,rcx + adc rdx,0 + add r9,rax + mov rax,rbp + adc r10,rdx + mov rdx,rbp + adc r8,0 + + shl rax,32 + shr rdx,32 + sub r11,rax + mov rax,QWORD[8+rbx] + sbb rbp,rdx + + add r11,r8 + adc r12,rbp + adc r13,0 + + + mov rcx,rax + mul QWORD[rsi] + add r9,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[8+rsi] + add r10,rbp + adc rdx,0 + add r10,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[16+rsi] + add r11,rbp + adc rdx,0 + add r11,rax + mov rax,rcx + adc rdx,0 + + mov rcx,r9 + imul r9,r15 + + mov rbp,rdx + mul QWORD[24+rsi] + add r12,rbp + adc rdx,0 + xor r8,r8 + add r12,rax + mov rax,r9 + adc r13,rdx + adc r8,0 + + + mul QWORD[r14] + mov rbp,r9 + add rcx,rax + mov rax,r9 + adc rcx,rdx + + sub r11,r9 + sbb r9,0 + + mul QWORD[8+r14] + add r10,rcx + adc rdx,0 + add r10,rax + mov rax,rbp + adc r11,rdx + mov rdx,rbp + adc r9,0 + + shl rax,32 + shr rdx,32 + sub r12,rax + mov rax,QWORD[16+rbx] + sbb rbp,rdx + + add r12,r9 + adc r13,rbp + adc r8,0 + + + mov rcx,rax + mul QWORD[rsi] + add r10,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[8+rsi] + add r11,rbp + adc rdx,0 + add r11,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[16+rsi] + add r12,rbp + adc rdx,0 + add r12,rax + mov rax,rcx + adc rdx,0 + + mov rcx,r10 + imul r10,r15 + + mov rbp,rdx + mul QWORD[24+rsi] + add r13,rbp + adc rdx,0 + xor r9,r9 + add r13,rax + mov rax,r10 + adc r8,rdx + adc r9,0 + + + mul QWORD[r14] + mov rbp,r10 + add rcx,rax + mov rax,r10 + adc rcx,rdx + + sub r12,r10 + sbb r10,0 + + mul QWORD[8+r14] + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,rbp + adc r12,rdx + mov rdx,rbp + adc r10,0 + + shl rax,32 + shr rdx,32 + sub r13,rax + mov rax,QWORD[24+rbx] + sbb rbp,rdx + + add r13,r10 + adc r8,rbp + adc r9,0 + + + mov rcx,rax + mul QWORD[rsi] + add r11,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[8+rsi] + add r12,rbp + adc rdx,0 + add r12,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[16+rsi] + add r13,rbp + adc rdx,0 + add r13,rax + mov rax,rcx + adc rdx,0 + + mov rcx,r11 + imul r11,r15 + + mov rbp,rdx + mul QWORD[24+rsi] + add r8,rbp + adc rdx,0 + xor r10,r10 + add r8,rax + mov rax,r11 + adc r9,rdx + adc r10,0 + + + mul QWORD[r14] + mov rbp,r11 + add rcx,rax + mov rax,r11 + adc rcx,rdx + + sub r13,r11 + sbb r11,0 + + mul QWORD[8+r14] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,rbp + adc r13,rdx + mov rdx,rbp + adc r11,0 + + shl rax,32 + shr rdx,32 + sub r8,rax + sbb rbp,rdx + + add r8,r11 + adc r9,rbp + adc r10,0 + + + mov rsi,r12 + sub r12,QWORD[r14] + mov r11,r13 + sbb r13,QWORD[8+r14] + mov rcx,r8 + sbb r8,QWORD[16+r14] + mov rbp,r9 + sbb r9,QWORD[24+r14] + sbb r10,0 + + cmovc r12,rsi + cmovc r13,r11 + cmovc r8,rcx + cmovc r9,rbp + + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ord_mul_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_ord_mul_mont: + + + + + + + +global ecp_nistz256_ord_sqr_mont + +ALIGN 32 +ecp_nistz256_ord_sqr_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_ord_sqr_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rcx,[OPENSSL_ia32cap_P] + mov rcx,QWORD[8+rcx] + and ecx,0x80100 + cmp ecx,0x80100 + je NEAR $L$ecp_nistz256_ord_sqr_montx + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ord_sqr_body: + + mov r8,QWORD[rsi] + mov rax,QWORD[8+rsi] + mov r14,QWORD[16+rsi] + mov r15,QWORD[24+rsi] + lea rsi,[$L$ord] + mov rbx,rdx + jmp NEAR $L$oop_ord_sqr + +ALIGN 32 +$L$oop_ord_sqr: + + mov rbp,rax + mul r8 + mov r9,rax +DB 102,72,15,110,205 + mov rax,r14 + mov r10,rdx + + mul r8 + add r10,rax + mov rax,r15 +DB 102,73,15,110,214 + adc rdx,0 + mov r11,rdx + + mul r8 + add r11,rax + mov rax,r15 +DB 102,73,15,110,223 + adc rdx,0 + mov r12,rdx + + + mul r14 + mov r13,rax + mov rax,r14 + mov r14,rdx + + + mul rbp + add r11,rax + mov rax,r15 + adc rdx,0 + mov r15,rdx + + mul rbp + add r12,rax + adc rdx,0 + + add r12,r15 + adc r13,rdx + adc r14,0 + + + xor r15,r15 + mov rax,r8 + add r9,r9 + adc r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,0 + + + mul rax + mov r8,rax +DB 102,72,15,126,200 + mov rbp,rdx + + mul rax + add r9,rbp + adc r10,rax +DB 102,72,15,126,208 + adc rdx,0 + mov rbp,rdx + + mul rax + add r11,rbp + adc r12,rax +DB 102,72,15,126,216 + adc rdx,0 + mov rbp,rdx + + mov rcx,r8 + imul r8,QWORD[32+rsi] + + mul rax + add r13,rbp + adc r14,rax + mov rax,QWORD[rsi] + adc r15,rdx + + + mul r8 + mov rbp,r8 + add rcx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + + sub r10,r8 + sbb rbp,0 + + mul r8 + add r9,rcx + adc rdx,0 + add r9,rax + mov rax,r8 + adc r10,rdx + mov rdx,r8 + adc rbp,0 + + mov rcx,r9 + imul r9,QWORD[32+rsi] + + shl rax,32 + shr rdx,32 + sub r11,rax + mov rax,QWORD[rsi] + sbb r8,rdx + + add r11,rbp + adc r8,0 + + + mul r9 + mov rbp,r9 + add rcx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + + sub r11,r9 + sbb rbp,0 + + mul r9 + add r10,rcx + adc rdx,0 + add r10,rax + mov rax,r9 + adc r11,rdx + mov rdx,r9 + adc rbp,0 + + mov rcx,r10 + imul r10,QWORD[32+rsi] + + shl rax,32 + shr rdx,32 + sub r8,rax + mov rax,QWORD[rsi] + sbb r9,rdx + + add r8,rbp + adc r9,0 + + + mul r10 + mov rbp,r10 + add rcx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + + sub r8,r10 + sbb rbp,0 + + mul r10 + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,r10 + adc r8,rdx + mov rdx,r10 + adc rbp,0 + + mov rcx,r11 + imul r11,QWORD[32+rsi] + + shl rax,32 + shr rdx,32 + sub r9,rax + mov rax,QWORD[rsi] + sbb r10,rdx + + add r9,rbp + adc r10,0 + + + mul r11 + mov rbp,r11 + add rcx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + + sub r9,r11 + sbb rbp,0 + + mul r11 + add r8,rcx + adc rdx,0 + add r8,rax + mov rax,r11 + adc r9,rdx + mov rdx,r11 + adc rbp,0 + + shl rax,32 + shr rdx,32 + sub r10,rax + sbb r11,rdx + + add r10,rbp + adc r11,0 + + + xor rdx,rdx + add r8,r12 + adc r9,r13 + mov r12,r8 + adc r10,r14 + adc r11,r15 + mov rax,r9 + adc rdx,0 + + + sub r8,QWORD[rsi] + mov r14,r10 + sbb r9,QWORD[8+rsi] + sbb r10,QWORD[16+rsi] + mov r15,r11 + sbb r11,QWORD[24+rsi] + sbb rdx,0 + + cmovc r8,r12 + cmovnc rax,r9 + cmovnc r14,r10 + cmovnc r15,r11 + + dec rbx + jnz NEAR $L$oop_ord_sqr + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],rax + pxor xmm1,xmm1 + mov QWORD[16+rdi],r14 + pxor xmm2,xmm2 + mov QWORD[24+rdi],r15 + pxor xmm3,xmm3 + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ord_sqr_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_ord_sqr_mont: + + +ALIGN 32 +ecp_nistz256_ord_mul_montx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_ord_mul_montx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$ecp_nistz256_ord_mul_montx: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ord_mulx_body: + + mov rbx,rdx + mov rdx,QWORD[rdx] + mov r9,QWORD[rsi] + mov r10,QWORD[8+rsi] + mov r11,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + lea rsi,[((-128))+rsi] + lea r14,[(($L$ord-128))] + mov r15,QWORD[$L$ordK] + + + mulx r9,r8,r9 + mulx r10,rcx,r10 + mulx r11,rbp,r11 + add r9,rcx + mulx r12,rcx,r12 + mov rdx,r8 + mulx rax,rdx,r15 + adc r10,rbp + adc r11,rcx + adc r12,0 + + + xor r13,r13 + mulx rbp,rcx,QWORD[((0+128))+r14] + adcx r8,rcx + adox r9,rbp + + mulx rbp,rcx,QWORD[((8+128))+r14] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD[((16+128))+r14] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((24+128))+r14] + mov rdx,QWORD[8+rbx] + adcx r11,rcx + adox r12,rbp + adcx r12,r8 + adox r13,r8 + adc r13,0 + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r9 + mulx rax,rdx,r15 + adcx r12,rcx + adox r13,rbp + + adcx r13,r8 + adox r8,r8 + adc r8,0 + + + mulx rbp,rcx,QWORD[((0+128))+r14] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD[((8+128))+r14] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((16+128))+r14] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((24+128))+r14] + mov rdx,QWORD[16+rbx] + adcx r12,rcx + adox r13,rbp + adcx r13,r9 + adox r8,r9 + adc r8,0 + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r10 + mulx rax,rdx,r15 + adcx r13,rcx + adox r8,rbp + + adcx r8,r9 + adox r9,r9 + adc r9,0 + + + mulx rbp,rcx,QWORD[((0+128))+r14] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((8+128))+r14] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((16+128))+r14] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((24+128))+r14] + mov rdx,QWORD[24+rbx] + adcx r13,rcx + adox r8,rbp + adcx r8,r10 + adox r9,r10 + adc r9,0 + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r13,rcx + adox r8,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r11 + mulx rax,rdx,r15 + adcx r8,rcx + adox r9,rbp + + adcx r9,r10 + adox r10,r10 + adc r10,0 + + + mulx rbp,rcx,QWORD[((0+128))+r14] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((8+128))+r14] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((16+128))+r14] + adcx r13,rcx + adox r8,rbp + + mulx rbp,rcx,QWORD[((24+128))+r14] + lea r14,[128+r14] + mov rbx,r12 + adcx r8,rcx + adox r9,rbp + mov rdx,r13 + adcx r9,r11 + adox r10,r11 + adc r10,0 + + + + mov rcx,r8 + sub r12,QWORD[r14] + sbb r13,QWORD[8+r14] + sbb r8,QWORD[16+r14] + mov rbp,r9 + sbb r9,QWORD[24+r14] + sbb r10,0 + + cmovc r12,rbx + cmovc r13,rdx + cmovc r8,rcx + cmovc r9,rbp + + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ord_mulx_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_ord_mul_montx: + + +ALIGN 32 +ecp_nistz256_ord_sqr_montx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_ord_sqr_montx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$ecp_nistz256_ord_sqr_montx: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ord_sqrx_body: + + mov rbx,rdx + mov rdx,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r15,QWORD[16+rsi] + mov r8,QWORD[24+rsi] + lea rsi,[$L$ord] + jmp NEAR $L$oop_ord_sqrx + +ALIGN 32 +$L$oop_ord_sqrx: + mulx r10,r9,r14 + mulx r11,rcx,r15 + mov rax,rdx +DB 102,73,15,110,206 + mulx r12,rbp,r8 + mov rdx,r14 + add r10,rcx +DB 102,73,15,110,215 + adc r11,rbp + adc r12,0 + xor r13,r13 + + mulx rbp,rcx,r15 + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,r8 + mov rdx,r15 + adcx r12,rcx + adox r13,rbp + adc r13,0 + + mulx r14,rcx,r8 + mov rdx,rax +DB 102,73,15,110,216 + xor r15,r15 + adcx r9,r9 + adox r13,rcx + adcx r10,r10 + adox r14,r15 + + + mulx rbp,r8,rdx +DB 102,72,15,126,202 + adcx r11,r11 + adox r9,rbp + adcx r12,r12 + mulx rax,rcx,rdx +DB 102,72,15,126,210 + adcx r13,r13 + adox r10,rcx + adcx r14,r14 + mulx rbp,rcx,rdx + DB 0x67 +DB 102,72,15,126,218 + adox r11,rax + adcx r15,r15 + adox r12,rcx + adox r13,rbp + mulx rax,rcx,rdx + adox r14,rcx + adox r15,rax + + + mov rdx,r8 + mulx rcx,rdx,QWORD[32+rsi] + + xor rax,rax + mulx rbp,rcx,QWORD[rsi] + adcx r8,rcx + adox r9,rbp + mulx rbp,rcx,QWORD[8+rsi] + adcx r9,rcx + adox r10,rbp + mulx rbp,rcx,QWORD[16+rsi] + adcx r10,rcx + adox r11,rbp + mulx rbp,rcx,QWORD[24+rsi] + adcx r11,rcx + adox r8,rbp + adcx r8,rax + + + mov rdx,r9 + mulx rcx,rdx,QWORD[32+rsi] + + mulx rbp,rcx,QWORD[rsi] + adox r9,rcx + adcx r10,rbp + mulx rbp,rcx,QWORD[8+rsi] + adox r10,rcx + adcx r11,rbp + mulx rbp,rcx,QWORD[16+rsi] + adox r11,rcx + adcx r8,rbp + mulx rbp,rcx,QWORD[24+rsi] + adox r8,rcx + adcx r9,rbp + adox r9,rax + + + mov rdx,r10 + mulx rcx,rdx,QWORD[32+rsi] + + mulx rbp,rcx,QWORD[rsi] + adcx r10,rcx + adox r11,rbp + mulx rbp,rcx,QWORD[8+rsi] + adcx r11,rcx + adox r8,rbp + mulx rbp,rcx,QWORD[16+rsi] + adcx r8,rcx + adox r9,rbp + mulx rbp,rcx,QWORD[24+rsi] + adcx r9,rcx + adox r10,rbp + adcx r10,rax + + + mov rdx,r11 + mulx rcx,rdx,QWORD[32+rsi] + + mulx rbp,rcx,QWORD[rsi] + adox r11,rcx + adcx r8,rbp + mulx rbp,rcx,QWORD[8+rsi] + adox r8,rcx + adcx r9,rbp + mulx rbp,rcx,QWORD[16+rsi] + adox r9,rcx + adcx r10,rbp + mulx rbp,rcx,QWORD[24+rsi] + adox r10,rcx + adcx r11,rbp + adox r11,rax + + + add r12,r8 + adc r9,r13 + mov rdx,r12 + adc r10,r14 + adc r11,r15 + mov r14,r9 + adc rax,0 + + + sub r12,QWORD[rsi] + mov r15,r10 + sbb r9,QWORD[8+rsi] + sbb r10,QWORD[16+rsi] + mov r8,r11 + sbb r11,QWORD[24+rsi] + sbb rax,0 + + cmovnc rdx,r12 + cmovnc r14,r9 + cmovnc r15,r10 + cmovnc r8,r11 + + dec rbx + jnz NEAR $L$oop_ord_sqrx + + mov QWORD[rdi],rdx + mov QWORD[8+rdi],r14 + pxor xmm1,xmm1 + mov QWORD[16+rdi],r15 + pxor xmm2,xmm2 + mov QWORD[24+rdi],r8 + pxor xmm3,xmm3 + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ord_sqrx_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_ord_sqr_montx: + + + + + + +global ecp_nistz256_mul_mont + +ALIGN 32 +ecp_nistz256_mul_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_mul_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rcx,[OPENSSL_ia32cap_P] + mov rcx,QWORD[8+rcx] + and ecx,0x80100 +$L$mul_mont: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$mul_body: + cmp ecx,0x80100 + je NEAR $L$mul_montx + mov rbx,rdx + mov rax,QWORD[rdx] + mov r9,QWORD[rsi] + mov r10,QWORD[8+rsi] + mov r11,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + + call __ecp_nistz256_mul_montq + jmp NEAR $L$mul_mont_done + +ALIGN 32 +$L$mul_montx: + mov rbx,rdx + mov rdx,QWORD[rdx] + mov r9,QWORD[rsi] + mov r10,QWORD[8+rsi] + mov r11,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + lea rsi,[((-128))+rsi] + + call __ecp_nistz256_mul_montx +$L$mul_mont_done: + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$mul_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_mul_mont: + + +ALIGN 32 +__ecp_nistz256_mul_montq: + + + + mov rbp,rax + mul r9 + mov r14,QWORD[(($L$poly+8))] + mov r8,rax + mov rax,rbp + mov r9,rdx + + mul r10 + mov r15,QWORD[(($L$poly+24))] + add r9,rax + mov rax,rbp + adc rdx,0 + mov r10,rdx + + mul r11 + add r10,rax + mov rax,rbp + adc rdx,0 + mov r11,rdx + + mul r12 + add r11,rax + mov rax,r8 + adc rdx,0 + xor r13,r13 + mov r12,rdx + + + + + + + + + + + mov rbp,r8 + shl r8,32 + mul r15 + shr rbp,32 + add r9,r8 + adc r10,rbp + adc r11,rax + mov rax,QWORD[8+rbx] + adc r12,rdx + adc r13,0 + xor r8,r8 + + + + mov rbp,rax + mul QWORD[rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[8+rsi] + add r10,rcx + adc rdx,0 + add r10,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[16+rsi] + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[24+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,r9 + adc r13,rdx + adc r8,0 + + + + mov rbp,r9 + shl r9,32 + mul r15 + shr rbp,32 + add r10,r9 + adc r11,rbp + adc r12,rax + mov rax,QWORD[16+rbx] + adc r13,rdx + adc r8,0 + xor r9,r9 + + + + mov rbp,rax + mul QWORD[rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[8+rsi] + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[16+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[24+rsi] + add r13,rcx + adc rdx,0 + add r13,rax + mov rax,r10 + adc r8,rdx + adc r9,0 + + + + mov rbp,r10 + shl r10,32 + mul r15 + shr rbp,32 + add r11,r10 + adc r12,rbp + adc r13,rax + mov rax,QWORD[24+rbx] + adc r8,rdx + adc r9,0 + xor r10,r10 + + + + mov rbp,rax + mul QWORD[rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[8+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[16+rsi] + add r13,rcx + adc rdx,0 + add r13,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD[24+rsi] + add r8,rcx + adc rdx,0 + add r8,rax + mov rax,r11 + adc r9,rdx + adc r10,0 + + + + mov rbp,r11 + shl r11,32 + mul r15 + shr rbp,32 + add r12,r11 + adc r13,rbp + mov rcx,r12 + adc r8,rax + adc r9,rdx + mov rbp,r13 + adc r10,0 + + + + sub r12,-1 + mov rbx,r8 + sbb r13,r14 + sbb r8,0 + mov rdx,r9 + sbb r9,r15 + sbb r10,0 + + cmovc r12,rcx + cmovc r13,rbp + mov QWORD[rdi],r12 + cmovc r8,rbx + mov QWORD[8+rdi],r13 + cmovc r9,rdx + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + ret + + + + + + + + + + +global ecp_nistz256_sqr_mont + +ALIGN 32 +ecp_nistz256_sqr_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_sqr_mont: + mov rdi,rcx + mov rsi,rdx + + + +_CET_ENDBR + lea rcx,[OPENSSL_ia32cap_P] + mov rcx,QWORD[8+rcx] + and ecx,0x80100 + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$sqr_body: + cmp ecx,0x80100 + je NEAR $L$sqr_montx + mov rax,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r15,QWORD[16+rsi] + mov r8,QWORD[24+rsi] + + call __ecp_nistz256_sqr_montq + jmp NEAR $L$sqr_mont_done + +ALIGN 32 +$L$sqr_montx: + mov rdx,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r15,QWORD[16+rsi] + mov r8,QWORD[24+rsi] + lea rsi,[((-128))+rsi] + + call __ecp_nistz256_sqr_montx +$L$sqr_mont_done: + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$sqr_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_sqr_mont: + + +ALIGN 32 +__ecp_nistz256_sqr_montq: + + mov r13,rax + mul r14 + mov r9,rax + mov rax,r15 + mov r10,rdx + + mul r13 + add r10,rax + mov rax,r8 + adc rdx,0 + mov r11,rdx + + mul r13 + add r11,rax + mov rax,r15 + adc rdx,0 + mov r12,rdx + + + mul r14 + add r11,rax + mov rax,r8 + adc rdx,0 + mov rbp,rdx + + mul r14 + add r12,rax + mov rax,r8 + adc rdx,0 + add r12,rbp + mov r13,rdx + adc r13,0 + + + mul r15 + xor r15,r15 + add r13,rax + mov rax,QWORD[rsi] + mov r14,rdx + adc r14,0 + + add r9,r9 + adc r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,0 + + mul rax + mov r8,rax + mov rax,QWORD[8+rsi] + mov rcx,rdx + + mul rax + add r9,rcx + adc r10,rax + mov rax,QWORD[16+rsi] + adc rdx,0 + mov rcx,rdx + + mul rax + add r11,rcx + adc r12,rax + mov rax,QWORD[24+rsi] + adc rdx,0 + mov rcx,rdx + + mul rax + add r13,rcx + adc r14,rax + mov rax,r8 + adc r15,rdx + + mov rsi,QWORD[(($L$poly+8))] + mov rbp,QWORD[(($L$poly+24))] + + + + + mov rcx,r8 + shl r8,32 + mul rbp + shr rcx,32 + add r9,r8 + adc r10,rcx + adc r11,rax + mov rax,r9 + adc rdx,0 + + + + mov rcx,r9 + shl r9,32 + mov r8,rdx + mul rbp + shr rcx,32 + add r10,r9 + adc r11,rcx + adc r8,rax + mov rax,r10 + adc rdx,0 + + + + mov rcx,r10 + shl r10,32 + mov r9,rdx + mul rbp + shr rcx,32 + add r11,r10 + adc r8,rcx + adc r9,rax + mov rax,r11 + adc rdx,0 + + + + mov rcx,r11 + shl r11,32 + mov r10,rdx + mul rbp + shr rcx,32 + add r8,r11 + adc r9,rcx + adc r10,rax + adc rdx,0 + xor r11,r11 + + + + add r12,r8 + adc r13,r9 + mov r8,r12 + adc r14,r10 + adc r15,rdx + mov r9,r13 + adc r11,0 + + sub r12,-1 + mov r10,r14 + sbb r13,rsi + sbb r14,0 + mov rcx,r15 + sbb r15,rbp + sbb r11,0 + + cmovc r12,r8 + cmovc r13,r9 + mov QWORD[rdi],r12 + cmovc r14,r10 + mov QWORD[8+rdi],r13 + cmovc r15,rcx + mov QWORD[16+rdi],r14 + mov QWORD[24+rdi],r15 + + ret + + + +ALIGN 32 +__ecp_nistz256_mul_montx: + + + + mulx r9,r8,r9 + mulx r10,rcx,r10 + mov r14,32 + xor r13,r13 + mulx r11,rbp,r11 + mov r15,QWORD[(($L$poly+24))] + adc r9,rcx + mulx r12,rcx,r12 + mov rdx,r8 + adc r10,rbp + shlx rbp,r8,r14 + adc r11,rcx + shrx rcx,r8,r14 + adc r12,0 + + + + add r9,rbp + adc r10,rcx + + mulx rbp,rcx,r15 + mov rdx,QWORD[8+rbx] + adc r11,rcx + adc r12,rbp + adc r13,0 + xor r8,r8 + + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r9 + adcx r12,rcx + shlx rcx,r9,r14 + adox r13,rbp + shrx rbp,r9,r14 + + adcx r13,r8 + adox r8,r8 + adc r8,0 + + + + add r10,rcx + adc r11,rbp + + mulx rbp,rcx,r15 + mov rdx,QWORD[16+rbx] + adc r12,rcx + adc r13,rbp + adc r8,0 + xor r9,r9 + + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r10 + adcx r13,rcx + shlx rcx,r10,r14 + adox r8,rbp + shrx rbp,r10,r14 + + adcx r8,r9 + adox r9,r9 + adc r9,0 + + + + add r11,rcx + adc r12,rbp + + mulx rbp,rcx,r15 + mov rdx,QWORD[24+rbx] + adc r13,rcx + adc r8,rbp + adc r9,0 + xor r10,r10 + + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r13,rcx + adox r8,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r11 + adcx r8,rcx + shlx rcx,r11,r14 + adox r9,rbp + shrx rbp,r11,r14 + + adcx r9,r10 + adox r10,r10 + adc r10,0 + + + + add r12,rcx + adc r13,rbp + + mulx rbp,rcx,r15 + mov rbx,r12 + mov r14,QWORD[(($L$poly+8))] + adc r8,rcx + mov rdx,r13 + adc r9,rbp + adc r10,0 + + + + xor eax,eax + mov rcx,r8 + sbb r12,-1 + sbb r13,r14 + sbb r8,0 + mov rbp,r9 + sbb r9,r15 + sbb r10,0 + + cmovc r12,rbx + cmovc r13,rdx + mov QWORD[rdi],r12 + cmovc r8,rcx + mov QWORD[8+rdi],r13 + cmovc r9,rbp + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + ret + + + + +ALIGN 32 +__ecp_nistz256_sqr_montx: + + mulx r10,r9,r14 + mulx r11,rcx,r15 + xor eax,eax + adc r10,rcx + mulx r12,rbp,r8 + mov rdx,r14 + adc r11,rbp + adc r12,0 + xor r13,r13 + + + mulx rbp,rcx,r15 + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,r8 + mov rdx,r15 + adcx r12,rcx + adox r13,rbp + adc r13,0 + + + mulx r14,rcx,r8 + mov rdx,QWORD[((0+128))+rsi] + xor r15,r15 + adcx r9,r9 + adox r13,rcx + adcx r10,r10 + adox r14,r15 + + mulx rbp,r8,rdx + mov rdx,QWORD[((8+128))+rsi] + adcx r11,r11 + adox r9,rbp + adcx r12,r12 + mulx rax,rcx,rdx + mov rdx,QWORD[((16+128))+rsi] + adcx r13,r13 + adox r10,rcx + adcx r14,r14 + DB 0x67 + mulx rbp,rcx,rdx + mov rdx,QWORD[((24+128))+rsi] + adox r11,rax + adcx r15,r15 + adox r12,rcx + mov rsi,32 + adox r13,rbp + DB 0x67,0x67 + mulx rax,rcx,rdx + mov rdx,QWORD[(($L$poly+24))] + adox r14,rcx + shlx rcx,r8,rsi + adox r15,rax + shrx rax,r8,rsi + mov rbp,rdx + + + add r9,rcx + adc r10,rax + + mulx r8,rcx,r8 + adc r11,rcx + shlx rcx,r9,rsi + adc r8,0 + shrx rax,r9,rsi + + + add r10,rcx + adc r11,rax + + mulx r9,rcx,r9 + adc r8,rcx + shlx rcx,r10,rsi + adc r9,0 + shrx rax,r10,rsi + + + add r11,rcx + adc r8,rax + + mulx r10,rcx,r10 + adc r9,rcx + shlx rcx,r11,rsi + adc r10,0 + shrx rax,r11,rsi + + + add r8,rcx + adc r9,rax + + mulx r11,rcx,r11 + adc r10,rcx + adc r11,0 + + xor rdx,rdx + add r12,r8 + mov rsi,QWORD[(($L$poly+8))] + adc r13,r9 + mov r8,r12 + adc r14,r10 + adc r15,r11 + mov r9,r13 + adc rdx,0 + + sub r12,-1 + mov r10,r14 + sbb r13,rsi + sbb r14,0 + mov r11,r15 + sbb r15,rbp + sbb rdx,0 + + cmovc r12,r8 + cmovc r13,r9 + mov QWORD[rdi],r12 + cmovc r14,r10 + mov QWORD[8+rdi],r13 + cmovc r15,r11 + mov QWORD[16+rdi],r14 + mov QWORD[24+rdi],r15 + + ret + + + + +global ecp_nistz256_select_w5 + +ALIGN 32 +ecp_nistz256_select_w5: + +_CET_ENDBR + lea rax,[OPENSSL_ia32cap_P] + mov rax,QWORD[8+rax] + test eax,32 + jnz NEAR $L$avx2_select_w5 + lea rax,[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_select_w5: + DB 0x48,0x8d,0x60,0xe0 + DB 0x0f,0x29,0x70,0xe0 + DB 0x0f,0x29,0x78,0xf0 + DB 0x44,0x0f,0x29,0x00 + DB 0x44,0x0f,0x29,0x48,0x10 + DB 0x44,0x0f,0x29,0x50,0x20 + DB 0x44,0x0f,0x29,0x58,0x30 + DB 0x44,0x0f,0x29,0x60,0x40 + DB 0x44,0x0f,0x29,0x68,0x50 + DB 0x44,0x0f,0x29,0x70,0x60 + DB 0x44,0x0f,0x29,0x78,0x70 + movdqa xmm0,XMMWORD[$L$One] + movd xmm1,r8d + + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + + movdqa xmm8,xmm0 + pshufd xmm1,xmm1,0 + + mov rax,16 +$L$select_loop_sse_w5: + + movdqa xmm15,xmm8 + paddd xmm8,xmm0 + pcmpeqd xmm15,xmm1 + + movdqa xmm9,XMMWORD[rdx] + movdqa xmm10,XMMWORD[16+rdx] + movdqa xmm11,XMMWORD[32+rdx] + movdqa xmm12,XMMWORD[48+rdx] + movdqa xmm13,XMMWORD[64+rdx] + movdqa xmm14,XMMWORD[80+rdx] + lea rdx,[96+rdx] + + pand xmm9,xmm15 + pand xmm10,xmm15 + por xmm2,xmm9 + pand xmm11,xmm15 + por xmm3,xmm10 + pand xmm12,xmm15 + por xmm4,xmm11 + pand xmm13,xmm15 + por xmm5,xmm12 + pand xmm14,xmm15 + por xmm6,xmm13 + por xmm7,xmm14 + + dec rax + jnz NEAR $L$select_loop_sse_w5 + + movdqu XMMWORD[rcx],xmm2 + movdqu XMMWORD[16+rcx],xmm3 + movdqu XMMWORD[32+rcx],xmm4 + movdqu XMMWORD[48+rcx],xmm5 + movdqu XMMWORD[64+rcx],xmm6 + movdqu XMMWORD[80+rcx],xmm7 + movaps xmm6,XMMWORD[rsp] + movaps xmm7,XMMWORD[16+rsp] + movaps xmm8,XMMWORD[32+rsp] + movaps xmm9,XMMWORD[48+rsp] + movaps xmm10,XMMWORD[64+rsp] + movaps xmm11,XMMWORD[80+rsp] + movaps xmm12,XMMWORD[96+rsp] + movaps xmm13,XMMWORD[112+rsp] + movaps xmm14,XMMWORD[128+rsp] + movaps xmm15,XMMWORD[144+rsp] + lea rsp,[168+rsp] + ret + +$L$SEH_end_ecp_nistz256_select_w5: + + + + +global ecp_nistz256_select_w7 + +ALIGN 32 +ecp_nistz256_select_w7: + +_CET_ENDBR + lea rax,[OPENSSL_ia32cap_P] + mov rax,QWORD[8+rax] + test eax,32 + jnz NEAR $L$avx2_select_w7 + lea rax,[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_select_w7: + DB 0x48,0x8d,0x60,0xe0 + DB 0x0f,0x29,0x70,0xe0 + DB 0x0f,0x29,0x78,0xf0 + DB 0x44,0x0f,0x29,0x00 + DB 0x44,0x0f,0x29,0x48,0x10 + DB 0x44,0x0f,0x29,0x50,0x20 + DB 0x44,0x0f,0x29,0x58,0x30 + DB 0x44,0x0f,0x29,0x60,0x40 + DB 0x44,0x0f,0x29,0x68,0x50 + DB 0x44,0x0f,0x29,0x70,0x60 + DB 0x44,0x0f,0x29,0x78,0x70 + movdqa xmm8,XMMWORD[$L$One] + movd xmm1,r8d + + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + + movdqa xmm0,xmm8 + pshufd xmm1,xmm1,0 + mov rax,64 + +$L$select_loop_sse_w7: + movdqa xmm15,xmm8 + paddd xmm8,xmm0 + movdqa xmm9,XMMWORD[rdx] + movdqa xmm10,XMMWORD[16+rdx] + pcmpeqd xmm15,xmm1 + movdqa xmm11,XMMWORD[32+rdx] + movdqa xmm12,XMMWORD[48+rdx] + lea rdx,[64+rdx] + + pand xmm9,xmm15 + pand xmm10,xmm15 + por xmm2,xmm9 + pand xmm11,xmm15 + por xmm3,xmm10 + pand xmm12,xmm15 + por xmm4,xmm11 + prefetcht0 [255+rdx] + por xmm5,xmm12 + + dec rax + jnz NEAR $L$select_loop_sse_w7 + + movdqu XMMWORD[rcx],xmm2 + movdqu XMMWORD[16+rcx],xmm3 + movdqu XMMWORD[32+rcx],xmm4 + movdqu XMMWORD[48+rcx],xmm5 + movaps xmm6,XMMWORD[rsp] + movaps xmm7,XMMWORD[16+rsp] + movaps xmm8,XMMWORD[32+rsp] + movaps xmm9,XMMWORD[48+rsp] + movaps xmm10,XMMWORD[64+rsp] + movaps xmm11,XMMWORD[80+rsp] + movaps xmm12,XMMWORD[96+rsp] + movaps xmm13,XMMWORD[112+rsp] + movaps xmm14,XMMWORD[128+rsp] + movaps xmm15,XMMWORD[144+rsp] + lea rsp,[168+rsp] + ret + +$L$SEH_end_ecp_nistz256_select_w7: + + + + +ALIGN 32 +ecp_nistz256_avx2_select_w5: + +$L$avx2_select_w5: + vzeroupper + lea rax,[((-136))+rsp] + mov r11,rsp +$L$SEH_begin_ecp_nistz256_avx2_select_w5: + DB 0x48,0x8d,0x60,0xe0 + DB 0xc5,0xf8,0x29,0x70,0xe0 + DB 0xc5,0xf8,0x29,0x78,0xf0 + DB 0xc5,0x78,0x29,0x40,0x00 + DB 0xc5,0x78,0x29,0x48,0x10 + DB 0xc5,0x78,0x29,0x50,0x20 + DB 0xc5,0x78,0x29,0x58,0x30 + DB 0xc5,0x78,0x29,0x60,0x40 + DB 0xc5,0x78,0x29,0x68,0x50 + DB 0xc5,0x78,0x29,0x70,0x60 + DB 0xc5,0x78,0x29,0x78,0x70 + vmovdqa ymm0,YMMWORD[$L$Two] + + vpxor ymm2,ymm2,ymm2 + vpxor ymm3,ymm3,ymm3 + vpxor ymm4,ymm4,ymm4 + + vmovdqa ymm5,YMMWORD[$L$One] + vmovdqa ymm10,YMMWORD[$L$Two] + + vmovd xmm1,r8d + vpermd ymm1,ymm2,ymm1 + + mov rax,8 +$L$select_loop_avx2_w5: + + vmovdqa ymm6,YMMWORD[rdx] + vmovdqa ymm7,YMMWORD[32+rdx] + vmovdqa ymm8,YMMWORD[64+rdx] + + vmovdqa ymm11,YMMWORD[96+rdx] + vmovdqa ymm12,YMMWORD[128+rdx] + vmovdqa ymm13,YMMWORD[160+rdx] + + vpcmpeqd ymm9,ymm5,ymm1 + vpcmpeqd ymm14,ymm10,ymm1 + + vpaddd ymm5,ymm5,ymm0 + vpaddd ymm10,ymm10,ymm0 + lea rdx,[192+rdx] + + vpand ymm6,ymm6,ymm9 + vpand ymm7,ymm7,ymm9 + vpand ymm8,ymm8,ymm9 + vpand ymm11,ymm11,ymm14 + vpand ymm12,ymm12,ymm14 + vpand ymm13,ymm13,ymm14 + + vpxor ymm2,ymm2,ymm6 + vpxor ymm3,ymm3,ymm7 + vpxor ymm4,ymm4,ymm8 + vpxor ymm2,ymm2,ymm11 + vpxor ymm3,ymm3,ymm12 + vpxor ymm4,ymm4,ymm13 + + dec rax + jnz NEAR $L$select_loop_avx2_w5 + + vmovdqu YMMWORD[rcx],ymm2 + vmovdqu YMMWORD[32+rcx],ymm3 + vmovdqu YMMWORD[64+rcx],ymm4 + vzeroupper + movaps xmm6,XMMWORD[rsp] + movaps xmm7,XMMWORD[16+rsp] + movaps xmm8,XMMWORD[32+rsp] + movaps xmm9,XMMWORD[48+rsp] + movaps xmm10,XMMWORD[64+rsp] + movaps xmm11,XMMWORD[80+rsp] + movaps xmm12,XMMWORD[96+rsp] + movaps xmm13,XMMWORD[112+rsp] + movaps xmm14,XMMWORD[128+rsp] + movaps xmm15,XMMWORD[144+rsp] + lea rsp,[r11] + ret + +$L$SEH_end_ecp_nistz256_avx2_select_w5: + + + + +global ecp_nistz256_avx2_select_w7 + +ALIGN 32 +ecp_nistz256_avx2_select_w7: + +$L$avx2_select_w7: +_CET_ENDBR + vzeroupper + mov r11,rsp + lea rax,[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_avx2_select_w7: + DB 0x48,0x8d,0x60,0xe0 + DB 0xc5,0xf8,0x29,0x70,0xe0 + DB 0xc5,0xf8,0x29,0x78,0xf0 + DB 0xc5,0x78,0x29,0x40,0x00 + DB 0xc5,0x78,0x29,0x48,0x10 + DB 0xc5,0x78,0x29,0x50,0x20 + DB 0xc5,0x78,0x29,0x58,0x30 + DB 0xc5,0x78,0x29,0x60,0x40 + DB 0xc5,0x78,0x29,0x68,0x50 + DB 0xc5,0x78,0x29,0x70,0x60 + DB 0xc5,0x78,0x29,0x78,0x70 + vmovdqa ymm0,YMMWORD[$L$Three] + + vpxor ymm2,ymm2,ymm2 + vpxor ymm3,ymm3,ymm3 + + vmovdqa ymm4,YMMWORD[$L$One] + vmovdqa ymm8,YMMWORD[$L$Two] + vmovdqa ymm12,YMMWORD[$L$Three] + + vmovd xmm1,r8d + vpermd ymm1,ymm2,ymm1 + + + mov rax,21 +$L$select_loop_avx2_w7: + + vmovdqa ymm5,YMMWORD[rdx] + vmovdqa ymm6,YMMWORD[32+rdx] + + vmovdqa ymm9,YMMWORD[64+rdx] + vmovdqa ymm10,YMMWORD[96+rdx] + + vmovdqa ymm13,YMMWORD[128+rdx] + vmovdqa ymm14,YMMWORD[160+rdx] + + vpcmpeqd ymm7,ymm4,ymm1 + vpcmpeqd ymm11,ymm8,ymm1 + vpcmpeqd ymm15,ymm12,ymm1 + + vpaddd ymm4,ymm4,ymm0 + vpaddd ymm8,ymm8,ymm0 + vpaddd ymm12,ymm12,ymm0 + lea rdx,[192+rdx] + + vpand ymm5,ymm5,ymm7 + vpand ymm6,ymm6,ymm7 + vpand ymm9,ymm9,ymm11 + vpand ymm10,ymm10,ymm11 + vpand ymm13,ymm13,ymm15 + vpand ymm14,ymm14,ymm15 + + vpxor ymm2,ymm2,ymm5 + vpxor ymm3,ymm3,ymm6 + vpxor ymm2,ymm2,ymm9 + vpxor ymm3,ymm3,ymm10 + vpxor ymm2,ymm2,ymm13 + vpxor ymm3,ymm3,ymm14 + + dec rax + jnz NEAR $L$select_loop_avx2_w7 + + + vmovdqa ymm5,YMMWORD[rdx] + vmovdqa ymm6,YMMWORD[32+rdx] + + vpcmpeqd ymm7,ymm4,ymm1 + + vpand ymm5,ymm5,ymm7 + vpand ymm6,ymm6,ymm7 + + vpxor ymm2,ymm2,ymm5 + vpxor ymm3,ymm3,ymm6 + + vmovdqu YMMWORD[rcx],ymm2 + vmovdqu YMMWORD[32+rcx],ymm3 + vzeroupper + movaps xmm6,XMMWORD[rsp] + movaps xmm7,XMMWORD[16+rsp] + movaps xmm8,XMMWORD[32+rsp] + movaps xmm9,XMMWORD[48+rsp] + movaps xmm10,XMMWORD[64+rsp] + movaps xmm11,XMMWORD[80+rsp] + movaps xmm12,XMMWORD[96+rsp] + movaps xmm13,XMMWORD[112+rsp] + movaps xmm14,XMMWORD[128+rsp] + movaps xmm15,XMMWORD[144+rsp] + lea rsp,[r11] + ret + +$L$SEH_end_ecp_nistz256_avx2_select_w7: + + +ALIGN 32 +__ecp_nistz256_add_toq: + + xor r11,r11 + add r12,QWORD[rbx] + adc r13,QWORD[8+rbx] + mov rax,r12 + adc r8,QWORD[16+rbx] + adc r9,QWORD[24+rbx] + mov rbp,r13 + adc r11,0 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + sbb r11,0 + + cmovc r12,rax + cmovc r13,rbp + mov QWORD[rdi],r12 + cmovc r8,rcx + mov QWORD[8+rdi],r13 + cmovc r9,r10 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + ret + + + + +ALIGN 32 +__ecp_nistz256_sub_fromq: + + sub r12,QWORD[rbx] + sbb r13,QWORD[8+rbx] + mov rax,r12 + sbb r8,QWORD[16+rbx] + sbb r9,QWORD[24+rbx] + mov rbp,r13 + sbb r11,r11 + + add r12,-1 + mov rcx,r8 + adc r13,r14 + adc r8,0 + mov r10,r9 + adc r9,r15 + test r11,r11 + + cmovz r12,rax + cmovz r13,rbp + mov QWORD[rdi],r12 + cmovz r8,rcx + mov QWORD[8+rdi],r13 + cmovz r9,r10 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + ret + + + + +ALIGN 32 +__ecp_nistz256_subq: + + sub rax,r12 + sbb rbp,r13 + mov r12,rax + sbb rcx,r8 + sbb r10,r9 + mov r13,rbp + sbb r11,r11 + + add rax,-1 + mov r8,rcx + adc rbp,r14 + adc rcx,0 + mov r9,r10 + adc r10,r15 + test r11,r11 + + cmovnz r12,rax + cmovnz r13,rbp + cmovnz r8,rcx + cmovnz r9,r10 + + ret + + + + +ALIGN 32 +__ecp_nistz256_mul_by_2q: + + xor r11,r11 + add r12,r12 + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + adc r11,0 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + sbb r11,0 + + cmovc r12,rax + cmovc r13,rbp + mov QWORD[rdi],r12 + cmovc r8,rcx + mov QWORD[8+rdi],r13 + cmovc r9,r10 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + ret + + +global ecp_nistz256_point_double + +ALIGN 32 +ecp_nistz256_point_double: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_double: + mov rdi,rcx + mov rsi,rdx + + + +_CET_ENDBR + lea rcx,[OPENSSL_ia32cap_P] + mov rcx,QWORD[8+rcx] + and ecx,0x80100 + cmp ecx,0x80100 + je NEAR $L$point_doublex + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,32*5+8 + +$L$point_doubleq_body: + +$L$point_double_shortcutq: + movdqu xmm0,XMMWORD[rsi] + mov rbx,rsi + movdqu xmm1,XMMWORD[16+rsi] + mov r12,QWORD[((32+0))+rsi] + mov r13,QWORD[((32+8))+rsi] + mov r8,QWORD[((32+16))+rsi] + mov r9,QWORD[((32+24))+rsi] + mov r14,QWORD[(($L$poly+8))] + mov r15,QWORD[(($L$poly+24))] + movdqa XMMWORD[96+rsp],xmm0 + movdqa XMMWORD[(96+16)+rsp],xmm1 + lea r10,[32+rdi] + lea r11,[64+rdi] +DB 102,72,15,110,199 +DB 102,73,15,110,202 +DB 102,73,15,110,211 + + lea rdi,[rsp] + call __ecp_nistz256_mul_by_2q + + mov rax,QWORD[((64+0))+rsi] + mov r14,QWORD[((64+8))+rsi] + mov r15,QWORD[((64+16))+rsi] + mov r8,QWORD[((64+24))+rsi] + lea rsi,[((64-0))+rsi] + lea rdi,[64+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD[((0+0))+rsp] + mov r14,QWORD[((8+0))+rsp] + lea rsi,[((0+0))+rsp] + mov r15,QWORD[((16+0))+rsp] + mov r8,QWORD[((24+0))+rsp] + lea rdi,[rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD[32+rbx] + mov r9,QWORD[((64+0))+rbx] + mov r10,QWORD[((64+8))+rbx] + mov r11,QWORD[((64+16))+rbx] + mov r12,QWORD[((64+24))+rbx] + lea rsi,[((64-0))+rbx] + lea rbx,[32+rbx] +DB 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + mov r12,QWORD[((96+0))+rsp] + mov r13,QWORD[((96+8))+rsp] + lea rbx,[64+rsp] + mov r8,QWORD[((96+16))+rsp] + mov r9,QWORD[((96+24))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_add_toq + + mov r12,QWORD[((96+0))+rsp] + mov r13,QWORD[((96+8))+rsp] + lea rbx,[64+rsp] + mov r8,QWORD[((96+16))+rsp] + mov r9,QWORD[((96+24))+rsp] + lea rdi,[64+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD[((0+0))+rsp] + mov r14,QWORD[((8+0))+rsp] + lea rsi,[((0+0))+rsp] + mov r15,QWORD[((16+0))+rsp] + mov r8,QWORD[((24+0))+rsp] +DB 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xor r9,r9 + mov rax,r12 + add r12,-1 + mov r10,r13 + adc r13,rsi + mov rcx,r14 + adc r14,0 + mov r8,r15 + adc r15,rbp + adc r9,0 + xor rsi,rsi + test rax,1 + + cmovz r12,rax + cmovz r13,r10 + cmovz r14,rcx + cmovz r15,r8 + cmovz r9,rsi + + mov rax,r13 + shr r12,1 + shl rax,63 + mov r10,r14 + shr r13,1 + or r12,rax + shl r10,63 + mov rcx,r15 + shr r14,1 + or r13,r10 + shl rcx,63 + mov QWORD[rdi],r12 + shr r15,1 + mov QWORD[8+rdi],r13 + shl r9,63 + or r14,rcx + or r15,r9 + mov QWORD[16+rdi],r14 + mov QWORD[24+rdi],r15 + mov rax,QWORD[64+rsp] + lea rbx,[64+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((0+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_mul_montq + + lea rdi,[128+rsp] + call __ecp_nistz256_mul_by_2q + + lea rbx,[32+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_add_toq + + mov rax,QWORD[96+rsp] + lea rbx,[96+rsp] + mov r9,QWORD[((0+0))+rsp] + mov r10,QWORD[((8+0))+rsp] + lea rsi,[((0+0))+rsp] + mov r11,QWORD[((16+0))+rsp] + mov r12,QWORD[((24+0))+rsp] + lea rdi,[rsp] + call __ecp_nistz256_mul_montq + + lea rdi,[128+rsp] + call __ecp_nistz256_mul_by_2q + + mov rax,QWORD[((0+32))+rsp] + mov r14,QWORD[((8+32))+rsp] + lea rsi,[((0+32))+rsp] + mov r15,QWORD[((16+32))+rsp] + mov r8,QWORD[((24+32))+rsp] +DB 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + lea rbx,[128+rsp] + mov r8,r14 + mov r9,r15 + mov r14,rsi + mov r15,rbp + call __ecp_nistz256_sub_fromq + + mov rax,QWORD[((0+0))+rsp] + mov rbp,QWORD[((0+8))+rsp] + mov rcx,QWORD[((0+16))+rsp] + mov r10,QWORD[((0+24))+rsp] + lea rdi,[rsp] + call __ecp_nistz256_subq + + mov rax,QWORD[32+rsp] + lea rbx,[32+rsp] + mov r14,r12 + xor ecx,ecx + mov QWORD[((0+0))+rsp],r12 + mov r10,r13 + mov QWORD[((0+8))+rsp],r13 + cmovz r11,r8 + mov QWORD[((0+16))+rsp],r8 + lea rsi,[((0-0))+rsp] + cmovz r12,r9 + mov QWORD[((0+24))+rsp],r9 + mov r9,r14 + lea rdi,[rsp] + call __ecp_nistz256_mul_montq + +DB 102,72,15,126,203 +DB 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + lea rsi,[((160+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$point_doubleq_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_point_double: +global ecp_nistz256_point_add + +ALIGN 32 +ecp_nistz256_point_add: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_add: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rcx,[OPENSSL_ia32cap_P] + mov rcx,QWORD[8+rcx] + and ecx,0x80100 + cmp ecx,0x80100 + je NEAR $L$point_addx + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,32*18+8 + +$L$point_addq_body: + + movdqu xmm0,XMMWORD[rsi] + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + movdqu xmm3,XMMWORD[48+rsi] + movdqu xmm4,XMMWORD[64+rsi] + movdqu xmm5,XMMWORD[80+rsi] + mov rbx,rsi + mov rsi,rdx + movdqa XMMWORD[384+rsp],xmm0 + movdqa XMMWORD[(384+16)+rsp],xmm1 + movdqa XMMWORD[416+rsp],xmm2 + movdqa XMMWORD[(416+16)+rsp],xmm3 + movdqa XMMWORD[448+rsp],xmm4 + movdqa XMMWORD[(448+16)+rsp],xmm5 + por xmm5,xmm4 + + movdqu xmm0,XMMWORD[rsi] + pshufd xmm3,xmm5,0xb1 + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + por xmm5,xmm3 + movdqu xmm3,XMMWORD[48+rsi] + mov rax,QWORD[((64+0))+rsi] + mov r14,QWORD[((64+8))+rsi] + mov r15,QWORD[((64+16))+rsi] + mov r8,QWORD[((64+24))+rsi] + movdqa XMMWORD[480+rsp],xmm0 + pshufd xmm4,xmm5,0x1e + movdqa XMMWORD[(480+16)+rsp],xmm1 + movdqu xmm0,XMMWORD[64+rsi] + movdqu xmm1,XMMWORD[80+rsi] + movdqa XMMWORD[512+rsp],xmm2 + movdqa XMMWORD[(512+16)+rsp],xmm3 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm1,xmm0 +DB 102,72,15,110,199 + + lea rsi,[((64-0))+rsi] + mov QWORD[((544+0))+rsp],rax + mov QWORD[((544+8))+rsp],r14 + mov QWORD[((544+16))+rsp],r15 + mov QWORD[((544+24))+rsp],r8 + lea rdi,[96+rsp] + call __ecp_nistz256_sqr_montq + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm1,0xb1 + por xmm4,xmm1 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,0x1e + por xmm4,xmm3 + pxor xmm3,xmm3 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + mov rax,QWORD[((64+0))+rbx] + mov r14,QWORD[((64+8))+rbx] + mov r15,QWORD[((64+16))+rbx] + mov r8,QWORD[((64+24))+rbx] +DB 102,72,15,110,203 + + lea rsi,[((64-0))+rbx] + lea rdi,[32+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD[544+rsp] + lea rbx,[544+rsp] + mov r9,QWORD[((0+96))+rsp] + mov r10,QWORD[((8+96))+rsp] + lea rsi,[((0+96))+rsp] + mov r11,QWORD[((16+96))+rsp] + mov r12,QWORD[((24+96))+rsp] + lea rdi,[224+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[448+rsp] + lea rbx,[448+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((0+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[256+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[416+rsp] + lea rbx,[416+rsp] + mov r9,QWORD[((0+224))+rsp] + mov r10,QWORD[((8+224))+rsp] + lea rsi,[((0+224))+rsp] + mov r11,QWORD[((16+224))+rsp] + mov r12,QWORD[((24+224))+rsp] + lea rdi,[224+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[512+rsp] + lea rbx,[512+rsp] + mov r9,QWORD[((0+256))+rsp] + mov r10,QWORD[((8+256))+rsp] + lea rsi,[((0+256))+rsp] + mov r11,QWORD[((16+256))+rsp] + mov r12,QWORD[((24+256))+rsp] + lea rdi,[256+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,[224+rsp] + lea rdi,[64+rsp] + call __ecp_nistz256_sub_fromq + + or r12,r13 + movdqa xmm2,xmm4 + or r12,r8 + or r12,r9 + por xmm2,xmm5 +DB 102,73,15,110,220 + + mov rax,QWORD[384+rsp] + lea rbx,[384+rsp] + mov r9,QWORD[((0+96))+rsp] + mov r10,QWORD[((8+96))+rsp] + lea rsi,[((0+96))+rsp] + mov r11,QWORD[((16+96))+rsp] + mov r12,QWORD[((24+96))+rsp] + lea rdi,[160+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[480+rsp] + lea rbx,[480+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((0+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[192+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,[160+rsp] + lea rdi,[rsp] + call __ecp_nistz256_sub_fromq + + or r12,r13 + or r12,r8 + or r12,r9 + +DB 102,73,15,126,208 +DB 102,73,15,126,217 + or r12,r8 + DB 0x3e + jnz NEAR $L$add_proceedq + + + + test r9,r9 + jz NEAR $L$add_doubleq + + + + + + +DB 102,72,15,126,199 + pxor xmm0,xmm0 + movdqu XMMWORD[rdi],xmm0 + movdqu XMMWORD[16+rdi],xmm0 + movdqu XMMWORD[32+rdi],xmm0 + movdqu XMMWORD[48+rdi],xmm0 + movdqu XMMWORD[64+rdi],xmm0 + movdqu XMMWORD[80+rdi],xmm0 + jmp NEAR $L$add_doneq + +ALIGN 32 +$L$add_doubleq: +DB 102,72,15,126,206 +DB 102,72,15,126,199 + add rsp,416 + + jmp NEAR $L$point_double_shortcutq + + +ALIGN 32 +$L$add_proceedq: + mov rax,QWORD[((0+64))+rsp] + mov r14,QWORD[((8+64))+rsp] + lea rsi,[((0+64))+rsp] + mov r15,QWORD[((16+64))+rsp] + mov r8,QWORD[((24+64))+rsp] + lea rdi,[96+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD[448+rsp] + lea rbx,[448+rsp] + mov r9,QWORD[((0+0))+rsp] + mov r10,QWORD[((8+0))+rsp] + lea rsi,[((0+0))+rsp] + mov r11,QWORD[((16+0))+rsp] + mov r12,QWORD[((24+0))+rsp] + lea rdi,[352+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[((0+0))+rsp] + mov r14,QWORD[((8+0))+rsp] + lea rsi,[((0+0))+rsp] + mov r15,QWORD[((16+0))+rsp] + mov r8,QWORD[((24+0))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD[544+rsp] + lea rbx,[544+rsp] + mov r9,QWORD[((0+352))+rsp] + mov r10,QWORD[((8+352))+rsp] + lea rsi,[((0+352))+rsp] + mov r11,QWORD[((16+352))+rsp] + mov r12,QWORD[((24+352))+rsp] + lea rdi,[352+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[rsp] + lea rbx,[rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((0+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[128+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[160+rsp] + lea rbx,[160+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((0+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[192+rsp] + call __ecp_nistz256_mul_montq + + + + + xor r11,r11 + add r12,r12 + lea rsi,[96+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + adc r11,0 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + sbb r11,0 + + cmovc r12,rax + mov rax,QWORD[rsi] + cmovc r13,rbp + mov rbp,QWORD[8+rsi] + cmovc r8,rcx + mov rcx,QWORD[16+rsi] + cmovc r9,r10 + mov r10,QWORD[24+rsi] + + call __ecp_nistz256_subq + + lea rbx,[128+rsp] + lea rdi,[288+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD[((192+0))+rsp] + mov rbp,QWORD[((192+8))+rsp] + mov rcx,QWORD[((192+16))+rsp] + mov r10,QWORD[((192+24))+rsp] + lea rdi,[320+rsp] + + call __ecp_nistz256_subq + + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + mov rax,QWORD[128+rsp] + lea rbx,[128+rsp] + mov r9,QWORD[((0+224))+rsp] + mov r10,QWORD[((8+224))+rsp] + lea rsi,[((0+224))+rsp] + mov r11,QWORD[((16+224))+rsp] + mov r12,QWORD[((24+224))+rsp] + lea rdi,[256+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[320+rsp] + lea rbx,[320+rsp] + mov r9,QWORD[((0+64))+rsp] + mov r10,QWORD[((8+64))+rsp] + lea rsi,[((0+64))+rsp] + mov r11,QWORD[((16+64))+rsp] + mov r12,QWORD[((24+64))+rsp] + lea rdi,[320+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,[256+rsp] + lea rdi,[320+rsp] + call __ecp_nistz256_sub_fromq + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[352+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((352+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[544+rsp] + pand xmm3,XMMWORD[((544+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[448+rsp] + pand xmm3,XMMWORD[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[64+rdi],xmm2 + movdqu XMMWORD[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[480+rsp] + pand xmm3,XMMWORD[((480+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[384+rsp] + pand xmm3,XMMWORD[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[rdi],xmm2 + movdqu XMMWORD[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[320+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((320+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[512+rsp] + pand xmm3,XMMWORD[((512+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[416+rsp] + pand xmm3,XMMWORD[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[32+rdi],xmm2 + movdqu XMMWORD[48+rdi],xmm3 + +$L$add_doneq: + lea rsi,[((576+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$point_addq_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_point_add: +global ecp_nistz256_point_add_affine + +ALIGN 32 +ecp_nistz256_point_add_affine: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_add_affine: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rcx,[OPENSSL_ia32cap_P] + mov rcx,QWORD[8+rcx] + and ecx,0x80100 + cmp ecx,0x80100 + je NEAR $L$point_add_affinex + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,32*15+8 + +$L$add_affineq_body: + + movdqu xmm0,XMMWORD[rsi] + mov rbx,rdx + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + movdqu xmm3,XMMWORD[48+rsi] + movdqu xmm4,XMMWORD[64+rsi] + movdqu xmm5,XMMWORD[80+rsi] + mov rax,QWORD[((64+0))+rsi] + mov r14,QWORD[((64+8))+rsi] + mov r15,QWORD[((64+16))+rsi] + mov r8,QWORD[((64+24))+rsi] + movdqa XMMWORD[320+rsp],xmm0 + movdqa XMMWORD[(320+16)+rsp],xmm1 + movdqa XMMWORD[352+rsp],xmm2 + movdqa XMMWORD[(352+16)+rsp],xmm3 + movdqa XMMWORD[384+rsp],xmm4 + movdqa XMMWORD[(384+16)+rsp],xmm5 + por xmm5,xmm4 + + movdqu xmm0,XMMWORD[rbx] + pshufd xmm3,xmm5,0xb1 + movdqu xmm1,XMMWORD[16+rbx] + movdqu xmm2,XMMWORD[32+rbx] + por xmm5,xmm3 + movdqu xmm3,XMMWORD[48+rbx] + movdqa XMMWORD[416+rsp],xmm0 + pshufd xmm4,xmm5,0x1e + movdqa XMMWORD[(416+16)+rsp],xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 + movdqa XMMWORD[448+rsp],xmm2 + movdqa XMMWORD[(448+16)+rsp],xmm3 + por xmm3,xmm2 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm3,xmm1 + + lea rsi,[((64-0))+rsi] + lea rdi,[32+rsp] + call __ecp_nistz256_sqr_montq + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm3,0xb1 + mov rax,QWORD[rbx] + + mov r9,r12 + por xmm4,xmm3 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,0x1e + mov r10,r13 + por xmm4,xmm3 + pxor xmm3,xmm3 + mov r11,r14 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + + lea rsi,[((32-0))+rsp] + mov r12,r15 + lea rdi,[rsp] + call __ecp_nistz256_mul_montq + + lea rbx,[320+rsp] + lea rdi,[64+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD[384+rsp] + lea rbx,[384+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((0+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[384+rsp] + lea rbx,[384+rsp] + mov r9,QWORD[((0+64))+rsp] + mov r10,QWORD[((8+64))+rsp] + lea rsi,[((0+64))+rsp] + mov r11,QWORD[((16+64))+rsp] + mov r12,QWORD[((24+64))+rsp] + lea rdi,[288+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[448+rsp] + lea rbx,[448+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((0+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,[352+rsp] + lea rdi,[96+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD[((0+64))+rsp] + mov r14,QWORD[((8+64))+rsp] + lea rsi,[((0+64))+rsp] + mov r15,QWORD[((16+64))+rsp] + mov r8,QWORD[((24+64))+rsp] + lea rdi,[128+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD[((0+96))+rsp] + mov r14,QWORD[((8+96))+rsp] + lea rsi,[((0+96))+rsp] + mov r15,QWORD[((16+96))+rsp] + mov r8,QWORD[((24+96))+rsp] + lea rdi,[192+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD[128+rsp] + lea rbx,[128+rsp] + mov r9,QWORD[((0+64))+rsp] + mov r10,QWORD[((8+64))+rsp] + lea rsi,[((0+64))+rsp] + mov r11,QWORD[((16+64))+rsp] + mov r12,QWORD[((24+64))+rsp] + lea rdi,[160+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[320+rsp] + lea rbx,[320+rsp] + mov r9,QWORD[((0+128))+rsp] + mov r10,QWORD[((8+128))+rsp] + lea rsi,[((0+128))+rsp] + mov r11,QWORD[((16+128))+rsp] + mov r12,QWORD[((24+128))+rsp] + lea rdi,[rsp] + call __ecp_nistz256_mul_montq + + + + + xor r11,r11 + add r12,r12 + lea rsi,[192+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + adc r11,0 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + sbb r11,0 + + cmovc r12,rax + mov rax,QWORD[rsi] + cmovc r13,rbp + mov rbp,QWORD[8+rsi] + cmovc r8,rcx + mov rcx,QWORD[16+rsi] + cmovc r9,r10 + mov r10,QWORD[24+rsi] + + call __ecp_nistz256_subq + + lea rbx,[160+rsp] + lea rdi,[224+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD[((0+0))+rsp] + mov rbp,QWORD[((0+8))+rsp] + mov rcx,QWORD[((0+16))+rsp] + mov r10,QWORD[((0+24))+rsp] + lea rdi,[64+rsp] + + call __ecp_nistz256_subq + + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + mov rax,QWORD[352+rsp] + lea rbx,[352+rsp] + mov r9,QWORD[((0+160))+rsp] + mov r10,QWORD[((8+160))+rsp] + lea rsi,[((0+160))+rsp] + mov r11,QWORD[((16+160))+rsp] + mov r12,QWORD[((24+160))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD[96+rsp] + lea rbx,[96+rsp] + mov r9,QWORD[((0+64))+rsp] + mov r10,QWORD[((8+64))+rsp] + lea rsi,[((0+64))+rsp] + mov r11,QWORD[((16+64))+rsp] + mov r12,QWORD[((24+64))+rsp] + lea rdi,[64+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,[32+rsp] + lea rdi,[256+rsp] + call __ecp_nistz256_sub_fromq + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[$L$ONE_mont] + pand xmm3,XMMWORD[(($L$ONE_mont+16))] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[384+rsp] + pand xmm3,XMMWORD[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[64+rdi],xmm2 + movdqu XMMWORD[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[224+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((224+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[416+rsp] + pand xmm3,XMMWORD[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[320+rsp] + pand xmm3,XMMWORD[((320+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[rdi],xmm2 + movdqu XMMWORD[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[256+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((256+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[448+rsp] + pand xmm3,XMMWORD[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[352+rsp] + pand xmm3,XMMWORD[((352+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[32+rdi],xmm2 + movdqu XMMWORD[48+rdi],xmm3 + + lea rsi,[((480+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$add_affineq_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_point_add_affine: + +ALIGN 32 +__ecp_nistz256_add_tox: + + xor r11,r11 + adc r12,QWORD[rbx] + adc r13,QWORD[8+rbx] + mov rax,r12 + adc r8,QWORD[16+rbx] + adc r9,QWORD[24+rbx] + mov rbp,r13 + adc r11,0 + + xor r10,r10 + sbb r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + sbb r11,0 + + cmovc r12,rax + cmovc r13,rbp + mov QWORD[rdi],r12 + cmovc r8,rcx + mov QWORD[8+rdi],r13 + cmovc r9,r10 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + ret + + + + +ALIGN 32 +__ecp_nistz256_sub_fromx: + + xor r11,r11 + sbb r12,QWORD[rbx] + sbb r13,QWORD[8+rbx] + mov rax,r12 + sbb r8,QWORD[16+rbx] + sbb r9,QWORD[24+rbx] + mov rbp,r13 + sbb r11,0 + + xor r10,r10 + adc r12,-1 + mov rcx,r8 + adc r13,r14 + adc r8,0 + mov r10,r9 + adc r9,r15 + + bt r11,0 + cmovnc r12,rax + cmovnc r13,rbp + mov QWORD[rdi],r12 + cmovnc r8,rcx + mov QWORD[8+rdi],r13 + cmovnc r9,r10 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + ret + + + + +ALIGN 32 +__ecp_nistz256_subx: + + xor r11,r11 + sbb rax,r12 + sbb rbp,r13 + mov r12,rax + sbb rcx,r8 + sbb r10,r9 + mov r13,rbp + sbb r11,0 + + xor r9,r9 + adc rax,-1 + mov r8,rcx + adc rbp,r14 + adc rcx,0 + mov r9,r10 + adc r10,r15 + + bt r11,0 + cmovc r12,rax + cmovc r13,rbp + cmovc r8,rcx + cmovc r9,r10 + + ret + + + + +ALIGN 32 +__ecp_nistz256_mul_by_2x: + + xor r11,r11 + adc r12,r12 + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + adc r11,0 + + xor r10,r10 + sbb r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + sbb r11,0 + + cmovc r12,rax + cmovc r13,rbp + mov QWORD[rdi],r12 + cmovc r8,rcx + mov QWORD[8+rdi],r13 + cmovc r9,r10 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + ret + + + +ALIGN 32 +ecp_nistz256_point_doublex: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_doublex: + mov rdi,rcx + mov rsi,rdx + + + +$L$point_doublex: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,32*5+8 + +$L$point_doublex_body: + +$L$point_double_shortcutx: + movdqu xmm0,XMMWORD[rsi] + mov rbx,rsi + movdqu xmm1,XMMWORD[16+rsi] + mov r12,QWORD[((32+0))+rsi] + mov r13,QWORD[((32+8))+rsi] + mov r8,QWORD[((32+16))+rsi] + mov r9,QWORD[((32+24))+rsi] + mov r14,QWORD[(($L$poly+8))] + mov r15,QWORD[(($L$poly+24))] + movdqa XMMWORD[96+rsp],xmm0 + movdqa XMMWORD[(96+16)+rsp],xmm1 + lea r10,[32+rdi] + lea r11,[64+rdi] +DB 102,72,15,110,199 +DB 102,73,15,110,202 +DB 102,73,15,110,211 + + lea rdi,[rsp] + call __ecp_nistz256_mul_by_2x + + mov rdx,QWORD[((64+0))+rsi] + mov r14,QWORD[((64+8))+rsi] + mov r15,QWORD[((64+16))+rsi] + mov r8,QWORD[((64+24))+rsi] + lea rsi,[((64-128))+rsi] + lea rdi,[64+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD[((0+0))+rsp] + mov r14,QWORD[((8+0))+rsp] + lea rsi,[((-128+0))+rsp] + mov r15,QWORD[((16+0))+rsp] + mov r8,QWORD[((24+0))+rsp] + lea rdi,[rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD[32+rbx] + mov r9,QWORD[((64+0))+rbx] + mov r10,QWORD[((64+8))+rbx] + mov r11,QWORD[((64+16))+rbx] + mov r12,QWORD[((64+24))+rbx] + lea rsi,[((64-128))+rbx] + lea rbx,[32+rbx] +DB 102,72,15,126,215 + call __ecp_nistz256_mul_montx + call __ecp_nistz256_mul_by_2x + + mov r12,QWORD[((96+0))+rsp] + mov r13,QWORD[((96+8))+rsp] + lea rbx,[64+rsp] + mov r8,QWORD[((96+16))+rsp] + mov r9,QWORD[((96+24))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_add_tox + + mov r12,QWORD[((96+0))+rsp] + mov r13,QWORD[((96+8))+rsp] + lea rbx,[64+rsp] + mov r8,QWORD[((96+16))+rsp] + mov r9,QWORD[((96+24))+rsp] + lea rdi,[64+rsp] + call __ecp_nistz256_sub_fromx + + mov rdx,QWORD[((0+0))+rsp] + mov r14,QWORD[((8+0))+rsp] + lea rsi,[((-128+0))+rsp] + mov r15,QWORD[((16+0))+rsp] + mov r8,QWORD[((24+0))+rsp] +DB 102,72,15,126,207 + call __ecp_nistz256_sqr_montx + xor r9,r9 + mov rax,r12 + add r12,-1 + mov r10,r13 + adc r13,rsi + mov rcx,r14 + adc r14,0 + mov r8,r15 + adc r15,rbp + adc r9,0 + xor rsi,rsi + test rax,1 + + cmovz r12,rax + cmovz r13,r10 + cmovz r14,rcx + cmovz r15,r8 + cmovz r9,rsi + + mov rax,r13 + shr r12,1 + shl rax,63 + mov r10,r14 + shr r13,1 + or r12,rax + shl r10,63 + mov rcx,r15 + shr r14,1 + or r13,r10 + shl rcx,63 + mov QWORD[rdi],r12 + shr r15,1 + mov QWORD[8+rdi],r13 + shl r9,63 + or r14,rcx + or r15,r9 + mov QWORD[16+rdi],r14 + mov QWORD[24+rdi],r15 + mov rdx,QWORD[64+rsp] + lea rbx,[64+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((-128+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_mul_montx + + lea rdi,[128+rsp] + call __ecp_nistz256_mul_by_2x + + lea rbx,[32+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_add_tox + + mov rdx,QWORD[96+rsp] + lea rbx,[96+rsp] + mov r9,QWORD[((0+0))+rsp] + mov r10,QWORD[((8+0))+rsp] + lea rsi,[((-128+0))+rsp] + mov r11,QWORD[((16+0))+rsp] + mov r12,QWORD[((24+0))+rsp] + lea rdi,[rsp] + call __ecp_nistz256_mul_montx + + lea rdi,[128+rsp] + call __ecp_nistz256_mul_by_2x + + mov rdx,QWORD[((0+32))+rsp] + mov r14,QWORD[((8+32))+rsp] + lea rsi,[((-128+32))+rsp] + mov r15,QWORD[((16+32))+rsp] + mov r8,QWORD[((24+32))+rsp] +DB 102,72,15,126,199 + call __ecp_nistz256_sqr_montx + + lea rbx,[128+rsp] + mov r8,r14 + mov r9,r15 + mov r14,rsi + mov r15,rbp + call __ecp_nistz256_sub_fromx + + mov rax,QWORD[((0+0))+rsp] + mov rbp,QWORD[((0+8))+rsp] + mov rcx,QWORD[((0+16))+rsp] + mov r10,QWORD[((0+24))+rsp] + lea rdi,[rsp] + call __ecp_nistz256_subx + + mov rdx,QWORD[32+rsp] + lea rbx,[32+rsp] + mov r14,r12 + xor ecx,ecx + mov QWORD[((0+0))+rsp],r12 + mov r10,r13 + mov QWORD[((0+8))+rsp],r13 + cmovz r11,r8 + mov QWORD[((0+16))+rsp],r8 + lea rsi,[((0-128))+rsp] + cmovz r12,r9 + mov QWORD[((0+24))+rsp],r9 + mov r9,r14 + lea rdi,[rsp] + call __ecp_nistz256_mul_montx + +DB 102,72,15,126,203 +DB 102,72,15,126,207 + call __ecp_nistz256_sub_fromx + + lea rsi,[((160+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$point_doublex_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_point_doublex: + +ALIGN 32 +ecp_nistz256_point_addx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_addx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$point_addx: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,32*18+8 + +$L$point_addx_body: + + movdqu xmm0,XMMWORD[rsi] + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + movdqu xmm3,XMMWORD[48+rsi] + movdqu xmm4,XMMWORD[64+rsi] + movdqu xmm5,XMMWORD[80+rsi] + mov rbx,rsi + mov rsi,rdx + movdqa XMMWORD[384+rsp],xmm0 + movdqa XMMWORD[(384+16)+rsp],xmm1 + movdqa XMMWORD[416+rsp],xmm2 + movdqa XMMWORD[(416+16)+rsp],xmm3 + movdqa XMMWORD[448+rsp],xmm4 + movdqa XMMWORD[(448+16)+rsp],xmm5 + por xmm5,xmm4 + + movdqu xmm0,XMMWORD[rsi] + pshufd xmm3,xmm5,0xb1 + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + por xmm5,xmm3 + movdqu xmm3,XMMWORD[48+rsi] + mov rdx,QWORD[((64+0))+rsi] + mov r14,QWORD[((64+8))+rsi] + mov r15,QWORD[((64+16))+rsi] + mov r8,QWORD[((64+24))+rsi] + movdqa XMMWORD[480+rsp],xmm0 + pshufd xmm4,xmm5,0x1e + movdqa XMMWORD[(480+16)+rsp],xmm1 + movdqu xmm0,XMMWORD[64+rsi] + movdqu xmm1,XMMWORD[80+rsi] + movdqa XMMWORD[512+rsp],xmm2 + movdqa XMMWORD[(512+16)+rsp],xmm3 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm1,xmm0 +DB 102,72,15,110,199 + + lea rsi,[((64-128))+rsi] + mov QWORD[((544+0))+rsp],rdx + mov QWORD[((544+8))+rsp],r14 + mov QWORD[((544+16))+rsp],r15 + mov QWORD[((544+24))+rsp],r8 + lea rdi,[96+rsp] + call __ecp_nistz256_sqr_montx + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm1,0xb1 + por xmm4,xmm1 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,0x1e + por xmm4,xmm3 + pxor xmm3,xmm3 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + mov rdx,QWORD[((64+0))+rbx] + mov r14,QWORD[((64+8))+rbx] + mov r15,QWORD[((64+16))+rbx] + mov r8,QWORD[((64+24))+rbx] +DB 102,72,15,110,203 + + lea rsi,[((64-128))+rbx] + lea rdi,[32+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD[544+rsp] + lea rbx,[544+rsp] + mov r9,QWORD[((0+96))+rsp] + mov r10,QWORD[((8+96))+rsp] + lea rsi,[((-128+96))+rsp] + mov r11,QWORD[((16+96))+rsp] + mov r12,QWORD[((24+96))+rsp] + lea rdi,[224+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[448+rsp] + lea rbx,[448+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((-128+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[256+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[416+rsp] + lea rbx,[416+rsp] + mov r9,QWORD[((0+224))+rsp] + mov r10,QWORD[((8+224))+rsp] + lea rsi,[((-128+224))+rsp] + mov r11,QWORD[((16+224))+rsp] + mov r12,QWORD[((24+224))+rsp] + lea rdi,[224+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[512+rsp] + lea rbx,[512+rsp] + mov r9,QWORD[((0+256))+rsp] + mov r10,QWORD[((8+256))+rsp] + lea rsi,[((-128+256))+rsp] + mov r11,QWORD[((16+256))+rsp] + mov r12,QWORD[((24+256))+rsp] + lea rdi,[256+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,[224+rsp] + lea rdi,[64+rsp] + call __ecp_nistz256_sub_fromx + + or r12,r13 + movdqa xmm2,xmm4 + or r12,r8 + or r12,r9 + por xmm2,xmm5 +DB 102,73,15,110,220 + + mov rdx,QWORD[384+rsp] + lea rbx,[384+rsp] + mov r9,QWORD[((0+96))+rsp] + mov r10,QWORD[((8+96))+rsp] + lea rsi,[((-128+96))+rsp] + mov r11,QWORD[((16+96))+rsp] + mov r12,QWORD[((24+96))+rsp] + lea rdi,[160+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[480+rsp] + lea rbx,[480+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((-128+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[192+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,[160+rsp] + lea rdi,[rsp] + call __ecp_nistz256_sub_fromx + + or r12,r13 + or r12,r8 + or r12,r9 + +DB 102,73,15,126,208 +DB 102,73,15,126,217 + or r12,r8 + DB 0x3e + jnz NEAR $L$add_proceedx + + + + test r9,r9 + jz NEAR $L$add_doublex + + + + + + +DB 102,72,15,126,199 + pxor xmm0,xmm0 + movdqu XMMWORD[rdi],xmm0 + movdqu XMMWORD[16+rdi],xmm0 + movdqu XMMWORD[32+rdi],xmm0 + movdqu XMMWORD[48+rdi],xmm0 + movdqu XMMWORD[64+rdi],xmm0 + movdqu XMMWORD[80+rdi],xmm0 + jmp NEAR $L$add_donex + +ALIGN 32 +$L$add_doublex: +DB 102,72,15,126,206 +DB 102,72,15,126,199 + add rsp,416 + + jmp NEAR $L$point_double_shortcutx + + +ALIGN 32 +$L$add_proceedx: + mov rdx,QWORD[((0+64))+rsp] + mov r14,QWORD[((8+64))+rsp] + lea rsi,[((-128+64))+rsp] + mov r15,QWORD[((16+64))+rsp] + mov r8,QWORD[((24+64))+rsp] + lea rdi,[96+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD[448+rsp] + lea rbx,[448+rsp] + mov r9,QWORD[((0+0))+rsp] + mov r10,QWORD[((8+0))+rsp] + lea rsi,[((-128+0))+rsp] + mov r11,QWORD[((16+0))+rsp] + mov r12,QWORD[((24+0))+rsp] + lea rdi,[352+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[((0+0))+rsp] + mov r14,QWORD[((8+0))+rsp] + lea rsi,[((-128+0))+rsp] + mov r15,QWORD[((16+0))+rsp] + mov r8,QWORD[((24+0))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD[544+rsp] + lea rbx,[544+rsp] + mov r9,QWORD[((0+352))+rsp] + mov r10,QWORD[((8+352))+rsp] + lea rsi,[((-128+352))+rsp] + mov r11,QWORD[((16+352))+rsp] + mov r12,QWORD[((24+352))+rsp] + lea rdi,[352+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[rsp] + lea rbx,[rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((-128+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[128+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[160+rsp] + lea rbx,[160+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((-128+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[192+rsp] + call __ecp_nistz256_mul_montx + + + + + xor r11,r11 + add r12,r12 + lea rsi,[96+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + adc r11,0 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + sbb r11,0 + + cmovc r12,rax + mov rax,QWORD[rsi] + cmovc r13,rbp + mov rbp,QWORD[8+rsi] + cmovc r8,rcx + mov rcx,QWORD[16+rsi] + cmovc r9,r10 + mov r10,QWORD[24+rsi] + + call __ecp_nistz256_subx + + lea rbx,[128+rsp] + lea rdi,[288+rsp] + call __ecp_nistz256_sub_fromx + + mov rax,QWORD[((192+0))+rsp] + mov rbp,QWORD[((192+8))+rsp] + mov rcx,QWORD[((192+16))+rsp] + mov r10,QWORD[((192+24))+rsp] + lea rdi,[320+rsp] + + call __ecp_nistz256_subx + + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + mov rdx,QWORD[128+rsp] + lea rbx,[128+rsp] + mov r9,QWORD[((0+224))+rsp] + mov r10,QWORD[((8+224))+rsp] + lea rsi,[((-128+224))+rsp] + mov r11,QWORD[((16+224))+rsp] + mov r12,QWORD[((24+224))+rsp] + lea rdi,[256+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[320+rsp] + lea rbx,[320+rsp] + mov r9,QWORD[((0+64))+rsp] + mov r10,QWORD[((8+64))+rsp] + lea rsi,[((-128+64))+rsp] + mov r11,QWORD[((16+64))+rsp] + mov r12,QWORD[((24+64))+rsp] + lea rdi,[320+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,[256+rsp] + lea rdi,[320+rsp] + call __ecp_nistz256_sub_fromx + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[352+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((352+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[544+rsp] + pand xmm3,XMMWORD[((544+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[448+rsp] + pand xmm3,XMMWORD[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[64+rdi],xmm2 + movdqu XMMWORD[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[480+rsp] + pand xmm3,XMMWORD[((480+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[384+rsp] + pand xmm3,XMMWORD[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[rdi],xmm2 + movdqu XMMWORD[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[320+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((320+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[512+rsp] + pand xmm3,XMMWORD[((512+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[416+rsp] + pand xmm3,XMMWORD[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[32+rdi],xmm2 + movdqu XMMWORD[48+rdi],xmm3 + +$L$add_donex: + lea rsi,[((576+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$point_addx_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_point_addx: + +ALIGN 32 +ecp_nistz256_point_add_affinex: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_add_affinex: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$point_add_affinex: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,32*15+8 + +$L$add_affinex_body: + + movdqu xmm0,XMMWORD[rsi] + mov rbx,rdx + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + movdqu xmm3,XMMWORD[48+rsi] + movdqu xmm4,XMMWORD[64+rsi] + movdqu xmm5,XMMWORD[80+rsi] + mov rdx,QWORD[((64+0))+rsi] + mov r14,QWORD[((64+8))+rsi] + mov r15,QWORD[((64+16))+rsi] + mov r8,QWORD[((64+24))+rsi] + movdqa XMMWORD[320+rsp],xmm0 + movdqa XMMWORD[(320+16)+rsp],xmm1 + movdqa XMMWORD[352+rsp],xmm2 + movdqa XMMWORD[(352+16)+rsp],xmm3 + movdqa XMMWORD[384+rsp],xmm4 + movdqa XMMWORD[(384+16)+rsp],xmm5 + por xmm5,xmm4 + + movdqu xmm0,XMMWORD[rbx] + pshufd xmm3,xmm5,0xb1 + movdqu xmm1,XMMWORD[16+rbx] + movdqu xmm2,XMMWORD[32+rbx] + por xmm5,xmm3 + movdqu xmm3,XMMWORD[48+rbx] + movdqa XMMWORD[416+rsp],xmm0 + pshufd xmm4,xmm5,0x1e + movdqa XMMWORD[(416+16)+rsp],xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 + movdqa XMMWORD[448+rsp],xmm2 + movdqa XMMWORD[(448+16)+rsp],xmm3 + por xmm3,xmm2 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm3,xmm1 + + lea rsi,[((64-128))+rsi] + lea rdi,[32+rsp] + call __ecp_nistz256_sqr_montx + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm3,0xb1 + mov rdx,QWORD[rbx] + + mov r9,r12 + por xmm4,xmm3 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,0x1e + mov r10,r13 + por xmm4,xmm3 + pxor xmm3,xmm3 + mov r11,r14 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + + lea rsi,[((32-128))+rsp] + mov r12,r15 + lea rdi,[rsp] + call __ecp_nistz256_mul_montx + + lea rbx,[320+rsp] + lea rdi,[64+rsp] + call __ecp_nistz256_sub_fromx + + mov rdx,QWORD[384+rsp] + lea rbx,[384+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((-128+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[384+rsp] + lea rbx,[384+rsp] + mov r9,QWORD[((0+64))+rsp] + mov r10,QWORD[((8+64))+rsp] + lea rsi,[((-128+64))+rsp] + mov r11,QWORD[((16+64))+rsp] + mov r12,QWORD[((24+64))+rsp] + lea rdi,[288+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[448+rsp] + lea rbx,[448+rsp] + mov r9,QWORD[((0+32))+rsp] + mov r10,QWORD[((8+32))+rsp] + lea rsi,[((-128+32))+rsp] + mov r11,QWORD[((16+32))+rsp] + mov r12,QWORD[((24+32))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,[352+rsp] + lea rdi,[96+rsp] + call __ecp_nistz256_sub_fromx + + mov rdx,QWORD[((0+64))+rsp] + mov r14,QWORD[((8+64))+rsp] + lea rsi,[((-128+64))+rsp] + mov r15,QWORD[((16+64))+rsp] + mov r8,QWORD[((24+64))+rsp] + lea rdi,[128+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD[((0+96))+rsp] + mov r14,QWORD[((8+96))+rsp] + lea rsi,[((-128+96))+rsp] + mov r15,QWORD[((16+96))+rsp] + mov r8,QWORD[((24+96))+rsp] + lea rdi,[192+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD[128+rsp] + lea rbx,[128+rsp] + mov r9,QWORD[((0+64))+rsp] + mov r10,QWORD[((8+64))+rsp] + lea rsi,[((-128+64))+rsp] + mov r11,QWORD[((16+64))+rsp] + mov r12,QWORD[((24+64))+rsp] + lea rdi,[160+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[320+rsp] + lea rbx,[320+rsp] + mov r9,QWORD[((0+128))+rsp] + mov r10,QWORD[((8+128))+rsp] + lea rsi,[((-128+128))+rsp] + mov r11,QWORD[((16+128))+rsp] + mov r12,QWORD[((24+128))+rsp] + lea rdi,[rsp] + call __ecp_nistz256_mul_montx + + + + + xor r11,r11 + add r12,r12 + lea rsi,[192+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + adc r11,0 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + sbb r11,0 + + cmovc r12,rax + mov rax,QWORD[rsi] + cmovc r13,rbp + mov rbp,QWORD[8+rsi] + cmovc r8,rcx + mov rcx,QWORD[16+rsi] + cmovc r9,r10 + mov r10,QWORD[24+rsi] + + call __ecp_nistz256_subx + + lea rbx,[160+rsp] + lea rdi,[224+rsp] + call __ecp_nistz256_sub_fromx + + mov rax,QWORD[((0+0))+rsp] + mov rbp,QWORD[((0+8))+rsp] + mov rcx,QWORD[((0+16))+rsp] + mov r10,QWORD[((0+24))+rsp] + lea rdi,[64+rsp] + + call __ecp_nistz256_subx + + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + mov rdx,QWORD[352+rsp] + lea rbx,[352+rsp] + mov r9,QWORD[((0+160))+rsp] + mov r10,QWORD[((8+160))+rsp] + lea rsi,[((-128+160))+rsp] + mov r11,QWORD[((16+160))+rsp] + mov r12,QWORD[((24+160))+rsp] + lea rdi,[32+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD[96+rsp] + lea rbx,[96+rsp] + mov r9,QWORD[((0+64))+rsp] + mov r10,QWORD[((8+64))+rsp] + lea rsi,[((-128+64))+rsp] + mov r11,QWORD[((16+64))+rsp] + mov r12,QWORD[((24+64))+rsp] + lea rdi,[64+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,[32+rsp] + lea rdi,[256+rsp] + call __ecp_nistz256_sub_fromx + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[$L$ONE_mont] + pand xmm3,XMMWORD[(($L$ONE_mont+16))] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[384+rsp] + pand xmm3,XMMWORD[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[64+rdi],xmm2 + movdqu XMMWORD[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[224+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((224+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[416+rsp] + pand xmm3,XMMWORD[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[320+rsp] + pand xmm3,XMMWORD[((320+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[rdi],xmm2 + movdqu XMMWORD[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD[256+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD[((256+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD[448+rsp] + pand xmm3,XMMWORD[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD[352+rsp] + pand xmm3,XMMWORD[((352+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD[32+rdi],xmm2 + movdqu XMMWORD[48+rdi],xmm3 + + lea rsi,[((480+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$add_affinex_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ecp_nistz256_point_add_affinex: +EXTERN __imp_RtlVirtualUnwind + + +ALIGN 16 +short_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rax,[16+rax] + + mov r12,QWORD[((-8))+rax] + mov r13,QWORD[((-16))+rax] + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + + jmp NEAR $L$common_seh_tail + + + +ALIGN 16 +full_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov r10d,DWORD[8+r11] + lea rax,[r10*1+rax] + + mov rbp,QWORD[((-8))+rax] + mov rbx,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_ecp_nistz256_neg wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_neg wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_neg wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_ord_mul_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_ord_mul_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_ord_mul_mont wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_ord_sqr_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_ord_sqr_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_ord_sqr_mont wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_mul_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_mul_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_mul_mont wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_sqr_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_sqr_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_sqr_mont wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_select_w5 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_select_w5 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_select_w7 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_select_w7 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_avx2_select_w5 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_avx2_select_w5 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_avx2_select_w7 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_avx2_select_w7 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_point_double wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_double wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_double wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_point_add wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_add wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_add wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_point_add_affine wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_add_affine wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_add_affine wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_point_doublex wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_doublex wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_doublex wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_point_addx wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_addx wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_addx wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_point_add_affinex wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_add_affinex wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_add_affinex wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_ecp_nistz256_neg: + DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$neg_body wrt ..imagebase,$L$neg_epilogue wrt ..imagebase +$L$SEH_info_ecp_nistz256_ord_mul_mont: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$ord_mul_body wrt ..imagebase,$L$ord_mul_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_ord_sqr_mont: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_ord_mul_montx: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_ord_sqr_montx: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_mul_mont: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_sqr_mont: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_select_wX: + DB 0x01,0x33,0x16,0x00 + DB 0x33,0xf8,0x09,0x00 + DB 0x2e,0xe8,0x08,0x00 + DB 0x29,0xd8,0x07,0x00 + DB 0x24,0xc8,0x06,0x00 + DB 0x1f,0xb8,0x05,0x00 + DB 0x1a,0xa8,0x04,0x00 + DB 0x15,0x98,0x03,0x00 + DB 0x10,0x88,0x02,0x00 + DB 0x0c,0x78,0x01,0x00 + DB 0x08,0x68,0x00,0x00 + DB 0x04,0x01,0x15,0x00 +ALIGN 8 +$L$SEH_info_ecp_nistz256_avx2_select_wX: + DB 0x01,0x36,0x17,0x0b + DB 0x36,0xf8,0x09,0x00 + DB 0x31,0xe8,0x08,0x00 + DB 0x2c,0xd8,0x07,0x00 + DB 0x27,0xc8,0x06,0x00 + DB 0x22,0xb8,0x05,0x00 + DB 0x1d,0xa8,0x04,0x00 + DB 0x18,0x98,0x03,0x00 + DB 0x13,0x88,0x02,0x00 + DB 0x0e,0x78,0x01,0x00 + DB 0x09,0x68,0x00,0x00 + DB 0x04,0x01,0x15,0x00 + DB 0x00,0xb3,0x00,0x00 +ALIGN 8 +$L$SEH_info_ecp_nistz256_point_double: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$point_doubleq_body wrt ..imagebase,$L$point_doubleq_epilogue wrt ..imagebase + DD 32*5+56,0 +$L$SEH_info_ecp_nistz256_point_add: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$point_addq_body wrt ..imagebase,$L$point_addq_epilogue wrt ..imagebase + DD 32*18+56,0 +$L$SEH_info_ecp_nistz256_point_add_affine: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase + DD 32*15+56,0 +ALIGN 8 +$L$SEH_info_ecp_nistz256_point_doublex: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase + DD 32*5+56,0 +$L$SEH_info_ecp_nistz256_point_addx: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase + DD 32*18+56,0 +$L$SEH_info_ecp_nistz256_point_add_affinex: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase + DD 32*15+56,0 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-apple.S b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-apple.S new file mode 100644 index 0000000000..49ea9b868b --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-apple.S @@ -0,0 +1,309 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include "openssl/arm_arch.h" + +.text +.globl _beeu_mod_inverse_vartime +.private_extern _beeu_mod_inverse_vartime + +.align 4 +_beeu_mod_inverse_vartime: + // Reserve enough space for 14 8-byte registers on the stack + // in the first stp call for x29, x30. + // Then store the remaining callee-saved registers. + // + // | x29 | x30 | x19 | x20 | ... | x27 | x28 | x0 | x2 | + // ^ ^ + // sp <------------------- 112 bytes ----------------> old sp + // x29 (FP) + // + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-112]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x0,x2,[sp,#96] + + // B = b3..b0 := a + ldp x25,x26,[x1] + ldp x27,x28,[x1,#16] + + // n3..n0 := n + // Note: the value of input params are changed in the following. + ldp x0,x1,[x2] + ldp x2,x30,[x2,#16] + + // A = a3..a0 := n + mov x21, x0 + mov x22, x1 + mov x23, x2 + mov x24, x30 + + // X = x4..x0 := 1 + mov x3, #1 + eor x4, x4, x4 + eor x5, x5, x5 + eor x6, x6, x6 + eor x7, x7, x7 + + // Y = y4..y0 := 0 + eor x8, x8, x8 + eor x9, x9, x9 + eor x10, x10, x10 + eor x11, x11, x11 + eor x12, x12, x12 + +Lbeeu_loop: + // if B == 0, jump to .Lbeeu_loop_end + orr x14, x25, x26 + orr x14, x14, x27 + + // reverse the bit order of x25. This is needed for clz after this macro + rbit x15, x25 + + orr x14, x14, x28 + cbz x14,Lbeeu_loop_end + + + // 0 < B < |n|, + // 0 < A <= |n|, + // (1) X*a == B (mod |n|), + // (2) (-1)*Y*a == A (mod |n|) + + // Now divide B by the maximum possible power of two in the + // integers, and divide X by the same value mod |n|. + // When we're done, (1) still holds. + + // shift := number of trailing 0s in x25 + // ( = number of leading 0s in x15; see the "rbit" instruction in TEST_B_ZERO) + clz x13, x15 + + // If there is no shift, goto shift_A_Y + cbz x13, Lbeeu_shift_A_Y + + // Shift B right by "x13" bits + neg x14, x13 + lsr x25, x25, x13 + lsl x15, x26, x14 + + lsr x26, x26, x13 + lsl x19, x27, x14 + + orr x25, x25, x15 + + lsr x27, x27, x13 + lsl x20, x28, x14 + + orr x26, x26, x19 + + lsr x28, x28, x13 + + orr x27, x27, x20 + + + // Shift X right by "x13" bits, adding n whenever X becomes odd. + // x13--; + // x14 := 0; needed in the addition to the most significant word in SHIFT1 + eor x14, x14, x14 +Lbeeu_shift_loop_X: + tbz x3, #0, Lshift1_0 + adds x3, x3, x0 + adcs x4, x4, x1 + adcs x5, x5, x2 + adcs x6, x6, x30 + adc x7, x7, x14 +Lshift1_0: + // var0 := [var1|var0]<64..1>; + // i.e. concatenate var1 and var0, + // extract bits <64..1> from the resulting 128-bit value + // and put them in var0 + extr x3, x4, x3, #1 + extr x4, x5, x4, #1 + extr x5, x6, x5, #1 + extr x6, x7, x6, #1 + lsr x7, x7, #1 + + subs x13, x13, #1 + bne Lbeeu_shift_loop_X + + // Note: the steps above perform the same sequence as in p256_beeu-x86_64-asm.pl + // with the following differences: + // - "x13" is set directly to the number of trailing 0s in B + // (using rbit and clz instructions) + // - The loop is only used to call SHIFT1(X) + // and x13 is decreased while executing the X loop. + // - SHIFT256(B, x13) is performed before right-shifting X; they are independent + +Lbeeu_shift_A_Y: + // Same for A and Y. + // Afterwards, (2) still holds. + // Reverse the bit order of x21 + // x13 := number of trailing 0s in x21 (= number of leading 0s in x15) + rbit x15, x21 + clz x13, x15 + + // If there is no shift, goto |B-A|, X+Y update + cbz x13, Lbeeu_update_B_X_or_A_Y + + // Shift A right by "x13" bits + neg x14, x13 + lsr x21, x21, x13 + lsl x15, x22, x14 + + lsr x22, x22, x13 + lsl x19, x23, x14 + + orr x21, x21, x15 + + lsr x23, x23, x13 + lsl x20, x24, x14 + + orr x22, x22, x19 + + lsr x24, x24, x13 + + orr x23, x23, x20 + + + // Shift Y right by "x13" bits, adding n whenever Y becomes odd. + // x13--; + // x14 := 0; needed in the addition to the most significant word in SHIFT1 + eor x14, x14, x14 +Lbeeu_shift_loop_Y: + tbz x8, #0, Lshift1_1 + adds x8, x8, x0 + adcs x9, x9, x1 + adcs x10, x10, x2 + adcs x11, x11, x30 + adc x12, x12, x14 +Lshift1_1: + // var0 := [var1|var0]<64..1>; + // i.e. concatenate var1 and var0, + // extract bits <64..1> from the resulting 128-bit value + // and put them in var0 + extr x8, x9, x8, #1 + extr x9, x10, x9, #1 + extr x10, x11, x10, #1 + extr x11, x12, x11, #1 + lsr x12, x12, #1 + + subs x13, x13, #1 + bne Lbeeu_shift_loop_Y + +Lbeeu_update_B_X_or_A_Y: + // Try T := B - A; if cs, continue with B > A (cs: carry set = no borrow) + // Note: this is a case of unsigned arithmetic, where T fits in 4 64-bit words + // without taking a sign bit if generated. The lack of a carry would + // indicate a negative result. See, for example, + // https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/condition-codes-1-condition-flags-and-codes + subs x14, x25, x21 + sbcs x15, x26, x22 + sbcs x19, x27, x23 + sbcs x20, x28, x24 + bcs Lbeeu_B_greater_than_A + + // Else A > B => + // A := A - B; Y := Y + X; goto beginning of the loop + subs x21, x21, x25 + sbcs x22, x22, x26 + sbcs x23, x23, x27 + sbcs x24, x24, x28 + + adds x8, x8, x3 + adcs x9, x9, x4 + adcs x10, x10, x5 + adcs x11, x11, x6 + adc x12, x12, x7 + b Lbeeu_loop + +Lbeeu_B_greater_than_A: + // Continue with B > A => + // B := B - A; X := X + Y; goto beginning of the loop + mov x25, x14 + mov x26, x15 + mov x27, x19 + mov x28, x20 + + adds x3, x3, x8 + adcs x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, x12 + b Lbeeu_loop + +Lbeeu_loop_end: + // The Euclid's algorithm loop ends when A == gcd(a,n); + // this would be 1, when a and n are co-prime (i.e. do not have a common factor). + // Since (-1)*Y*a == A (mod |n|), Y>0 + // then out = -Y mod n + + // Verify that A = 1 ==> (-1)*Y*a = A = 1 (mod |n|) + // Is A-1 == 0? + // If not, fail. + sub x14, x21, #1 + orr x14, x14, x22 + orr x14, x14, x23 + orr x14, x14, x24 + cbnz x14, Lbeeu_err + + // If Y>n ==> Y:=Y-n +Lbeeu_reduction_loop: + // x_i := y_i - n_i (X is no longer needed, use it as temp) + // (x14 = 0 from above) + subs x3, x8, x0 + sbcs x4, x9, x1 + sbcs x5, x10, x2 + sbcs x6, x11, x30 + sbcs x7, x12, x14 + + // If result is non-negative (i.e., cs = carry set = no borrow), + // y_i := x_i; goto reduce again + // else + // y_i := y_i; continue + csel x8, x3, x8, cs + csel x9, x4, x9, cs + csel x10, x5, x10, cs + csel x11, x6, x11, cs + csel x12, x7, x12, cs + bcs Lbeeu_reduction_loop + + // Now Y < n (Y cannot be equal to n, since the inverse cannot be 0) + // out = -Y = n-Y + subs x8, x0, x8 + sbcs x9, x1, x9 + sbcs x10, x2, x10 + sbcs x11, x30, x11 + + // Save Y in output (out (x0) was saved on the stack) + ldr x3, [sp,#96] + stp x8, x9, [x3] + stp x10, x11, [x3,#16] + // return 1 (success) + mov x0, #1 + b Lbeeu_finish + +Lbeeu_err: + // return 0 (error) + eor x0, x0, x0 + +Lbeeu_finish: + // Restore callee-saved registers, except x0, x2 + add sp,x29,#0 + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldp x29,x30,[sp],#112 + + AARCH64_VALIDATE_LINK_REGISTER + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-linux.S b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-linux.S new file mode 100644 index 0000000000..8e09b6184d --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-linux.S @@ -0,0 +1,309 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include "openssl/arm_arch.h" + +.text +.globl beeu_mod_inverse_vartime +.hidden beeu_mod_inverse_vartime +.type beeu_mod_inverse_vartime, %function +.align 4 +beeu_mod_inverse_vartime: + // Reserve enough space for 14 8-byte registers on the stack + // in the first stp call for x29, x30. + // Then store the remaining callee-saved registers. + // + // | x29 | x30 | x19 | x20 | ... | x27 | x28 | x0 | x2 | + // ^ ^ + // sp <------------------- 112 bytes ----------------> old sp + // x29 (FP) + // + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-112]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x0,x2,[sp,#96] + + // B = b3..b0 := a + ldp x25,x26,[x1] + ldp x27,x28,[x1,#16] + + // n3..n0 := n + // Note: the value of input params are changed in the following. + ldp x0,x1,[x2] + ldp x2,x30,[x2,#16] + + // A = a3..a0 := n + mov x21, x0 + mov x22, x1 + mov x23, x2 + mov x24, x30 + + // X = x4..x0 := 1 + mov x3, #1 + eor x4, x4, x4 + eor x5, x5, x5 + eor x6, x6, x6 + eor x7, x7, x7 + + // Y = y4..y0 := 0 + eor x8, x8, x8 + eor x9, x9, x9 + eor x10, x10, x10 + eor x11, x11, x11 + eor x12, x12, x12 + +.Lbeeu_loop: + // if B == 0, jump to .Lbeeu_loop_end + orr x14, x25, x26 + orr x14, x14, x27 + + // reverse the bit order of x25. This is needed for clz after this macro + rbit x15, x25 + + orr x14, x14, x28 + cbz x14,.Lbeeu_loop_end + + + // 0 < B < |n|, + // 0 < A <= |n|, + // (1) X*a == B (mod |n|), + // (2) (-1)*Y*a == A (mod |n|) + + // Now divide B by the maximum possible power of two in the + // integers, and divide X by the same value mod |n|. + // When we're done, (1) still holds. + + // shift := number of trailing 0s in x25 + // ( = number of leading 0s in x15; see the "rbit" instruction in TEST_B_ZERO) + clz x13, x15 + + // If there is no shift, goto shift_A_Y + cbz x13, .Lbeeu_shift_A_Y + + // Shift B right by "x13" bits + neg x14, x13 + lsr x25, x25, x13 + lsl x15, x26, x14 + + lsr x26, x26, x13 + lsl x19, x27, x14 + + orr x25, x25, x15 + + lsr x27, x27, x13 + lsl x20, x28, x14 + + orr x26, x26, x19 + + lsr x28, x28, x13 + + orr x27, x27, x20 + + + // Shift X right by "x13" bits, adding n whenever X becomes odd. + // x13--; + // x14 := 0; needed in the addition to the most significant word in SHIFT1 + eor x14, x14, x14 +.Lbeeu_shift_loop_X: + tbz x3, #0, .Lshift1_0 + adds x3, x3, x0 + adcs x4, x4, x1 + adcs x5, x5, x2 + adcs x6, x6, x30 + adc x7, x7, x14 +.Lshift1_0: + // var0 := [var1|var0]<64..1>; + // i.e. concatenate var1 and var0, + // extract bits <64..1> from the resulting 128-bit value + // and put them in var0 + extr x3, x4, x3, #1 + extr x4, x5, x4, #1 + extr x5, x6, x5, #1 + extr x6, x7, x6, #1 + lsr x7, x7, #1 + + subs x13, x13, #1 + bne .Lbeeu_shift_loop_X + + // Note: the steps above perform the same sequence as in p256_beeu-x86_64-asm.pl + // with the following differences: + // - "x13" is set directly to the number of trailing 0s in B + // (using rbit and clz instructions) + // - The loop is only used to call SHIFT1(X) + // and x13 is decreased while executing the X loop. + // - SHIFT256(B, x13) is performed before right-shifting X; they are independent + +.Lbeeu_shift_A_Y: + // Same for A and Y. + // Afterwards, (2) still holds. + // Reverse the bit order of x21 + // x13 := number of trailing 0s in x21 (= number of leading 0s in x15) + rbit x15, x21 + clz x13, x15 + + // If there is no shift, goto |B-A|, X+Y update + cbz x13, .Lbeeu_update_B_X_or_A_Y + + // Shift A right by "x13" bits + neg x14, x13 + lsr x21, x21, x13 + lsl x15, x22, x14 + + lsr x22, x22, x13 + lsl x19, x23, x14 + + orr x21, x21, x15 + + lsr x23, x23, x13 + lsl x20, x24, x14 + + orr x22, x22, x19 + + lsr x24, x24, x13 + + orr x23, x23, x20 + + + // Shift Y right by "x13" bits, adding n whenever Y becomes odd. + // x13--; + // x14 := 0; needed in the addition to the most significant word in SHIFT1 + eor x14, x14, x14 +.Lbeeu_shift_loop_Y: + tbz x8, #0, .Lshift1_1 + adds x8, x8, x0 + adcs x9, x9, x1 + adcs x10, x10, x2 + adcs x11, x11, x30 + adc x12, x12, x14 +.Lshift1_1: + // var0 := [var1|var0]<64..1>; + // i.e. concatenate var1 and var0, + // extract bits <64..1> from the resulting 128-bit value + // and put them in var0 + extr x8, x9, x8, #1 + extr x9, x10, x9, #1 + extr x10, x11, x10, #1 + extr x11, x12, x11, #1 + lsr x12, x12, #1 + + subs x13, x13, #1 + bne .Lbeeu_shift_loop_Y + +.Lbeeu_update_B_X_or_A_Y: + // Try T := B - A; if cs, continue with B > A (cs: carry set = no borrow) + // Note: this is a case of unsigned arithmetic, where T fits in 4 64-bit words + // without taking a sign bit if generated. The lack of a carry would + // indicate a negative result. See, for example, + // https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/condition-codes-1-condition-flags-and-codes + subs x14, x25, x21 + sbcs x15, x26, x22 + sbcs x19, x27, x23 + sbcs x20, x28, x24 + bcs .Lbeeu_B_greater_than_A + + // Else A > B => + // A := A - B; Y := Y + X; goto beginning of the loop + subs x21, x21, x25 + sbcs x22, x22, x26 + sbcs x23, x23, x27 + sbcs x24, x24, x28 + + adds x8, x8, x3 + adcs x9, x9, x4 + adcs x10, x10, x5 + adcs x11, x11, x6 + adc x12, x12, x7 + b .Lbeeu_loop + +.Lbeeu_B_greater_than_A: + // Continue with B > A => + // B := B - A; X := X + Y; goto beginning of the loop + mov x25, x14 + mov x26, x15 + mov x27, x19 + mov x28, x20 + + adds x3, x3, x8 + adcs x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, x12 + b .Lbeeu_loop + +.Lbeeu_loop_end: + // The Euclid's algorithm loop ends when A == gcd(a,n); + // this would be 1, when a and n are co-prime (i.e. do not have a common factor). + // Since (-1)*Y*a == A (mod |n|), Y>0 + // then out = -Y mod n + + // Verify that A = 1 ==> (-1)*Y*a = A = 1 (mod |n|) + // Is A-1 == 0? + // If not, fail. + sub x14, x21, #1 + orr x14, x14, x22 + orr x14, x14, x23 + orr x14, x14, x24 + cbnz x14, .Lbeeu_err + + // If Y>n ==> Y:=Y-n +.Lbeeu_reduction_loop: + // x_i := y_i - n_i (X is no longer needed, use it as temp) + // (x14 = 0 from above) + subs x3, x8, x0 + sbcs x4, x9, x1 + sbcs x5, x10, x2 + sbcs x6, x11, x30 + sbcs x7, x12, x14 + + // If result is non-negative (i.e., cs = carry set = no borrow), + // y_i := x_i; goto reduce again + // else + // y_i := y_i; continue + csel x8, x3, x8, cs + csel x9, x4, x9, cs + csel x10, x5, x10, cs + csel x11, x6, x11, cs + csel x12, x7, x12, cs + bcs .Lbeeu_reduction_loop + + // Now Y < n (Y cannot be equal to n, since the inverse cannot be 0) + // out = -Y = n-Y + subs x8, x0, x8 + sbcs x9, x1, x9 + sbcs x10, x2, x10 + sbcs x11, x30, x11 + + // Save Y in output (out (x0) was saved on the stack) + ldr x3, [sp,#96] + stp x8, x9, [x3] + stp x10, x11, [x3,#16] + // return 1 (success) + mov x0, #1 + b .Lbeeu_finish + +.Lbeeu_err: + // return 0 (error) + eor x0, x0, x0 + +.Lbeeu_finish: + // Restore callee-saved registers, except x0, x2 + add sp,x29,#0 + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldp x29,x30,[sp],#112 + + AARCH64_VALIDATE_LINK_REGISTER + ret +.size beeu_mod_inverse_vartime,.-beeu_mod_inverse_vartime +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-win.S b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-win.S new file mode 100644 index 0000000000..ac6eb17c71 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-armv8-asm-win.S @@ -0,0 +1,309 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include "openssl/arm_arch.h" + +.text +.globl beeu_mod_inverse_vartime + + +.align 4 +beeu_mod_inverse_vartime: + // Reserve enough space for 14 8-byte registers on the stack + // in the first stp call for x29, x30. + // Then store the remaining callee-saved registers. + // + // | x29 | x30 | x19 | x20 | ... | x27 | x28 | x0 | x2 | + // ^ ^ + // sp <------------------- 112 bytes ----------------> old sp + // x29 (FP) + // + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-112]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x0,x2,[sp,#96] + + // B = b3..b0 := a + ldp x25,x26,[x1] + ldp x27,x28,[x1,#16] + + // n3..n0 := n + // Note: the value of input params are changed in the following. + ldp x0,x1,[x2] + ldp x2,x30,[x2,#16] + + // A = a3..a0 := n + mov x21, x0 + mov x22, x1 + mov x23, x2 + mov x24, x30 + + // X = x4..x0 := 1 + mov x3, #1 + eor x4, x4, x4 + eor x5, x5, x5 + eor x6, x6, x6 + eor x7, x7, x7 + + // Y = y4..y0 := 0 + eor x8, x8, x8 + eor x9, x9, x9 + eor x10, x10, x10 + eor x11, x11, x11 + eor x12, x12, x12 + +Lbeeu_loop: + // if B == 0, jump to .Lbeeu_loop_end + orr x14, x25, x26 + orr x14, x14, x27 + + // reverse the bit order of x25. This is needed for clz after this macro + rbit x15, x25 + + orr x14, x14, x28 + cbz x14,Lbeeu_loop_end + + + // 0 < B < |n|, + // 0 < A <= |n|, + // (1) X*a == B (mod |n|), + // (2) (-1)*Y*a == A (mod |n|) + + // Now divide B by the maximum possible power of two in the + // integers, and divide X by the same value mod |n|. + // When we're done, (1) still holds. + + // shift := number of trailing 0s in x25 + // ( = number of leading 0s in x15; see the "rbit" instruction in TEST_B_ZERO) + clz x13, x15 + + // If there is no shift, goto shift_A_Y + cbz x13, Lbeeu_shift_A_Y + + // Shift B right by "x13" bits + neg x14, x13 + lsr x25, x25, x13 + lsl x15, x26, x14 + + lsr x26, x26, x13 + lsl x19, x27, x14 + + orr x25, x25, x15 + + lsr x27, x27, x13 + lsl x20, x28, x14 + + orr x26, x26, x19 + + lsr x28, x28, x13 + + orr x27, x27, x20 + + + // Shift X right by "x13" bits, adding n whenever X becomes odd. + // x13--; + // x14 := 0; needed in the addition to the most significant word in SHIFT1 + eor x14, x14, x14 +Lbeeu_shift_loop_X: + tbz x3, #0, Lshift1_0 + adds x3, x3, x0 + adcs x4, x4, x1 + adcs x5, x5, x2 + adcs x6, x6, x30 + adc x7, x7, x14 +Lshift1_0: + // var0 := [var1|var0]<64..1>; + // i.e. concatenate var1 and var0, + // extract bits <64..1> from the resulting 128-bit value + // and put them in var0 + extr x3, x4, x3, #1 + extr x4, x5, x4, #1 + extr x5, x6, x5, #1 + extr x6, x7, x6, #1 + lsr x7, x7, #1 + + subs x13, x13, #1 + bne Lbeeu_shift_loop_X + + // Note: the steps above perform the same sequence as in p256_beeu-x86_64-asm.pl + // with the following differences: + // - "x13" is set directly to the number of trailing 0s in B + // (using rbit and clz instructions) + // - The loop is only used to call SHIFT1(X) + // and x13 is decreased while executing the X loop. + // - SHIFT256(B, x13) is performed before right-shifting X; they are independent + +Lbeeu_shift_A_Y: + // Same for A and Y. + // Afterwards, (2) still holds. + // Reverse the bit order of x21 + // x13 := number of trailing 0s in x21 (= number of leading 0s in x15) + rbit x15, x21 + clz x13, x15 + + // If there is no shift, goto |B-A|, X+Y update + cbz x13, Lbeeu_update_B_X_or_A_Y + + // Shift A right by "x13" bits + neg x14, x13 + lsr x21, x21, x13 + lsl x15, x22, x14 + + lsr x22, x22, x13 + lsl x19, x23, x14 + + orr x21, x21, x15 + + lsr x23, x23, x13 + lsl x20, x24, x14 + + orr x22, x22, x19 + + lsr x24, x24, x13 + + orr x23, x23, x20 + + + // Shift Y right by "x13" bits, adding n whenever Y becomes odd. + // x13--; + // x14 := 0; needed in the addition to the most significant word in SHIFT1 + eor x14, x14, x14 +Lbeeu_shift_loop_Y: + tbz x8, #0, Lshift1_1 + adds x8, x8, x0 + adcs x9, x9, x1 + adcs x10, x10, x2 + adcs x11, x11, x30 + adc x12, x12, x14 +Lshift1_1: + // var0 := [var1|var0]<64..1>; + // i.e. concatenate var1 and var0, + // extract bits <64..1> from the resulting 128-bit value + // and put them in var0 + extr x8, x9, x8, #1 + extr x9, x10, x9, #1 + extr x10, x11, x10, #1 + extr x11, x12, x11, #1 + lsr x12, x12, #1 + + subs x13, x13, #1 + bne Lbeeu_shift_loop_Y + +Lbeeu_update_B_X_or_A_Y: + // Try T := B - A; if cs, continue with B > A (cs: carry set = no borrow) + // Note: this is a case of unsigned arithmetic, where T fits in 4 64-bit words + // without taking a sign bit if generated. The lack of a carry would + // indicate a negative result. See, for example, + // https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/condition-codes-1-condition-flags-and-codes + subs x14, x25, x21 + sbcs x15, x26, x22 + sbcs x19, x27, x23 + sbcs x20, x28, x24 + bcs Lbeeu_B_greater_than_A + + // Else A > B => + // A := A - B; Y := Y + X; goto beginning of the loop + subs x21, x21, x25 + sbcs x22, x22, x26 + sbcs x23, x23, x27 + sbcs x24, x24, x28 + + adds x8, x8, x3 + adcs x9, x9, x4 + adcs x10, x10, x5 + adcs x11, x11, x6 + adc x12, x12, x7 + b Lbeeu_loop + +Lbeeu_B_greater_than_A: + // Continue with B > A => + // B := B - A; X := X + Y; goto beginning of the loop + mov x25, x14 + mov x26, x15 + mov x27, x19 + mov x28, x20 + + adds x3, x3, x8 + adcs x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, x12 + b Lbeeu_loop + +Lbeeu_loop_end: + // The Euclid's algorithm loop ends when A == gcd(a,n); + // this would be 1, when a and n are co-prime (i.e. do not have a common factor). + // Since (-1)*Y*a == A (mod |n|), Y>0 + // then out = -Y mod n + + // Verify that A = 1 ==> (-1)*Y*a = A = 1 (mod |n|) + // Is A-1 == 0? + // If not, fail. + sub x14, x21, #1 + orr x14, x14, x22 + orr x14, x14, x23 + orr x14, x14, x24 + cbnz x14, Lbeeu_err + + // If Y>n ==> Y:=Y-n +Lbeeu_reduction_loop: + // x_i := y_i - n_i (X is no longer needed, use it as temp) + // (x14 = 0 from above) + subs x3, x8, x0 + sbcs x4, x9, x1 + sbcs x5, x10, x2 + sbcs x6, x11, x30 + sbcs x7, x12, x14 + + // If result is non-negative (i.e., cs = carry set = no borrow), + // y_i := x_i; goto reduce again + // else + // y_i := y_i; continue + csel x8, x3, x8, cs + csel x9, x4, x9, cs + csel x10, x5, x10, cs + csel x11, x6, x11, cs + csel x12, x7, x12, cs + bcs Lbeeu_reduction_loop + + // Now Y < n (Y cannot be equal to n, since the inverse cannot be 0) + // out = -Y = n-Y + subs x8, x0, x8 + sbcs x9, x1, x9 + sbcs x10, x2, x10 + sbcs x11, x30, x11 + + // Save Y in output (out (x0) was saved on the stack) + ldr x3, [sp,#96] + stp x8, x9, [x3] + stp x10, x11, [x3,#16] + // return 1 (success) + mov x0, #1 + b Lbeeu_finish + +Lbeeu_err: + // return 0 (error) + eor x0, x0, x0 + +Lbeeu_finish: + // Restore callee-saved registers, except x0, x2 + add sp,x29,#0 + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldp x29,x30,[sp],#112 + + AARCH64_VALIDATE_LINK_REGISTER + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-apple.S b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-apple.S new file mode 100644 index 0000000000..fc6552c57d --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-apple.S @@ -0,0 +1,322 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + +.private_extern _beeu_mod_inverse_vartime +.globl _beeu_mod_inverse_vartime +.private_extern _beeu_mod_inverse_vartime +.p2align 5 +_beeu_mod_inverse_vartime: + +_CET_ENDBR + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rbx + + pushq %rsi + + + subq $80,%rsp + + movq %rdi,0(%rsp) + + + movq $1,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %rdi,%rdi + + xorq %r12,%r12 + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + xorq %rbp,%rbp + + + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu %xmm0,48(%rsp) + vmovdqu %xmm1,64(%rsp) + + vmovdqu 0(%rdx),%xmm0 + vmovdqu 16(%rdx),%xmm1 + vmovdqu %xmm0,16(%rsp) + vmovdqu %xmm1,32(%rsp) + +L$beeu_loop: + xorq %rbx,%rbx + orq 48(%rsp),%rbx + orq 56(%rsp),%rbx + orq 64(%rsp),%rbx + orq 72(%rsp),%rbx + jz L$beeu_loop_end + + + + + + + + + + + movq $1,%rcx + + +L$beeu_shift_loop_XB: + movq %rcx,%rbx + andq 48(%rsp),%rbx + jnz L$beeu_shift_loop_end_XB + + + movq $1,%rbx + andq %r8,%rbx + jz L$shift1_0 + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + adcq $0,%rdi + +L$shift1_0: + shrdq $1,%r9,%r8 + shrdq $1,%r10,%r9 + shrdq $1,%r11,%r10 + shrdq $1,%rdi,%r11 + shrq $1,%rdi + + shlq $1,%rcx + + + + + + cmpq $0x8000000,%rcx + jne L$beeu_shift_loop_XB + +L$beeu_shift_loop_end_XB: + bsfq %rcx,%rcx + testq %rcx,%rcx + jz L$beeu_no_shift_XB + + + + movq 8+48(%rsp),%rax + movq 16+48(%rsp),%rbx + movq 24+48(%rsp),%rsi + + shrdq %cl,%rax,0+48(%rsp) + shrdq %cl,%rbx,8+48(%rsp) + shrdq %cl,%rsi,16+48(%rsp) + + shrq %cl,%rsi + movq %rsi,24+48(%rsp) + + +L$beeu_no_shift_XB: + + movq $1,%rcx + + +L$beeu_shift_loop_YA: + movq %rcx,%rbx + andq 16(%rsp),%rbx + jnz L$beeu_shift_loop_end_YA + + + movq $1,%rbx + andq %r12,%rbx + jz L$shift1_1 + addq 0(%rdx),%r12 + adcq 8(%rdx),%r13 + adcq 16(%rdx),%r14 + adcq 24(%rdx),%r15 + adcq $0,%rbp + +L$shift1_1: + shrdq $1,%r13,%r12 + shrdq $1,%r14,%r13 + shrdq $1,%r15,%r14 + shrdq $1,%rbp,%r15 + shrq $1,%rbp + + shlq $1,%rcx + + + + + + cmpq $0x8000000,%rcx + jne L$beeu_shift_loop_YA + +L$beeu_shift_loop_end_YA: + bsfq %rcx,%rcx + testq %rcx,%rcx + jz L$beeu_no_shift_YA + + + + movq 8+16(%rsp),%rax + movq 16+16(%rsp),%rbx + movq 24+16(%rsp),%rsi + + shrdq %cl,%rax,0+16(%rsp) + shrdq %cl,%rbx,8+16(%rsp) + shrdq %cl,%rsi,16+16(%rsp) + + shrq %cl,%rsi + movq %rsi,24+16(%rsp) + + +L$beeu_no_shift_YA: + + movq 48(%rsp),%rax + movq 56(%rsp),%rbx + movq 64(%rsp),%rsi + movq 72(%rsp),%rcx + subq 16(%rsp),%rax + sbbq 24(%rsp),%rbx + sbbq 32(%rsp),%rsi + sbbq 40(%rsp),%rcx + jnc L$beeu_B_bigger_than_A + + + movq 16(%rsp),%rax + movq 24(%rsp),%rbx + movq 32(%rsp),%rsi + movq 40(%rsp),%rcx + subq 48(%rsp),%rax + sbbq 56(%rsp),%rbx + sbbq 64(%rsp),%rsi + sbbq 72(%rsp),%rcx + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + movq %rsi,32(%rsp) + movq %rcx,40(%rsp) + + + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %r11,%r15 + adcq %rdi,%rbp + jmp L$beeu_loop + +L$beeu_B_bigger_than_A: + + movq %rax,48(%rsp) + movq %rbx,56(%rsp) + movq %rsi,64(%rsp) + movq %rcx,72(%rsp) + + + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + adcq %rbp,%rdi + + jmp L$beeu_loop + +L$beeu_loop_end: + + + + + movq 16(%rsp),%rbx + subq $1,%rbx + orq 24(%rsp),%rbx + orq 32(%rsp),%rbx + orq 40(%rsp),%rbx + + jnz L$beeu_err + + + + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + xorq %rdi,%rdi + +L$beeu_reduction_loop: + movq %r12,16(%rsp) + movq %r13,24(%rsp) + movq %r14,32(%rsp) + movq %r15,40(%rsp) + movq %rbp,48(%rsp) + + + subq %r8,%r12 + sbbq %r9,%r13 + sbbq %r10,%r14 + sbbq %r11,%r15 + sbbq $0,%rbp + + + cmovcq 16(%rsp),%r12 + cmovcq 24(%rsp),%r13 + cmovcq 32(%rsp),%r14 + cmovcq 40(%rsp),%r15 + jnc L$beeu_reduction_loop + + + subq %r12,%r8 + sbbq %r13,%r9 + sbbq %r14,%r10 + sbbq %r15,%r11 + +L$beeu_save: + + movq 0(%rsp),%rdi + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + + movq $1,%rax + jmp L$beeu_finish + +L$beeu_err: + + xorq %rax,%rax + +L$beeu_finish: + addq $80,%rsp + + popq %rsi + + popq %rbx + + popq %r15 + + popq %r14 + + popq %r13 + + popq %r12 + + popq %rbp + + ret + + + +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-linux.S b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-linux.S new file mode 100644 index 0000000000..40ae58b560 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-linux.S @@ -0,0 +1,336 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.type beeu_mod_inverse_vartime,@function +.hidden beeu_mod_inverse_vartime +.globl beeu_mod_inverse_vartime +.hidden beeu_mod_inverse_vartime +.align 32 +beeu_mod_inverse_vartime: +.cfi_startproc +_CET_ENDBR + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset rbp,-16 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12,-24 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13,-32 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14,-40 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15,-48 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset rbx,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 +.cfi_offset rsi,-64 + + subq $80,%rsp +.cfi_adjust_cfa_offset 80 + movq %rdi,0(%rsp) + + + movq $1,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %rdi,%rdi + + xorq %r12,%r12 + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + xorq %rbp,%rbp + + + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu %xmm0,48(%rsp) + vmovdqu %xmm1,64(%rsp) + + vmovdqu 0(%rdx),%xmm0 + vmovdqu 16(%rdx),%xmm1 + vmovdqu %xmm0,16(%rsp) + vmovdqu %xmm1,32(%rsp) + +.Lbeeu_loop: + xorq %rbx,%rbx + orq 48(%rsp),%rbx + orq 56(%rsp),%rbx + orq 64(%rsp),%rbx + orq 72(%rsp),%rbx + jz .Lbeeu_loop_end + + + + + + + + + + + movq $1,%rcx + + +.Lbeeu_shift_loop_XB: + movq %rcx,%rbx + andq 48(%rsp),%rbx + jnz .Lbeeu_shift_loop_end_XB + + + movq $1,%rbx + andq %r8,%rbx + jz .Lshift1_0 + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + adcq $0,%rdi + +.Lshift1_0: + shrdq $1,%r9,%r8 + shrdq $1,%r10,%r9 + shrdq $1,%r11,%r10 + shrdq $1,%rdi,%r11 + shrq $1,%rdi + + shlq $1,%rcx + + + + + + cmpq $0x8000000,%rcx + jne .Lbeeu_shift_loop_XB + +.Lbeeu_shift_loop_end_XB: + bsfq %rcx,%rcx + testq %rcx,%rcx + jz .Lbeeu_no_shift_XB + + + + movq 8+48(%rsp),%rax + movq 16+48(%rsp),%rbx + movq 24+48(%rsp),%rsi + + shrdq %cl,%rax,0+48(%rsp) + shrdq %cl,%rbx,8+48(%rsp) + shrdq %cl,%rsi,16+48(%rsp) + + shrq %cl,%rsi + movq %rsi,24+48(%rsp) + + +.Lbeeu_no_shift_XB: + + movq $1,%rcx + + +.Lbeeu_shift_loop_YA: + movq %rcx,%rbx + andq 16(%rsp),%rbx + jnz .Lbeeu_shift_loop_end_YA + + + movq $1,%rbx + andq %r12,%rbx + jz .Lshift1_1 + addq 0(%rdx),%r12 + adcq 8(%rdx),%r13 + adcq 16(%rdx),%r14 + adcq 24(%rdx),%r15 + adcq $0,%rbp + +.Lshift1_1: + shrdq $1,%r13,%r12 + shrdq $1,%r14,%r13 + shrdq $1,%r15,%r14 + shrdq $1,%rbp,%r15 + shrq $1,%rbp + + shlq $1,%rcx + + + + + + cmpq $0x8000000,%rcx + jne .Lbeeu_shift_loop_YA + +.Lbeeu_shift_loop_end_YA: + bsfq %rcx,%rcx + testq %rcx,%rcx + jz .Lbeeu_no_shift_YA + + + + movq 8+16(%rsp),%rax + movq 16+16(%rsp),%rbx + movq 24+16(%rsp),%rsi + + shrdq %cl,%rax,0+16(%rsp) + shrdq %cl,%rbx,8+16(%rsp) + shrdq %cl,%rsi,16+16(%rsp) + + shrq %cl,%rsi + movq %rsi,24+16(%rsp) + + +.Lbeeu_no_shift_YA: + + movq 48(%rsp),%rax + movq 56(%rsp),%rbx + movq 64(%rsp),%rsi + movq 72(%rsp),%rcx + subq 16(%rsp),%rax + sbbq 24(%rsp),%rbx + sbbq 32(%rsp),%rsi + sbbq 40(%rsp),%rcx + jnc .Lbeeu_B_bigger_than_A + + + movq 16(%rsp),%rax + movq 24(%rsp),%rbx + movq 32(%rsp),%rsi + movq 40(%rsp),%rcx + subq 48(%rsp),%rax + sbbq 56(%rsp),%rbx + sbbq 64(%rsp),%rsi + sbbq 72(%rsp),%rcx + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + movq %rsi,32(%rsp) + movq %rcx,40(%rsp) + + + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %r11,%r15 + adcq %rdi,%rbp + jmp .Lbeeu_loop + +.Lbeeu_B_bigger_than_A: + + movq %rax,48(%rsp) + movq %rbx,56(%rsp) + movq %rsi,64(%rsp) + movq %rcx,72(%rsp) + + + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + adcq %rbp,%rdi + + jmp .Lbeeu_loop + +.Lbeeu_loop_end: + + + + + movq 16(%rsp),%rbx + subq $1,%rbx + orq 24(%rsp),%rbx + orq 32(%rsp),%rbx + orq 40(%rsp),%rbx + + jnz .Lbeeu_err + + + + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + xorq %rdi,%rdi + +.Lbeeu_reduction_loop: + movq %r12,16(%rsp) + movq %r13,24(%rsp) + movq %r14,32(%rsp) + movq %r15,40(%rsp) + movq %rbp,48(%rsp) + + + subq %r8,%r12 + sbbq %r9,%r13 + sbbq %r10,%r14 + sbbq %r11,%r15 + sbbq $0,%rbp + + + cmovcq 16(%rsp),%r12 + cmovcq 24(%rsp),%r13 + cmovcq 32(%rsp),%r14 + cmovcq 40(%rsp),%r15 + jnc .Lbeeu_reduction_loop + + + subq %r12,%r8 + sbbq %r13,%r9 + sbbq %r14,%r10 + sbbq %r15,%r11 + +.Lbeeu_save: + + movq 0(%rsp),%rdi + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + + movq $1,%rax + jmp .Lbeeu_finish + +.Lbeeu_err: + + xorq %rax,%rax + +.Lbeeu_finish: + addq $80,%rsp +.cfi_adjust_cfa_offset -80 + popq %rsi +.cfi_adjust_cfa_offset -8 +.cfi_restore rsi + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore rbx + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore r12 + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore rbp + ret +.cfi_endproc + +.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-win.asm b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-win.asm new file mode 100644 index 0000000000..7c7da683d5 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-win.asm @@ -0,0 +1,346 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + + + +global beeu_mod_inverse_vartime +ALIGN 32 +beeu_mod_inverse_vartime: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_beeu_mod_inverse_vartime: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + push rbx + + push rsi + + + sub rsp,80 + + mov QWORD[rsp],rdi + + + mov r8,1 + xor r9,r9 + xor r10,r10 + xor r11,r11 + xor rdi,rdi + + xor r12,r12 + xor r13,r13 + xor r14,r14 + xor r15,r15 + xor rbp,rbp + + + vmovdqu xmm0,XMMWORD[rsi] + vmovdqu xmm1,XMMWORD[16+rsi] + vmovdqu XMMWORD[48+rsp],xmm0 + vmovdqu XMMWORD[64+rsp],xmm1 + + vmovdqu xmm0,XMMWORD[rdx] + vmovdqu xmm1,XMMWORD[16+rdx] + vmovdqu XMMWORD[16+rsp],xmm0 + vmovdqu XMMWORD[32+rsp],xmm1 + +$L$beeu_loop: + xor rbx,rbx + or rbx,QWORD[48+rsp] + or rbx,QWORD[56+rsp] + or rbx,QWORD[64+rsp] + or rbx,QWORD[72+rsp] + jz NEAR $L$beeu_loop_end + + + + + + + + + + + mov rcx,1 + + +$L$beeu_shift_loop_XB: + mov rbx,rcx + and rbx,QWORD[48+rsp] + jnz NEAR $L$beeu_shift_loop_end_XB + + + mov rbx,1 + and rbx,r8 + jz NEAR $L$shift1_0 + add r8,QWORD[rdx] + adc r9,QWORD[8+rdx] + adc r10,QWORD[16+rdx] + adc r11,QWORD[24+rdx] + adc rdi,0 + +$L$shift1_0: + shrd r8,r9,1 + shrd r9,r10,1 + shrd r10,r11,1 + shrd r11,rdi,1 + shr rdi,1 + + shl rcx,1 + + + + + + cmp rcx,0x8000000 + jne NEAR $L$beeu_shift_loop_XB + +$L$beeu_shift_loop_end_XB: + bsf rcx,rcx + test rcx,rcx + jz NEAR $L$beeu_no_shift_XB + + + + mov rax,QWORD[((8+48))+rsp] + mov rbx,QWORD[((16+48))+rsp] + mov rsi,QWORD[((24+48))+rsp] + + shrd QWORD[((0+48))+rsp],rax,cl + shrd QWORD[((8+48))+rsp],rbx,cl + shrd QWORD[((16+48))+rsp],rsi,cl + + shr rsi,cl + mov QWORD[((24+48))+rsp],rsi + + +$L$beeu_no_shift_XB: + + mov rcx,1 + + +$L$beeu_shift_loop_YA: + mov rbx,rcx + and rbx,QWORD[16+rsp] + jnz NEAR $L$beeu_shift_loop_end_YA + + + mov rbx,1 + and rbx,r12 + jz NEAR $L$shift1_1 + add r12,QWORD[rdx] + adc r13,QWORD[8+rdx] + adc r14,QWORD[16+rdx] + adc r15,QWORD[24+rdx] + adc rbp,0 + +$L$shift1_1: + shrd r12,r13,1 + shrd r13,r14,1 + shrd r14,r15,1 + shrd r15,rbp,1 + shr rbp,1 + + shl rcx,1 + + + + + + cmp rcx,0x8000000 + jne NEAR $L$beeu_shift_loop_YA + +$L$beeu_shift_loop_end_YA: + bsf rcx,rcx + test rcx,rcx + jz NEAR $L$beeu_no_shift_YA + + + + mov rax,QWORD[((8+16))+rsp] + mov rbx,QWORD[((16+16))+rsp] + mov rsi,QWORD[((24+16))+rsp] + + shrd QWORD[((0+16))+rsp],rax,cl + shrd QWORD[((8+16))+rsp],rbx,cl + shrd QWORD[((16+16))+rsp],rsi,cl + + shr rsi,cl + mov QWORD[((24+16))+rsp],rsi + + +$L$beeu_no_shift_YA: + + mov rax,QWORD[48+rsp] + mov rbx,QWORD[56+rsp] + mov rsi,QWORD[64+rsp] + mov rcx,QWORD[72+rsp] + sub rax,QWORD[16+rsp] + sbb rbx,QWORD[24+rsp] + sbb rsi,QWORD[32+rsp] + sbb rcx,QWORD[40+rsp] + jnc NEAR $L$beeu_B_bigger_than_A + + + mov rax,QWORD[16+rsp] + mov rbx,QWORD[24+rsp] + mov rsi,QWORD[32+rsp] + mov rcx,QWORD[40+rsp] + sub rax,QWORD[48+rsp] + sbb rbx,QWORD[56+rsp] + sbb rsi,QWORD[64+rsp] + sbb rcx,QWORD[72+rsp] + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + mov QWORD[32+rsp],rsi + mov QWORD[40+rsp],rcx + + + add r12,r8 + adc r13,r9 + adc r14,r10 + adc r15,r11 + adc rbp,rdi + jmp NEAR $L$beeu_loop + +$L$beeu_B_bigger_than_A: + + mov QWORD[48+rsp],rax + mov QWORD[56+rsp],rbx + mov QWORD[64+rsp],rsi + mov QWORD[72+rsp],rcx + + + add r8,r12 + adc r9,r13 + adc r10,r14 + adc r11,r15 + adc rdi,rbp + + jmp NEAR $L$beeu_loop + +$L$beeu_loop_end: + + + + + mov rbx,QWORD[16+rsp] + sub rbx,1 + or rbx,QWORD[24+rsp] + or rbx,QWORD[32+rsp] + or rbx,QWORD[40+rsp] + + jnz NEAR $L$beeu_err + + + + + mov r8,QWORD[rdx] + mov r9,QWORD[8+rdx] + mov r10,QWORD[16+rdx] + mov r11,QWORD[24+rdx] + xor rdi,rdi + +$L$beeu_reduction_loop: + mov QWORD[16+rsp],r12 + mov QWORD[24+rsp],r13 + mov QWORD[32+rsp],r14 + mov QWORD[40+rsp],r15 + mov QWORD[48+rsp],rbp + + + sub r12,r8 + sbb r13,r9 + sbb r14,r10 + sbb r15,r11 + sbb rbp,0 + + + cmovc r12,QWORD[16+rsp] + cmovc r13,QWORD[24+rsp] + cmovc r14,QWORD[32+rsp] + cmovc r15,QWORD[40+rsp] + jnc NEAR $L$beeu_reduction_loop + + + sub r8,r12 + sbb r9,r13 + sbb r10,r14 + sbb r11,r15 + +$L$beeu_save: + + mov rdi,QWORD[rsp] + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + + + mov rax,1 + jmp NEAR $L$beeu_finish + +$L$beeu_err: + + xor rax,rax + +$L$beeu_finish: + add rsp,80 + + pop rsi + + pop rbx + + pop r15 + + pop r14 + + pop r13 + + pop r12 + + pop rbp + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + + +$L$SEH_end_beeu_mod_inverse_vartime: +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-apple.S new file mode 100644 index 0000000000..5fdf105fbe --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-apple.S @@ -0,0 +1,57 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + + + +.globl _CRYPTO_rdrand +.private_extern _CRYPTO_rdrand + +.p2align 4 +_CRYPTO_rdrand: + +_CET_ENDBR + xorq %rax,%rax +.byte 72,15,199,242 + + adcq %rax,%rax + movq %rdx,0(%rdi) + ret + + + + + + + +.globl _CRYPTO_rdrand_multiple8_buf +.private_extern _CRYPTO_rdrand_multiple8_buf + +.p2align 4 +_CRYPTO_rdrand_multiple8_buf: + +_CET_ENDBR + testq %rsi,%rsi + jz L$out + movq $8,%rdx +L$loop: +.byte 72,15,199,241 + jnc L$err + movq %rcx,0(%rdi) + addq %rdx,%rdi + subq %rdx,%rsi + jnz L$loop +L$out: + movq $1,%rax + ret +L$err: + xorq %rax,%rax + ret + + +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-linux.S new file mode 100644 index 0000000000..fe81dac3ee --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-linux.S @@ -0,0 +1,57 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + + + + +.globl CRYPTO_rdrand +.hidden CRYPTO_rdrand +.type CRYPTO_rdrand,@function +.align 16 +CRYPTO_rdrand: +.cfi_startproc +_CET_ENDBR + xorq %rax,%rax +.byte 72,15,199,242 + + adcq %rax,%rax + movq %rdx,0(%rdi) + ret +.cfi_endproc +.size CRYPTO_rdrand,.-CRYPTO_rdrand + + + + + +.globl CRYPTO_rdrand_multiple8_buf +.hidden CRYPTO_rdrand_multiple8_buf +.type CRYPTO_rdrand_multiple8_buf,@function +.align 16 +CRYPTO_rdrand_multiple8_buf: +.cfi_startproc +_CET_ENDBR + testq %rsi,%rsi + jz .Lout + movq $8,%rdx +.Lloop: +.byte 72,15,199,241 + jnc .Lerr + movq %rcx,0(%rdi) + addq %rdx,%rdi + subq %rdx,%rsi + jnz .Lloop +.Lout: + movq $1,%rax + ret +.Lerr: + xorq %rax,%rax + ret +.cfi_endproc +.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-win.asm new file mode 100644 index 0000000000..aae3d76bd0 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/rdrand-x86_64-win.asm @@ -0,0 +1,66 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + + + + +global CRYPTO_rdrand + +ALIGN 16 +CRYPTO_rdrand: + +_CET_ENDBR + xor rax,rax +DB 73,15,199,240 + + adc rax,rax + mov QWORD[rcx],r8 + ret + + + + + + + +global CRYPTO_rdrand_multiple8_buf + +ALIGN 16 +CRYPTO_rdrand_multiple8_buf: + +_CET_ENDBR + test rdx,rdx + jz NEAR $L$out + mov r8,8 +$L$loop: +DB 73,15,199,241 + jnc NEAR $L$err + mov QWORD[rcx],r9 + add rcx,r8 + sub rdx,r8 + jnz NEAR $L$loop +$L$out: + mov rax,1 + ret +$L$err: + xor rax,rax + ret + + +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-apple.S b/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-apple.S new file mode 100644 index 0000000000..36723091c0 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-apple.S @@ -0,0 +1,1749 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + +.globl _rsaz_1024_sqr_avx2 +.private_extern _rsaz_1024_sqr_avx2 + +.p2align 6 +_rsaz_1024_sqr_avx2: + +_CET_ENDBR + leaq (%rsp),%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + vzeroupper + movq %rax,%rbp + + movq %rdx,%r13 + subq $832,%rsp + movq %r13,%r15 + subq $-128,%rdi + subq $-128,%rsi + subq $-128,%r13 + + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + vpxor %ymm9,%ymm9,%ymm9 + jz L$sqr_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%r13),%ymm0 + andq $-2048,%rsp + vmovdqu 32-128(%r13),%ymm1 + vmovdqu 64-128(%r13),%ymm2 + vmovdqu 96-128(%r13),%ymm3 + vmovdqu 128-128(%r13),%ymm4 + vmovdqu 160-128(%r13),%ymm5 + vmovdqu 192-128(%r13),%ymm6 + vmovdqu 224-128(%r13),%ymm7 + vmovdqu 256-128(%r13),%ymm8 + leaq 832+128(%rsp),%r13 + vmovdqu %ymm0,0-128(%r13) + vmovdqu %ymm1,32-128(%r13) + vmovdqu %ymm2,64-128(%r13) + vmovdqu %ymm3,96-128(%r13) + vmovdqu %ymm4,128-128(%r13) + vmovdqu %ymm5,160-128(%r13) + vmovdqu %ymm6,192-128(%r13) + vmovdqu %ymm7,224-128(%r13) + vmovdqu %ymm8,256-128(%r13) + vmovdqu %ymm9,288-128(%r13) + +L$sqr_1024_no_n_copy: + andq $-1024,%rsp + + vmovdqu 32-128(%rsi),%ymm1 + vmovdqu 64-128(%rsi),%ymm2 + vmovdqu 96-128(%rsi),%ymm3 + vmovdqu 128-128(%rsi),%ymm4 + vmovdqu 160-128(%rsi),%ymm5 + vmovdqu 192-128(%rsi),%ymm6 + vmovdqu 224-128(%rsi),%ymm7 + vmovdqu 256-128(%rsi),%ymm8 + + leaq 192(%rsp),%rbx + vmovdqu L$and_mask(%rip),%ymm15 + jmp L$OOP_GRANDE_SQR_1024 + +.p2align 5 +L$OOP_GRANDE_SQR_1024: + leaq 576+128(%rsp),%r9 + leaq 448(%rsp),%r12 + + + + + vpaddq %ymm1,%ymm1,%ymm1 + vpbroadcastq 0-128(%rsi),%ymm10 + vpaddq %ymm2,%ymm2,%ymm2 + vmovdqa %ymm1,0-128(%r9) + vpaddq %ymm3,%ymm3,%ymm3 + vmovdqa %ymm2,32-128(%r9) + vpaddq %ymm4,%ymm4,%ymm4 + vmovdqa %ymm3,64-128(%r9) + vpaddq %ymm5,%ymm5,%ymm5 + vmovdqa %ymm4,96-128(%r9) + vpaddq %ymm6,%ymm6,%ymm6 + vmovdqa %ymm5,128-128(%r9) + vpaddq %ymm7,%ymm7,%ymm7 + vmovdqa %ymm6,160-128(%r9) + vpaddq %ymm8,%ymm8,%ymm8 + vmovdqa %ymm7,192-128(%r9) + vpxor %ymm9,%ymm9,%ymm9 + vmovdqa %ymm8,224-128(%r9) + + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpbroadcastq 32-128(%rsi),%ymm11 + vmovdqu %ymm9,288-192(%rbx) + vpmuludq %ymm10,%ymm1,%ymm1 + vmovdqu %ymm9,320-448(%r12) + vpmuludq %ymm10,%ymm2,%ymm2 + vmovdqu %ymm9,352-448(%r12) + vpmuludq %ymm10,%ymm3,%ymm3 + vmovdqu %ymm9,384-448(%r12) + vpmuludq %ymm10,%ymm4,%ymm4 + vmovdqu %ymm9,416-448(%r12) + vpmuludq %ymm10,%ymm5,%ymm5 + vmovdqu %ymm9,448-448(%r12) + vpmuludq %ymm10,%ymm6,%ymm6 + vmovdqu %ymm9,480-448(%r12) + vpmuludq %ymm10,%ymm7,%ymm7 + vmovdqu %ymm9,512-448(%r12) + vpmuludq %ymm10,%ymm8,%ymm8 + vpbroadcastq 64-128(%rsi),%ymm10 + vmovdqu %ymm9,544-448(%r12) + + movq %rsi,%r15 + movl $4,%r14d + jmp L$sqr_entry_1024 +.p2align 5 +L$OOP_SQR_1024: + vpbroadcastq 32-128(%r15),%ymm11 + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpaddq 0-192(%rbx),%ymm0,%ymm0 + vpmuludq 0-128(%r9),%ymm10,%ymm1 + vpaddq 32-192(%rbx),%ymm1,%ymm1 + vpmuludq 32-128(%r9),%ymm10,%ymm2 + vpaddq 64-192(%rbx),%ymm2,%ymm2 + vpmuludq 64-128(%r9),%ymm10,%ymm3 + vpaddq 96-192(%rbx),%ymm3,%ymm3 + vpmuludq 96-128(%r9),%ymm10,%ymm4 + vpaddq 128-192(%rbx),%ymm4,%ymm4 + vpmuludq 128-128(%r9),%ymm10,%ymm5 + vpaddq 160-192(%rbx),%ymm5,%ymm5 + vpmuludq 160-128(%r9),%ymm10,%ymm6 + vpaddq 192-192(%rbx),%ymm6,%ymm6 + vpmuludq 192-128(%r9),%ymm10,%ymm7 + vpaddq 224-192(%rbx),%ymm7,%ymm7 + vpmuludq 224-128(%r9),%ymm10,%ymm8 + vpbroadcastq 64-128(%r15),%ymm10 + vpaddq 256-192(%rbx),%ymm8,%ymm8 +L$sqr_entry_1024: + vmovdqu %ymm0,0-192(%rbx) + vmovdqu %ymm1,32-192(%rbx) + + vpmuludq 32-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 32-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 64-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 96-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 128-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 160-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 192-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 224-128(%r9),%ymm11,%ymm0 + vpbroadcastq 96-128(%r15),%ymm11 + vpaddq 288-192(%rbx),%ymm0,%ymm0 + + vmovdqu %ymm2,64-192(%rbx) + vmovdqu %ymm3,96-192(%rbx) + + vpmuludq 64-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 64-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 96-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 128-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 160-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 224-128(%r9),%ymm10,%ymm1 + vpbroadcastq 128-128(%r15),%ymm10 + vpaddq 320-448(%r12),%ymm1,%ymm1 + + vmovdqu %ymm4,128-192(%rbx) + vmovdqu %ymm5,160-192(%rbx) + + vpmuludq 96-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 96-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq 128-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm0,%ymm0 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq 224-128(%r9),%ymm11,%ymm2 + vpbroadcastq 160-128(%r15),%ymm11 + vpaddq 352-448(%r12),%ymm2,%ymm2 + + vmovdqu %ymm6,192-192(%rbx) + vmovdqu %ymm7,224-192(%rbx) + + vpmuludq 128-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 128-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 160-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 192-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 224-128(%r9),%ymm10,%ymm3 + vpbroadcastq 192-128(%r15),%ymm10 + vpaddq 384-448(%r12),%ymm3,%ymm3 + + vmovdqu %ymm8,256-192(%rbx) + vmovdqu %ymm0,288-192(%rbx) + leaq 8(%rbx),%rbx + + vpmuludq 160-128(%rsi),%ymm11,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 224-128(%r9),%ymm11,%ymm4 + vpbroadcastq 224-128(%r15),%ymm11 + vpaddq 416-448(%r12),%ymm4,%ymm4 + + vmovdqu %ymm1,320-448(%r12) + vmovdqu %ymm2,352-448(%r12) + + vpmuludq 192-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpbroadcastq 256-128(%r15),%ymm0 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq 224-128(%r9),%ymm10,%ymm5 + vpbroadcastq 0+8-128(%r15),%ymm10 + vpaddq 448-448(%r12),%ymm5,%ymm5 + + vmovdqu %ymm3,384-448(%r12) + vmovdqu %ymm4,416-448(%r12) + leaq 8(%r15),%r15 + + vpmuludq 224-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 224-128(%r9),%ymm11,%ymm6 + vpaddq 480-448(%r12),%ymm6,%ymm6 + + vpmuludq 256-128(%rsi),%ymm0,%ymm7 + vmovdqu %ymm5,448-448(%r12) + vpaddq 512-448(%r12),%ymm7,%ymm7 + vmovdqu %ymm6,480-448(%r12) + vmovdqu %ymm7,512-448(%r12) + leaq 8(%r12),%r12 + + decl %r14d + jnz L$OOP_SQR_1024 + + vmovdqu 256(%rsp),%ymm8 + vmovdqu 288(%rsp),%ymm1 + vmovdqu 320(%rsp),%ymm2 + leaq 192(%rsp),%rbx + + vpsrlq $29,%ymm8,%ymm14 + vpand %ymm15,%ymm8,%ymm8 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + + vpermq $0x93,%ymm14,%ymm14 + vpxor %ymm9,%ymm9,%ymm9 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm8,%ymm8 + vpblendd $3,%ymm11,%ymm9,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,288-192(%rbx) + vmovdqu %ymm2,320-192(%rbx) + + movq (%rsp),%rax + movq 8(%rsp),%r10 + movq 16(%rsp),%r11 + movq 24(%rsp),%r12 + vmovdqu 32(%rsp),%ymm1 + vmovdqu 64-192(%rbx),%ymm2 + vmovdqu 96-192(%rbx),%ymm3 + vmovdqu 128-192(%rbx),%ymm4 + vmovdqu 160-192(%rbx),%ymm5 + vmovdqu 192-192(%rbx),%ymm6 + vmovdqu 224-192(%rbx),%ymm7 + + movq %rax,%r9 + imull %ecx,%eax + andl $0x1fffffff,%eax + vmovd %eax,%xmm12 + + movq %rax,%rdx + imulq -128(%r13),%rax + vpbroadcastq %xmm12,%ymm12 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax + shrq $29,%r9 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + addq %r9,%r10 + addq %rax,%r11 + imulq 24-128(%r13),%rdx + addq %rdx,%r12 + + movq %r10,%rax + imull %ecx,%eax + andl $0x1fffffff,%eax + + movl $9,%r14d + jmp L$OOP_REDUCE_1024 + +.p2align 5 +L$OOP_REDUCE_1024: + vmovd %eax,%xmm13 + vpbroadcastq %xmm13,%ymm13 + + vpmuludq 32-128(%r13),%ymm12,%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm10,%ymm1,%ymm1 + addq %rax,%r10 + vpmuludq 64-128(%r13),%ymm12,%ymm14 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm14,%ymm2,%ymm2 + vpmuludq 96-128(%r13),%ymm12,%ymm11 +.byte 0x67 + addq %rax,%r11 +.byte 0x67 + movq %rdx,%rax + imulq 16-128(%r13),%rax + shrq $29,%r10 + vpaddq %ymm11,%ymm3,%ymm3 + vpmuludq 128-128(%r13),%ymm12,%ymm10 + addq %rax,%r12 + addq %r10,%r11 + vpaddq %ymm10,%ymm4,%ymm4 + vpmuludq 160-128(%r13),%ymm12,%ymm14 + movq %r11,%rax + imull %ecx,%eax + vpaddq %ymm14,%ymm5,%ymm5 + vpmuludq 192-128(%r13),%ymm12,%ymm11 + andl $0x1fffffff,%eax + vpaddq %ymm11,%ymm6,%ymm6 + vpmuludq 224-128(%r13),%ymm12,%ymm10 + vpaddq %ymm10,%ymm7,%ymm7 + vpmuludq 256-128(%r13),%ymm12,%ymm14 + vmovd %eax,%xmm12 + + vpaddq %ymm14,%ymm8,%ymm8 + + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 32-8-128(%r13),%ymm13,%ymm11 + vmovdqu 96-8-128(%r13),%ymm14 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm1,%ymm1 + vpmuludq 64-8-128(%r13),%ymm13,%ymm10 + vmovdqu 128-8-128(%r13),%ymm11 + addq %rax,%r11 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm10,%ymm2,%ymm2 + addq %r12,%rax + shrq $29,%r11 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 160-8-128(%r13),%ymm10 + addq %r11,%rax + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 192-8-128(%r13),%ymm14 +.byte 0x67 + movq %rax,%r12 + imull %ecx,%eax + vpaddq %ymm11,%ymm4,%ymm4 + vpmuludq %ymm13,%ymm10,%ymm10 +.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 + andl $0x1fffffff,%eax + vpaddq %ymm10,%ymm5,%ymm5 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 256-8-128(%r13),%ymm10 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 288-8-128(%r13),%ymm9 + vmovd %eax,%xmm0 + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm7,%ymm7 + vpmuludq %ymm13,%ymm10,%ymm10 + vmovdqu 32-16-128(%r13),%ymm14 + vpbroadcastq %xmm0,%ymm0 + vpaddq %ymm10,%ymm8,%ymm8 + vpmuludq %ymm13,%ymm9,%ymm9 + vmovdqu 64-16-128(%r13),%ymm11 + addq %rax,%r12 + + vmovdqu 32-24-128(%r13),%ymm13 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 96-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq %ymm0,%ymm13,%ymm13 + vpmuludq %ymm12,%ymm11,%ymm11 +.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff + vpaddq %ymm1,%ymm13,%ymm13 + vpaddq %ymm11,%ymm2,%ymm2 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 160-16-128(%r13),%ymm11 +.byte 0x67 + vmovq %xmm13,%rax + vmovdqu %ymm13,(%rsp) + vpaddq %ymm10,%ymm3,%ymm3 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 192-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq %ymm12,%ymm11,%ymm11 + vmovdqu 224-16-128(%r13),%ymm14 + vpaddq %ymm11,%ymm5,%ymm5 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 256-16-128(%r13),%ymm11 + vpaddq %ymm10,%ymm6,%ymm6 + vpmuludq %ymm12,%ymm14,%ymm14 + shrq $29,%r12 + vmovdqu 288-16-128(%r13),%ymm10 + addq %r12,%rax + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq %ymm12,%ymm11,%ymm11 + + movq %rax,%r9 + imull %ecx,%eax + vpaddq %ymm11,%ymm8,%ymm8 + vpmuludq %ymm12,%ymm10,%ymm10 + andl $0x1fffffff,%eax + vmovd %eax,%xmm12 + vmovdqu 96-24-128(%r13),%ymm11 +.byte 0x67 + vpaddq %ymm10,%ymm9,%ymm9 + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 64-24-128(%r13),%ymm0,%ymm14 + vmovdqu 128-24-128(%r13),%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + movq 8(%rsp),%r10 + vpaddq %ymm14,%ymm2,%ymm1 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 160-24-128(%r13),%ymm14 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax +.byte 0x67 + shrq $29,%r9 + movq 16(%rsp),%r11 + vpaddq %ymm11,%ymm3,%ymm2 + vpmuludq %ymm0,%ymm10,%ymm10 + vmovdqu 192-24-128(%r13),%ymm11 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + vpaddq %ymm10,%ymm4,%ymm3 + vpmuludq %ymm0,%ymm14,%ymm14 + vmovdqu 224-24-128(%r13),%ymm10 + imulq 24-128(%r13),%rdx + addq %rax,%r11 + leaq (%r9,%r10,1),%rax + vpaddq %ymm14,%ymm5,%ymm4 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 256-24-128(%r13),%ymm14 + movq %rax,%r10 + imull %ecx,%eax + vpmuludq %ymm0,%ymm10,%ymm10 + vpaddq %ymm11,%ymm6,%ymm5 + vmovdqu 288-24-128(%r13),%ymm11 + andl $0x1fffffff,%eax + vpaddq %ymm10,%ymm7,%ymm6 + vpmuludq %ymm0,%ymm14,%ymm14 + addq 24(%rsp),%rdx + vpaddq %ymm14,%ymm8,%ymm7 + vpmuludq %ymm0,%ymm11,%ymm11 + vpaddq %ymm11,%ymm9,%ymm8 + vmovq %r12,%xmm9 + movq %rdx,%r12 + + decl %r14d + jnz L$OOP_REDUCE_1024 + leaq 448(%rsp),%r12 + vpaddq %ymm9,%ymm13,%ymm0 + vpxor %ymm9,%ymm9,%ymm9 + + vpaddq 288-192(%rbx),%ymm0,%ymm0 + vpaddq 320-448(%r12),%ymm1,%ymm1 + vpaddq 352-448(%r12),%ymm2,%ymm2 + vpaddq 384-448(%r12),%ymm3,%ymm3 + vpaddq 416-448(%r12),%ymm4,%ymm4 + vpaddq 448-448(%r12),%ymm5,%ymm5 + vpaddq 480-448(%r12),%ymm6,%ymm6 + vpaddq 512-448(%r12),%ymm7,%ymm7 + vpaddq 544-448(%r12),%ymm8,%ymm8 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm13,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vmovdqu %ymm0,0-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,32-128(%rdi) + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vmovdqu %ymm2,64-128(%rdi) + vpaddq %ymm13,%ymm4,%ymm4 + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vmovdqu %ymm4,128-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vmovdqu %ymm5,160-128(%rdi) + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vmovdqu %ymm6,192-128(%rdi) + vpaddq %ymm13,%ymm8,%ymm8 + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + + movq %rdi,%rsi + decl %r8d + jne L$OOP_GRANDE_SQR_1024 + + vzeroall + movq %rbp,%rax + + movq -48(%rax),%r15 + + movq -40(%rax),%r14 + + movq -32(%rax),%r13 + + movq -24(%rax),%r12 + + movq -16(%rax),%rbp + + movq -8(%rax),%rbx + + leaq (%rax),%rsp + +L$sqr_1024_epilogue: + ret + + +.globl _rsaz_1024_mul_avx2 +.private_extern _rsaz_1024_mul_avx2 + +.p2align 6 +_rsaz_1024_mul_avx2: + +_CET_ENDBR + leaq (%rsp),%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + movq %rax,%rbp + + vzeroall + movq %rdx,%r13 + subq $64,%rsp + + + + + + +.byte 0x67,0x67 + movq %rsi,%r15 + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + movq %rsi,%r15 + cmovnzq %r13,%rsi + cmovnzq %r15,%r13 + + movq %rcx,%r15 + subq $-128,%rsi + subq $-128,%rcx + subq $-128,%rdi + + andq $4095,%r15 + addq $320,%r15 +.byte 0x67,0x67 + shrq $12,%r15 + jz L$mul_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%rcx),%ymm0 + andq $-512,%rsp + vmovdqu 32-128(%rcx),%ymm1 + vmovdqu 64-128(%rcx),%ymm2 + vmovdqu 96-128(%rcx),%ymm3 + vmovdqu 128-128(%rcx),%ymm4 + vmovdqu 160-128(%rcx),%ymm5 + vmovdqu 192-128(%rcx),%ymm6 + vmovdqu 224-128(%rcx),%ymm7 + vmovdqu 256-128(%rcx),%ymm8 + leaq 64+128(%rsp),%rcx + vmovdqu %ymm0,0-128(%rcx) + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm1,32-128(%rcx) + vpxor %ymm1,%ymm1,%ymm1 + vmovdqu %ymm2,64-128(%rcx) + vpxor %ymm2,%ymm2,%ymm2 + vmovdqu %ymm3,96-128(%rcx) + vpxor %ymm3,%ymm3,%ymm3 + vmovdqu %ymm4,128-128(%rcx) + vpxor %ymm4,%ymm4,%ymm4 + vmovdqu %ymm5,160-128(%rcx) + vpxor %ymm5,%ymm5,%ymm5 + vmovdqu %ymm6,192-128(%rcx) + vpxor %ymm6,%ymm6,%ymm6 + vmovdqu %ymm7,224-128(%rcx) + vpxor %ymm7,%ymm7,%ymm7 + vmovdqu %ymm8,256-128(%rcx) + vmovdqa %ymm0,%ymm8 + vmovdqu %ymm9,288-128(%rcx) +L$mul_1024_no_n_copy: + andq $-64,%rsp + + movq (%r13),%rbx + vpbroadcastq (%r13),%ymm10 + vmovdqu %ymm0,(%rsp) + xorq %r9,%r9 +.byte 0x67 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + + vmovdqu L$and_mask(%rip),%ymm15 + movl $9,%r14d + vmovdqu %ymm9,288-128(%rdi) + jmp L$oop_mul_1024 + +.p2align 5 +L$oop_mul_1024: + vpsrlq $29,%ymm3,%ymm9 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r9,%rax + movq %rbx,%r10 + imulq 8-128(%rsi),%r10 + addq 8(%rsp),%r10 + + movq %rax,%r9 + imull %r8d,%eax + andl $0x1fffffff,%eax + + movq %rbx,%r11 + imulq 16-128(%rsi),%r11 + addq 16(%rsp),%r11 + + movq %rbx,%r12 + imulq 24-128(%rsi),%r12 + addq 24(%rsp),%r12 + vpmuludq 32-128(%rsi),%ymm10,%ymm0 + vmovd %eax,%xmm11 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq 64-128(%rsi),%ymm10,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 96-128(%rsi),%ymm10,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq 128-128(%rsi),%ymm10,%ymm0 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq 160-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 192-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq 224-128(%rsi),%ymm10,%ymm0 + vpermq $0x93,%ymm9,%ymm9 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq 256-128(%rsi),%ymm10,%ymm12 + vpbroadcastq 8(%r13),%ymm10 + vpaddq %ymm12,%ymm8,%ymm8 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%rcx),%rax + addq %rax,%r11 + shrq $29,%r9 + imulq 24-128(%rcx),%rdx + addq %rdx,%r12 + addq %r9,%r10 + + vpmuludq 32-128(%rcx),%ymm11,%ymm13 + vmovq %xmm10,%rbx + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 64-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm2,%ymm2 + vpmuludq 96-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 128-128(%rcx),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 160-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm5,%ymm5 + vpmuludq 192-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 224-128(%rcx),%ymm11,%ymm13 + vpblendd $3,%ymm14,%ymm9,%ymm12 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 256-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm0,%ymm8,%ymm8 + + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rsi),%ymm12 + movq %rbx,%rax + imulq 8-128(%rsi),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rsi),%ymm13 + + movq %r10,%rax + vpblendd $0xfc,%ymm14,%ymm9,%ymm9 + imull %r8d,%eax + vpaddq %ymm9,%ymm4,%ymm4 + andl $0x1fffffff,%eax + + imulq 16-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovd %eax,%xmm11 + vmovdqu -8+96-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -8+128-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+160-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+192-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -8+224-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+256-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+288-128(%rsi),%ymm9 + vpaddq %ymm12,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm13,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm9,%ymm9 + vpbroadcastq 16(%r13),%ymm10 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rcx),%ymm0 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rcx),%ymm12 + shrq $29,%r10 + imulq 16-128(%rcx),%rdx + addq %rdx,%r12 + addq %r10,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -8+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rsi),%ymm0 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r11,%rax + + vmovdqu -16+64-128(%rsi),%ymm12 + movq %rax,%r11 + imull %r8d,%eax + andl $0x1fffffff,%eax + + imulq 8-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -16+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -16+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -16+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 24(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rcx),%ymm0 + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r11 + vmovdqu -16+64-128(%rcx),%ymm12 + imulq 8-128(%rcx),%rdx + addq %rdx,%r12 + shrq $29,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -16+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+32-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+64-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm9,%ymm9 + + addq %r11,%r12 + imulq -128(%rsi),%rbx + addq %rbx,%r12 + + movq %r12,%rax + imull %r8d,%eax + andl $0x1fffffff,%eax + + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -24+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -24+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -24+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 32(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + addq $32,%r13 + + vmovdqu -24+32-128(%rcx),%ymm0 + imulq -128(%rcx),%rax + addq %rax,%r12 + shrq $29,%r12 + + vmovdqu -24+64-128(%rcx),%ymm12 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -24+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm0 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu %ymm0,(%rsp) + vpaddq %ymm12,%ymm2,%ymm1 + vmovdqu -24+128-128(%rcx),%ymm0 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm2 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm3 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm4 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm5 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+288-128(%rcx),%ymm13 + movq %r12,%r9 + vpaddq %ymm0,%ymm7,%ymm6 + vpmuludq %ymm11,%ymm12,%ymm12 + addq (%rsp),%r9 + vpaddq %ymm12,%ymm8,%ymm7 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovq %r12,%xmm12 + vpaddq %ymm13,%ymm9,%ymm8 + + decl %r14d + jnz L$oop_mul_1024 + vpaddq (%rsp),%ymm12,%ymm0 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm10,%ymm10 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpermq $0x93,%ymm11,%ymm11 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm10,%ymm10 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm11,%ymm11 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vmovdqu %ymm0,0-128(%rdi) + vmovdqu %ymm1,32-128(%rdi) + vmovdqu %ymm2,64-128(%rdi) + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vmovdqu %ymm4,128-128(%rdi) + vmovdqu %ymm5,160-128(%rdi) + vmovdqu %ymm6,192-128(%rdi) + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + vzeroupper + + movq %rbp,%rax + + movq -48(%rax),%r15 + + movq -40(%rax),%r14 + + movq -32(%rax),%r13 + + movq -24(%rax),%r12 + + movq -16(%rax),%rbp + + movq -8(%rax),%rbx + + leaq (%rax),%rsp + +L$mul_1024_epilogue: + ret + + +.globl _rsaz_1024_red2norm_avx2 +.private_extern _rsaz_1024_red2norm_avx2 + +.p2align 5 +_rsaz_1024_red2norm_avx2: + +_CET_ENDBR + subq $-128,%rsi + xorq %rax,%rax + movq -128(%rsi),%r8 + movq -120(%rsi),%r9 + movq -112(%rsi),%r10 + shlq $0,%r8 + shlq $29,%r9 + movq %r10,%r11 + shlq $58,%r10 + shrq $6,%r11 + addq %r8,%rax + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,0(%rdi) + movq %r11,%rax + movq -104(%rsi),%r8 + movq -96(%rsi),%r9 + shlq $23,%r8 + movq %r9,%r10 + shlq $52,%r9 + shrq $12,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,8(%rdi) + movq %r10,%rax + movq -88(%rsi),%r11 + movq -80(%rsi),%r8 + shlq $17,%r11 + movq %r8,%r9 + shlq $46,%r8 + shrq $18,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,16(%rdi) + movq %r9,%rax + movq -72(%rsi),%r10 + movq -64(%rsi),%r11 + shlq $11,%r10 + movq %r11,%r8 + shlq $40,%r11 + shrq $24,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,24(%rdi) + movq %r8,%rax + movq -56(%rsi),%r9 + movq -48(%rsi),%r10 + movq -40(%rsi),%r11 + shlq $5,%r9 + shlq $34,%r10 + movq %r11,%r8 + shlq $63,%r11 + shrq $1,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,32(%rdi) + movq %r8,%rax + movq -32(%rsi),%r9 + movq -24(%rsi),%r10 + shlq $28,%r9 + movq %r10,%r11 + shlq $57,%r10 + shrq $7,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,40(%rdi) + movq %r11,%rax + movq -16(%rsi),%r8 + movq -8(%rsi),%r9 + shlq $22,%r8 + movq %r9,%r10 + shlq $51,%r9 + shrq $13,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,48(%rdi) + movq %r10,%rax + movq 0(%rsi),%r11 + movq 8(%rsi),%r8 + shlq $16,%r11 + movq %r8,%r9 + shlq $45,%r8 + shrq $19,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,56(%rdi) + movq %r9,%rax + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + shlq $10,%r10 + movq %r11,%r8 + shlq $39,%r11 + shrq $25,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,64(%rdi) + movq %r8,%rax + movq 32(%rsi),%r9 + movq 40(%rsi),%r10 + movq 48(%rsi),%r11 + shlq $4,%r9 + shlq $33,%r10 + movq %r11,%r8 + shlq $62,%r11 + shrq $2,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,72(%rdi) + movq %r8,%rax + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + shlq $27,%r9 + movq %r10,%r11 + shlq $56,%r10 + shrq $8,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,80(%rdi) + movq %r11,%rax + movq 72(%rsi),%r8 + movq 80(%rsi),%r9 + shlq $21,%r8 + movq %r9,%r10 + shlq $50,%r9 + shrq $14,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,88(%rdi) + movq %r10,%rax + movq 88(%rsi),%r11 + movq 96(%rsi),%r8 + shlq $15,%r11 + movq %r8,%r9 + shlq $44,%r8 + shrq $20,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,96(%rdi) + movq %r9,%rax + movq 104(%rsi),%r10 + movq 112(%rsi),%r11 + shlq $9,%r10 + movq %r11,%r8 + shlq $38,%r11 + shrq $26,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,104(%rdi) + movq %r8,%rax + movq 120(%rsi),%r9 + movq 128(%rsi),%r10 + movq 136(%rsi),%r11 + shlq $3,%r9 + shlq $32,%r10 + movq %r11,%r8 + shlq $61,%r11 + shrq $3,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,112(%rdi) + movq %r8,%rax + movq 144(%rsi),%r9 + movq 152(%rsi),%r10 + shlq $26,%r9 + movq %r10,%r11 + shlq $55,%r10 + shrq $9,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,120(%rdi) + movq %r11,%rax + ret + + + +.globl _rsaz_1024_norm2red_avx2 +.private_extern _rsaz_1024_norm2red_avx2 + +.p2align 5 +_rsaz_1024_norm2red_avx2: + +_CET_ENDBR + subq $-128,%rdi + movq (%rsi),%r8 + movl $0x1fffffff,%eax + movq 8(%rsi),%r9 + movq %r8,%r11 + shrq $0,%r11 + andq %rax,%r11 + movq %r11,-128(%rdi) + movq %r8,%r10 + shrq $29,%r10 + andq %rax,%r10 + movq %r10,-120(%rdi) + shrdq $58,%r9,%r8 + andq %rax,%r8 + movq %r8,-112(%rdi) + movq 16(%rsi),%r10 + movq %r9,%r8 + shrq $23,%r8 + andq %rax,%r8 + movq %r8,-104(%rdi) + shrdq $52,%r10,%r9 + andq %rax,%r9 + movq %r9,-96(%rdi) + movq 24(%rsi),%r11 + movq %r10,%r9 + shrq $17,%r9 + andq %rax,%r9 + movq %r9,-88(%rdi) + shrdq $46,%r11,%r10 + andq %rax,%r10 + movq %r10,-80(%rdi) + movq 32(%rsi),%r8 + movq %r11,%r10 + shrq $11,%r10 + andq %rax,%r10 + movq %r10,-72(%rdi) + shrdq $40,%r8,%r11 + andq %rax,%r11 + movq %r11,-64(%rdi) + movq 40(%rsi),%r9 + movq %r8,%r11 + shrq $5,%r11 + andq %rax,%r11 + movq %r11,-56(%rdi) + movq %r8,%r10 + shrq $34,%r10 + andq %rax,%r10 + movq %r10,-48(%rdi) + shrdq $63,%r9,%r8 + andq %rax,%r8 + movq %r8,-40(%rdi) + movq 48(%rsi),%r10 + movq %r9,%r8 + shrq $28,%r8 + andq %rax,%r8 + movq %r8,-32(%rdi) + shrdq $57,%r10,%r9 + andq %rax,%r9 + movq %r9,-24(%rdi) + movq 56(%rsi),%r11 + movq %r10,%r9 + shrq $22,%r9 + andq %rax,%r9 + movq %r9,-16(%rdi) + shrdq $51,%r11,%r10 + andq %rax,%r10 + movq %r10,-8(%rdi) + movq 64(%rsi),%r8 + movq %r11,%r10 + shrq $16,%r10 + andq %rax,%r10 + movq %r10,0(%rdi) + shrdq $45,%r8,%r11 + andq %rax,%r11 + movq %r11,8(%rdi) + movq 72(%rsi),%r9 + movq %r8,%r11 + shrq $10,%r11 + andq %rax,%r11 + movq %r11,16(%rdi) + shrdq $39,%r9,%r8 + andq %rax,%r8 + movq %r8,24(%rdi) + movq 80(%rsi),%r10 + movq %r9,%r8 + shrq $4,%r8 + andq %rax,%r8 + movq %r8,32(%rdi) + movq %r9,%r11 + shrq $33,%r11 + andq %rax,%r11 + movq %r11,40(%rdi) + shrdq $62,%r10,%r9 + andq %rax,%r9 + movq %r9,48(%rdi) + movq 88(%rsi),%r11 + movq %r10,%r9 + shrq $27,%r9 + andq %rax,%r9 + movq %r9,56(%rdi) + shrdq $56,%r11,%r10 + andq %rax,%r10 + movq %r10,64(%rdi) + movq 96(%rsi),%r8 + movq %r11,%r10 + shrq $21,%r10 + andq %rax,%r10 + movq %r10,72(%rdi) + shrdq $50,%r8,%r11 + andq %rax,%r11 + movq %r11,80(%rdi) + movq 104(%rsi),%r9 + movq %r8,%r11 + shrq $15,%r11 + andq %rax,%r11 + movq %r11,88(%rdi) + shrdq $44,%r9,%r8 + andq %rax,%r8 + movq %r8,96(%rdi) + movq 112(%rsi),%r10 + movq %r9,%r8 + shrq $9,%r8 + andq %rax,%r8 + movq %r8,104(%rdi) + shrdq $38,%r10,%r9 + andq %rax,%r9 + movq %r9,112(%rdi) + movq 120(%rsi),%r11 + movq %r10,%r9 + shrq $3,%r9 + andq %rax,%r9 + movq %r9,120(%rdi) + movq %r10,%r8 + shrq $32,%r8 + andq %rax,%r8 + movq %r8,128(%rdi) + shrdq $61,%r11,%r10 + andq %rax,%r10 + movq %r10,136(%rdi) + xorq %r8,%r8 + movq %r11,%r10 + shrq $26,%r10 + andq %rax,%r10 + movq %r10,144(%rdi) + shrdq $55,%r8,%r11 + andq %rax,%r11 + movq %r11,152(%rdi) + movq %r8,160(%rdi) + movq %r8,168(%rdi) + movq %r8,176(%rdi) + movq %r8,184(%rdi) + ret + + +.globl _rsaz_1024_scatter5_avx2 +.private_extern _rsaz_1024_scatter5_avx2 + +.p2align 5 +_rsaz_1024_scatter5_avx2: + +_CET_ENDBR + vzeroupper + vmovdqu L$scatter_permd(%rip),%ymm5 + shll $4,%edx + leaq (%rdi,%rdx,1),%rdi + movl $9,%eax + jmp L$oop_scatter_1024 + +.p2align 5 +L$oop_scatter_1024: + vmovdqu (%rsi),%ymm0 + leaq 32(%rsi),%rsi + vpermd %ymm0,%ymm5,%ymm0 + vmovdqu %xmm0,(%rdi) + leaq 512(%rdi),%rdi + decl %eax + jnz L$oop_scatter_1024 + + vzeroupper + ret + + + +.globl _rsaz_1024_gather5_avx2 +.private_extern _rsaz_1024_gather5_avx2 + +.p2align 5 +_rsaz_1024_gather5_avx2: + +_CET_ENDBR + vzeroupper + movq %rsp,%r11 + + leaq -256(%rsp),%rsp + andq $-32,%rsp + leaq L$inc(%rip),%r10 + leaq -128(%rsp),%rax + + vmovd %edx,%xmm4 + vmovdqa (%r10),%ymm0 + vmovdqa 32(%r10),%ymm1 + vmovdqa 64(%r10),%ymm5 + vpbroadcastd %xmm4,%ymm4 + + vpaddd %ymm5,%ymm0,%ymm2 + vpcmpeqd %ymm4,%ymm0,%ymm0 + vpaddd %ymm5,%ymm1,%ymm3 + vpcmpeqd %ymm4,%ymm1,%ymm1 + vmovdqa %ymm0,0+128(%rax) + vpaddd %ymm5,%ymm2,%ymm0 + vpcmpeqd %ymm4,%ymm2,%ymm2 + vmovdqa %ymm1,32+128(%rax) + vpaddd %ymm5,%ymm3,%ymm1 + vpcmpeqd %ymm4,%ymm3,%ymm3 + vmovdqa %ymm2,64+128(%rax) + vpaddd %ymm5,%ymm0,%ymm2 + vpcmpeqd %ymm4,%ymm0,%ymm0 + vmovdqa %ymm3,96+128(%rax) + vpaddd %ymm5,%ymm1,%ymm3 + vpcmpeqd %ymm4,%ymm1,%ymm1 + vmovdqa %ymm0,128+128(%rax) + vpaddd %ymm5,%ymm2,%ymm8 + vpcmpeqd %ymm4,%ymm2,%ymm2 + vmovdqa %ymm1,160+128(%rax) + vpaddd %ymm5,%ymm3,%ymm9 + vpcmpeqd %ymm4,%ymm3,%ymm3 + vmovdqa %ymm2,192+128(%rax) + vpaddd %ymm5,%ymm8,%ymm10 + vpcmpeqd %ymm4,%ymm8,%ymm8 + vmovdqa %ymm3,224+128(%rax) + vpaddd %ymm5,%ymm9,%ymm11 + vpcmpeqd %ymm4,%ymm9,%ymm9 + vpaddd %ymm5,%ymm10,%ymm12 + vpcmpeqd %ymm4,%ymm10,%ymm10 + vpaddd %ymm5,%ymm11,%ymm13 + vpcmpeqd %ymm4,%ymm11,%ymm11 + vpaddd %ymm5,%ymm12,%ymm14 + vpcmpeqd %ymm4,%ymm12,%ymm12 + vpaddd %ymm5,%ymm13,%ymm15 + vpcmpeqd %ymm4,%ymm13,%ymm13 + vpcmpeqd %ymm4,%ymm14,%ymm14 + vpcmpeqd %ymm4,%ymm15,%ymm15 + + vmovdqa -32(%r10),%ymm7 + leaq 128(%rsi),%rsi + movl $9,%edx + +L$oop_gather_1024: + vmovdqa 0-128(%rsi),%ymm0 + vmovdqa 32-128(%rsi),%ymm1 + vmovdqa 64-128(%rsi),%ymm2 + vmovdqa 96-128(%rsi),%ymm3 + vpand 0+128(%rax),%ymm0,%ymm0 + vpand 32+128(%rax),%ymm1,%ymm1 + vpand 64+128(%rax),%ymm2,%ymm2 + vpor %ymm0,%ymm1,%ymm4 + vpand 96+128(%rax),%ymm3,%ymm3 + vmovdqa 128-128(%rsi),%ymm0 + vmovdqa 160-128(%rsi),%ymm1 + vpor %ymm2,%ymm3,%ymm5 + vmovdqa 192-128(%rsi),%ymm2 + vmovdqa 224-128(%rsi),%ymm3 + vpand 128+128(%rax),%ymm0,%ymm0 + vpand 160+128(%rax),%ymm1,%ymm1 + vpand 192+128(%rax),%ymm2,%ymm2 + vpor %ymm0,%ymm4,%ymm4 + vpand 224+128(%rax),%ymm3,%ymm3 + vpand 256-128(%rsi),%ymm8,%ymm0 + vpor %ymm1,%ymm5,%ymm5 + vpand 288-128(%rsi),%ymm9,%ymm1 + vpor %ymm2,%ymm4,%ymm4 + vpand 320-128(%rsi),%ymm10,%ymm2 + vpor %ymm3,%ymm5,%ymm5 + vpand 352-128(%rsi),%ymm11,%ymm3 + vpor %ymm0,%ymm4,%ymm4 + vpand 384-128(%rsi),%ymm12,%ymm0 + vpor %ymm1,%ymm5,%ymm5 + vpand 416-128(%rsi),%ymm13,%ymm1 + vpor %ymm2,%ymm4,%ymm4 + vpand 448-128(%rsi),%ymm14,%ymm2 + vpor %ymm3,%ymm5,%ymm5 + vpand 480-128(%rsi),%ymm15,%ymm3 + leaq 512(%rsi),%rsi + vpor %ymm0,%ymm4,%ymm4 + vpor %ymm1,%ymm5,%ymm5 + vpor %ymm2,%ymm4,%ymm4 + vpor %ymm3,%ymm5,%ymm5 + + vpor %ymm5,%ymm4,%ymm4 + vextracti128 $1,%ymm4,%xmm5 + vpor %xmm4,%xmm5,%xmm5 + vpermd %ymm5,%ymm7,%ymm5 + vmovdqu %ymm5,(%rdi) + leaq 32(%rdi),%rdi + decl %edx + jnz L$oop_gather_1024 + + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + vzeroupper + leaq (%r11),%rsp + + ret + +L$SEH_end_rsaz_1024_gather5: + +.section __DATA,__const +.p2align 6 +L$and_mask: +.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff +L$scatter_permd: +.long 0,2,4,6,7,7,7,7 +L$gather_permd: +.long 0,7,1,7,2,7,3,7 +L$inc: +.long 0,0,0,0, 1,1,1,1 +.long 2,2,2,2, 3,3,3,3 +.long 4,4,4,4, 4,4,4,4 +.p2align 6 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-linux.S b/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-linux.S new file mode 100644 index 0000000000..65a6c2e8cd --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-linux.S @@ -0,0 +1,1749 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.globl rsaz_1024_sqr_avx2 +.hidden rsaz_1024_sqr_avx2 +.type rsaz_1024_sqr_avx2,@function +.align 64 +rsaz_1024_sqr_avx2: +.cfi_startproc +_CET_ENDBR + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + vzeroupper + movq %rax,%rbp +.cfi_def_cfa_register %rbp + movq %rdx,%r13 + subq $832,%rsp + movq %r13,%r15 + subq $-128,%rdi + subq $-128,%rsi + subq $-128,%r13 + + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + vpxor %ymm9,%ymm9,%ymm9 + jz .Lsqr_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%r13),%ymm0 + andq $-2048,%rsp + vmovdqu 32-128(%r13),%ymm1 + vmovdqu 64-128(%r13),%ymm2 + vmovdqu 96-128(%r13),%ymm3 + vmovdqu 128-128(%r13),%ymm4 + vmovdqu 160-128(%r13),%ymm5 + vmovdqu 192-128(%r13),%ymm6 + vmovdqu 224-128(%r13),%ymm7 + vmovdqu 256-128(%r13),%ymm8 + leaq 832+128(%rsp),%r13 + vmovdqu %ymm0,0-128(%r13) + vmovdqu %ymm1,32-128(%r13) + vmovdqu %ymm2,64-128(%r13) + vmovdqu %ymm3,96-128(%r13) + vmovdqu %ymm4,128-128(%r13) + vmovdqu %ymm5,160-128(%r13) + vmovdqu %ymm6,192-128(%r13) + vmovdqu %ymm7,224-128(%r13) + vmovdqu %ymm8,256-128(%r13) + vmovdqu %ymm9,288-128(%r13) + +.Lsqr_1024_no_n_copy: + andq $-1024,%rsp + + vmovdqu 32-128(%rsi),%ymm1 + vmovdqu 64-128(%rsi),%ymm2 + vmovdqu 96-128(%rsi),%ymm3 + vmovdqu 128-128(%rsi),%ymm4 + vmovdqu 160-128(%rsi),%ymm5 + vmovdqu 192-128(%rsi),%ymm6 + vmovdqu 224-128(%rsi),%ymm7 + vmovdqu 256-128(%rsi),%ymm8 + + leaq 192(%rsp),%rbx + vmovdqu .Land_mask(%rip),%ymm15 + jmp .LOOP_GRANDE_SQR_1024 + +.align 32 +.LOOP_GRANDE_SQR_1024: + leaq 576+128(%rsp),%r9 + leaq 448(%rsp),%r12 + + + + + vpaddq %ymm1,%ymm1,%ymm1 + vpbroadcastq 0-128(%rsi),%ymm10 + vpaddq %ymm2,%ymm2,%ymm2 + vmovdqa %ymm1,0-128(%r9) + vpaddq %ymm3,%ymm3,%ymm3 + vmovdqa %ymm2,32-128(%r9) + vpaddq %ymm4,%ymm4,%ymm4 + vmovdqa %ymm3,64-128(%r9) + vpaddq %ymm5,%ymm5,%ymm5 + vmovdqa %ymm4,96-128(%r9) + vpaddq %ymm6,%ymm6,%ymm6 + vmovdqa %ymm5,128-128(%r9) + vpaddq %ymm7,%ymm7,%ymm7 + vmovdqa %ymm6,160-128(%r9) + vpaddq %ymm8,%ymm8,%ymm8 + vmovdqa %ymm7,192-128(%r9) + vpxor %ymm9,%ymm9,%ymm9 + vmovdqa %ymm8,224-128(%r9) + + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpbroadcastq 32-128(%rsi),%ymm11 + vmovdqu %ymm9,288-192(%rbx) + vpmuludq %ymm10,%ymm1,%ymm1 + vmovdqu %ymm9,320-448(%r12) + vpmuludq %ymm10,%ymm2,%ymm2 + vmovdqu %ymm9,352-448(%r12) + vpmuludq %ymm10,%ymm3,%ymm3 + vmovdqu %ymm9,384-448(%r12) + vpmuludq %ymm10,%ymm4,%ymm4 + vmovdqu %ymm9,416-448(%r12) + vpmuludq %ymm10,%ymm5,%ymm5 + vmovdqu %ymm9,448-448(%r12) + vpmuludq %ymm10,%ymm6,%ymm6 + vmovdqu %ymm9,480-448(%r12) + vpmuludq %ymm10,%ymm7,%ymm7 + vmovdqu %ymm9,512-448(%r12) + vpmuludq %ymm10,%ymm8,%ymm8 + vpbroadcastq 64-128(%rsi),%ymm10 + vmovdqu %ymm9,544-448(%r12) + + movq %rsi,%r15 + movl $4,%r14d + jmp .Lsqr_entry_1024 +.align 32 +.LOOP_SQR_1024: + vpbroadcastq 32-128(%r15),%ymm11 + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpaddq 0-192(%rbx),%ymm0,%ymm0 + vpmuludq 0-128(%r9),%ymm10,%ymm1 + vpaddq 32-192(%rbx),%ymm1,%ymm1 + vpmuludq 32-128(%r9),%ymm10,%ymm2 + vpaddq 64-192(%rbx),%ymm2,%ymm2 + vpmuludq 64-128(%r9),%ymm10,%ymm3 + vpaddq 96-192(%rbx),%ymm3,%ymm3 + vpmuludq 96-128(%r9),%ymm10,%ymm4 + vpaddq 128-192(%rbx),%ymm4,%ymm4 + vpmuludq 128-128(%r9),%ymm10,%ymm5 + vpaddq 160-192(%rbx),%ymm5,%ymm5 + vpmuludq 160-128(%r9),%ymm10,%ymm6 + vpaddq 192-192(%rbx),%ymm6,%ymm6 + vpmuludq 192-128(%r9),%ymm10,%ymm7 + vpaddq 224-192(%rbx),%ymm7,%ymm7 + vpmuludq 224-128(%r9),%ymm10,%ymm8 + vpbroadcastq 64-128(%r15),%ymm10 + vpaddq 256-192(%rbx),%ymm8,%ymm8 +.Lsqr_entry_1024: + vmovdqu %ymm0,0-192(%rbx) + vmovdqu %ymm1,32-192(%rbx) + + vpmuludq 32-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 32-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 64-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 96-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 128-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 160-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 192-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 224-128(%r9),%ymm11,%ymm0 + vpbroadcastq 96-128(%r15),%ymm11 + vpaddq 288-192(%rbx),%ymm0,%ymm0 + + vmovdqu %ymm2,64-192(%rbx) + vmovdqu %ymm3,96-192(%rbx) + + vpmuludq 64-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 64-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 96-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 128-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 160-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 224-128(%r9),%ymm10,%ymm1 + vpbroadcastq 128-128(%r15),%ymm10 + vpaddq 320-448(%r12),%ymm1,%ymm1 + + vmovdqu %ymm4,128-192(%rbx) + vmovdqu %ymm5,160-192(%rbx) + + vpmuludq 96-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 96-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq 128-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm0,%ymm0 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq 224-128(%r9),%ymm11,%ymm2 + vpbroadcastq 160-128(%r15),%ymm11 + vpaddq 352-448(%r12),%ymm2,%ymm2 + + vmovdqu %ymm6,192-192(%rbx) + vmovdqu %ymm7,224-192(%rbx) + + vpmuludq 128-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 128-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 160-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 192-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 224-128(%r9),%ymm10,%ymm3 + vpbroadcastq 192-128(%r15),%ymm10 + vpaddq 384-448(%r12),%ymm3,%ymm3 + + vmovdqu %ymm8,256-192(%rbx) + vmovdqu %ymm0,288-192(%rbx) + leaq 8(%rbx),%rbx + + vpmuludq 160-128(%rsi),%ymm11,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 224-128(%r9),%ymm11,%ymm4 + vpbroadcastq 224-128(%r15),%ymm11 + vpaddq 416-448(%r12),%ymm4,%ymm4 + + vmovdqu %ymm1,320-448(%r12) + vmovdqu %ymm2,352-448(%r12) + + vpmuludq 192-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpbroadcastq 256-128(%r15),%ymm0 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq 224-128(%r9),%ymm10,%ymm5 + vpbroadcastq 0+8-128(%r15),%ymm10 + vpaddq 448-448(%r12),%ymm5,%ymm5 + + vmovdqu %ymm3,384-448(%r12) + vmovdqu %ymm4,416-448(%r12) + leaq 8(%r15),%r15 + + vpmuludq 224-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 224-128(%r9),%ymm11,%ymm6 + vpaddq 480-448(%r12),%ymm6,%ymm6 + + vpmuludq 256-128(%rsi),%ymm0,%ymm7 + vmovdqu %ymm5,448-448(%r12) + vpaddq 512-448(%r12),%ymm7,%ymm7 + vmovdqu %ymm6,480-448(%r12) + vmovdqu %ymm7,512-448(%r12) + leaq 8(%r12),%r12 + + decl %r14d + jnz .LOOP_SQR_1024 + + vmovdqu 256(%rsp),%ymm8 + vmovdqu 288(%rsp),%ymm1 + vmovdqu 320(%rsp),%ymm2 + leaq 192(%rsp),%rbx + + vpsrlq $29,%ymm8,%ymm14 + vpand %ymm15,%ymm8,%ymm8 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + + vpermq $0x93,%ymm14,%ymm14 + vpxor %ymm9,%ymm9,%ymm9 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm8,%ymm8 + vpblendd $3,%ymm11,%ymm9,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,288-192(%rbx) + vmovdqu %ymm2,320-192(%rbx) + + movq (%rsp),%rax + movq 8(%rsp),%r10 + movq 16(%rsp),%r11 + movq 24(%rsp),%r12 + vmovdqu 32(%rsp),%ymm1 + vmovdqu 64-192(%rbx),%ymm2 + vmovdqu 96-192(%rbx),%ymm3 + vmovdqu 128-192(%rbx),%ymm4 + vmovdqu 160-192(%rbx),%ymm5 + vmovdqu 192-192(%rbx),%ymm6 + vmovdqu 224-192(%rbx),%ymm7 + + movq %rax,%r9 + imull %ecx,%eax + andl $0x1fffffff,%eax + vmovd %eax,%xmm12 + + movq %rax,%rdx + imulq -128(%r13),%rax + vpbroadcastq %xmm12,%ymm12 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax + shrq $29,%r9 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + addq %r9,%r10 + addq %rax,%r11 + imulq 24-128(%r13),%rdx + addq %rdx,%r12 + + movq %r10,%rax + imull %ecx,%eax + andl $0x1fffffff,%eax + + movl $9,%r14d + jmp .LOOP_REDUCE_1024 + +.align 32 +.LOOP_REDUCE_1024: + vmovd %eax,%xmm13 + vpbroadcastq %xmm13,%ymm13 + + vpmuludq 32-128(%r13),%ymm12,%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm10,%ymm1,%ymm1 + addq %rax,%r10 + vpmuludq 64-128(%r13),%ymm12,%ymm14 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm14,%ymm2,%ymm2 + vpmuludq 96-128(%r13),%ymm12,%ymm11 +.byte 0x67 + addq %rax,%r11 +.byte 0x67 + movq %rdx,%rax + imulq 16-128(%r13),%rax + shrq $29,%r10 + vpaddq %ymm11,%ymm3,%ymm3 + vpmuludq 128-128(%r13),%ymm12,%ymm10 + addq %rax,%r12 + addq %r10,%r11 + vpaddq %ymm10,%ymm4,%ymm4 + vpmuludq 160-128(%r13),%ymm12,%ymm14 + movq %r11,%rax + imull %ecx,%eax + vpaddq %ymm14,%ymm5,%ymm5 + vpmuludq 192-128(%r13),%ymm12,%ymm11 + andl $0x1fffffff,%eax + vpaddq %ymm11,%ymm6,%ymm6 + vpmuludq 224-128(%r13),%ymm12,%ymm10 + vpaddq %ymm10,%ymm7,%ymm7 + vpmuludq 256-128(%r13),%ymm12,%ymm14 + vmovd %eax,%xmm12 + + vpaddq %ymm14,%ymm8,%ymm8 + + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 32-8-128(%r13),%ymm13,%ymm11 + vmovdqu 96-8-128(%r13),%ymm14 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm1,%ymm1 + vpmuludq 64-8-128(%r13),%ymm13,%ymm10 + vmovdqu 128-8-128(%r13),%ymm11 + addq %rax,%r11 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm10,%ymm2,%ymm2 + addq %r12,%rax + shrq $29,%r11 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 160-8-128(%r13),%ymm10 + addq %r11,%rax + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 192-8-128(%r13),%ymm14 +.byte 0x67 + movq %rax,%r12 + imull %ecx,%eax + vpaddq %ymm11,%ymm4,%ymm4 + vpmuludq %ymm13,%ymm10,%ymm10 +.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 + andl $0x1fffffff,%eax + vpaddq %ymm10,%ymm5,%ymm5 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 256-8-128(%r13),%ymm10 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 288-8-128(%r13),%ymm9 + vmovd %eax,%xmm0 + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm7,%ymm7 + vpmuludq %ymm13,%ymm10,%ymm10 + vmovdqu 32-16-128(%r13),%ymm14 + vpbroadcastq %xmm0,%ymm0 + vpaddq %ymm10,%ymm8,%ymm8 + vpmuludq %ymm13,%ymm9,%ymm9 + vmovdqu 64-16-128(%r13),%ymm11 + addq %rax,%r12 + + vmovdqu 32-24-128(%r13),%ymm13 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 96-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq %ymm0,%ymm13,%ymm13 + vpmuludq %ymm12,%ymm11,%ymm11 +.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff + vpaddq %ymm1,%ymm13,%ymm13 + vpaddq %ymm11,%ymm2,%ymm2 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 160-16-128(%r13),%ymm11 +.byte 0x67 + vmovq %xmm13,%rax + vmovdqu %ymm13,(%rsp) + vpaddq %ymm10,%ymm3,%ymm3 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 192-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq %ymm12,%ymm11,%ymm11 + vmovdqu 224-16-128(%r13),%ymm14 + vpaddq %ymm11,%ymm5,%ymm5 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 256-16-128(%r13),%ymm11 + vpaddq %ymm10,%ymm6,%ymm6 + vpmuludq %ymm12,%ymm14,%ymm14 + shrq $29,%r12 + vmovdqu 288-16-128(%r13),%ymm10 + addq %r12,%rax + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq %ymm12,%ymm11,%ymm11 + + movq %rax,%r9 + imull %ecx,%eax + vpaddq %ymm11,%ymm8,%ymm8 + vpmuludq %ymm12,%ymm10,%ymm10 + andl $0x1fffffff,%eax + vmovd %eax,%xmm12 + vmovdqu 96-24-128(%r13),%ymm11 +.byte 0x67 + vpaddq %ymm10,%ymm9,%ymm9 + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 64-24-128(%r13),%ymm0,%ymm14 + vmovdqu 128-24-128(%r13),%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + movq 8(%rsp),%r10 + vpaddq %ymm14,%ymm2,%ymm1 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 160-24-128(%r13),%ymm14 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax +.byte 0x67 + shrq $29,%r9 + movq 16(%rsp),%r11 + vpaddq %ymm11,%ymm3,%ymm2 + vpmuludq %ymm0,%ymm10,%ymm10 + vmovdqu 192-24-128(%r13),%ymm11 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + vpaddq %ymm10,%ymm4,%ymm3 + vpmuludq %ymm0,%ymm14,%ymm14 + vmovdqu 224-24-128(%r13),%ymm10 + imulq 24-128(%r13),%rdx + addq %rax,%r11 + leaq (%r9,%r10,1),%rax + vpaddq %ymm14,%ymm5,%ymm4 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 256-24-128(%r13),%ymm14 + movq %rax,%r10 + imull %ecx,%eax + vpmuludq %ymm0,%ymm10,%ymm10 + vpaddq %ymm11,%ymm6,%ymm5 + vmovdqu 288-24-128(%r13),%ymm11 + andl $0x1fffffff,%eax + vpaddq %ymm10,%ymm7,%ymm6 + vpmuludq %ymm0,%ymm14,%ymm14 + addq 24(%rsp),%rdx + vpaddq %ymm14,%ymm8,%ymm7 + vpmuludq %ymm0,%ymm11,%ymm11 + vpaddq %ymm11,%ymm9,%ymm8 + vmovq %r12,%xmm9 + movq %rdx,%r12 + + decl %r14d + jnz .LOOP_REDUCE_1024 + leaq 448(%rsp),%r12 + vpaddq %ymm9,%ymm13,%ymm0 + vpxor %ymm9,%ymm9,%ymm9 + + vpaddq 288-192(%rbx),%ymm0,%ymm0 + vpaddq 320-448(%r12),%ymm1,%ymm1 + vpaddq 352-448(%r12),%ymm2,%ymm2 + vpaddq 384-448(%r12),%ymm3,%ymm3 + vpaddq 416-448(%r12),%ymm4,%ymm4 + vpaddq 448-448(%r12),%ymm5,%ymm5 + vpaddq 480-448(%r12),%ymm6,%ymm6 + vpaddq 512-448(%r12),%ymm7,%ymm7 + vpaddq 544-448(%r12),%ymm8,%ymm8 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm13,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vmovdqu %ymm0,0-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,32-128(%rdi) + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vmovdqu %ymm2,64-128(%rdi) + vpaddq %ymm13,%ymm4,%ymm4 + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vmovdqu %ymm4,128-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vmovdqu %ymm5,160-128(%rdi) + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vmovdqu %ymm6,192-128(%rdi) + vpaddq %ymm13,%ymm8,%ymm8 + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + + movq %rdi,%rsi + decl %r8d + jne .LOOP_GRANDE_SQR_1024 + + vzeroall + movq %rbp,%rax +.cfi_def_cfa_register %rax + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lsqr_1024_epilogue: + ret +.cfi_endproc +.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 +.globl rsaz_1024_mul_avx2 +.hidden rsaz_1024_mul_avx2 +.type rsaz_1024_mul_avx2,@function +.align 64 +rsaz_1024_mul_avx2: +.cfi_startproc +_CET_ENDBR + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + movq %rax,%rbp +.cfi_def_cfa_register %rbp + vzeroall + movq %rdx,%r13 + subq $64,%rsp + + + + + + +.byte 0x67,0x67 + movq %rsi,%r15 + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + movq %rsi,%r15 + cmovnzq %r13,%rsi + cmovnzq %r15,%r13 + + movq %rcx,%r15 + subq $-128,%rsi + subq $-128,%rcx + subq $-128,%rdi + + andq $4095,%r15 + addq $320,%r15 +.byte 0x67,0x67 + shrq $12,%r15 + jz .Lmul_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%rcx),%ymm0 + andq $-512,%rsp + vmovdqu 32-128(%rcx),%ymm1 + vmovdqu 64-128(%rcx),%ymm2 + vmovdqu 96-128(%rcx),%ymm3 + vmovdqu 128-128(%rcx),%ymm4 + vmovdqu 160-128(%rcx),%ymm5 + vmovdqu 192-128(%rcx),%ymm6 + vmovdqu 224-128(%rcx),%ymm7 + vmovdqu 256-128(%rcx),%ymm8 + leaq 64+128(%rsp),%rcx + vmovdqu %ymm0,0-128(%rcx) + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm1,32-128(%rcx) + vpxor %ymm1,%ymm1,%ymm1 + vmovdqu %ymm2,64-128(%rcx) + vpxor %ymm2,%ymm2,%ymm2 + vmovdqu %ymm3,96-128(%rcx) + vpxor %ymm3,%ymm3,%ymm3 + vmovdqu %ymm4,128-128(%rcx) + vpxor %ymm4,%ymm4,%ymm4 + vmovdqu %ymm5,160-128(%rcx) + vpxor %ymm5,%ymm5,%ymm5 + vmovdqu %ymm6,192-128(%rcx) + vpxor %ymm6,%ymm6,%ymm6 + vmovdqu %ymm7,224-128(%rcx) + vpxor %ymm7,%ymm7,%ymm7 + vmovdqu %ymm8,256-128(%rcx) + vmovdqa %ymm0,%ymm8 + vmovdqu %ymm9,288-128(%rcx) +.Lmul_1024_no_n_copy: + andq $-64,%rsp + + movq (%r13),%rbx + vpbroadcastq (%r13),%ymm10 + vmovdqu %ymm0,(%rsp) + xorq %r9,%r9 +.byte 0x67 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + + vmovdqu .Land_mask(%rip),%ymm15 + movl $9,%r14d + vmovdqu %ymm9,288-128(%rdi) + jmp .Loop_mul_1024 + +.align 32 +.Loop_mul_1024: + vpsrlq $29,%ymm3,%ymm9 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r9,%rax + movq %rbx,%r10 + imulq 8-128(%rsi),%r10 + addq 8(%rsp),%r10 + + movq %rax,%r9 + imull %r8d,%eax + andl $0x1fffffff,%eax + + movq %rbx,%r11 + imulq 16-128(%rsi),%r11 + addq 16(%rsp),%r11 + + movq %rbx,%r12 + imulq 24-128(%rsi),%r12 + addq 24(%rsp),%r12 + vpmuludq 32-128(%rsi),%ymm10,%ymm0 + vmovd %eax,%xmm11 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq 64-128(%rsi),%ymm10,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 96-128(%rsi),%ymm10,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq 128-128(%rsi),%ymm10,%ymm0 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq 160-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 192-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq 224-128(%rsi),%ymm10,%ymm0 + vpermq $0x93,%ymm9,%ymm9 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq 256-128(%rsi),%ymm10,%ymm12 + vpbroadcastq 8(%r13),%ymm10 + vpaddq %ymm12,%ymm8,%ymm8 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%rcx),%rax + addq %rax,%r11 + shrq $29,%r9 + imulq 24-128(%rcx),%rdx + addq %rdx,%r12 + addq %r9,%r10 + + vpmuludq 32-128(%rcx),%ymm11,%ymm13 + vmovq %xmm10,%rbx + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 64-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm2,%ymm2 + vpmuludq 96-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 128-128(%rcx),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 160-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm5,%ymm5 + vpmuludq 192-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 224-128(%rcx),%ymm11,%ymm13 + vpblendd $3,%ymm14,%ymm9,%ymm12 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 256-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm0,%ymm8,%ymm8 + + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rsi),%ymm12 + movq %rbx,%rax + imulq 8-128(%rsi),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rsi),%ymm13 + + movq %r10,%rax + vpblendd $0xfc,%ymm14,%ymm9,%ymm9 + imull %r8d,%eax + vpaddq %ymm9,%ymm4,%ymm4 + andl $0x1fffffff,%eax + + imulq 16-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovd %eax,%xmm11 + vmovdqu -8+96-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -8+128-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+160-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+192-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -8+224-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+256-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+288-128(%rsi),%ymm9 + vpaddq %ymm12,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm13,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm9,%ymm9 + vpbroadcastq 16(%r13),%ymm10 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rcx),%ymm0 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rcx),%ymm12 + shrq $29,%r10 + imulq 16-128(%rcx),%rdx + addq %rdx,%r12 + addq %r10,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -8+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rsi),%ymm0 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r11,%rax + + vmovdqu -16+64-128(%rsi),%ymm12 + movq %rax,%r11 + imull %r8d,%eax + andl $0x1fffffff,%eax + + imulq 8-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -16+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -16+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -16+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 24(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rcx),%ymm0 + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r11 + vmovdqu -16+64-128(%rcx),%ymm12 + imulq 8-128(%rcx),%rdx + addq %rdx,%r12 + shrq $29,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -16+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+32-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+64-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm9,%ymm9 + + addq %r11,%r12 + imulq -128(%rsi),%rbx + addq %rbx,%r12 + + movq %r12,%rax + imull %r8d,%eax + andl $0x1fffffff,%eax + + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -24+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -24+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -24+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 32(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + addq $32,%r13 + + vmovdqu -24+32-128(%rcx),%ymm0 + imulq -128(%rcx),%rax + addq %rax,%r12 + shrq $29,%r12 + + vmovdqu -24+64-128(%rcx),%ymm12 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -24+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm0 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu %ymm0,(%rsp) + vpaddq %ymm12,%ymm2,%ymm1 + vmovdqu -24+128-128(%rcx),%ymm0 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm2 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm3 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm4 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm5 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+288-128(%rcx),%ymm13 + movq %r12,%r9 + vpaddq %ymm0,%ymm7,%ymm6 + vpmuludq %ymm11,%ymm12,%ymm12 + addq (%rsp),%r9 + vpaddq %ymm12,%ymm8,%ymm7 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovq %r12,%xmm12 + vpaddq %ymm13,%ymm9,%ymm8 + + decl %r14d + jnz .Loop_mul_1024 + vpaddq (%rsp),%ymm12,%ymm0 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm10,%ymm10 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpermq $0x93,%ymm11,%ymm11 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm10,%ymm10 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm11,%ymm11 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vmovdqu %ymm0,0-128(%rdi) + vmovdqu %ymm1,32-128(%rdi) + vmovdqu %ymm2,64-128(%rdi) + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vmovdqu %ymm4,128-128(%rdi) + vmovdqu %ymm5,160-128(%rdi) + vmovdqu %ymm6,192-128(%rdi) + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + vzeroupper + + movq %rbp,%rax +.cfi_def_cfa_register %rax + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lmul_1024_epilogue: + ret +.cfi_endproc +.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 +.globl rsaz_1024_red2norm_avx2 +.hidden rsaz_1024_red2norm_avx2 +.type rsaz_1024_red2norm_avx2,@function +.align 32 +rsaz_1024_red2norm_avx2: +.cfi_startproc +_CET_ENDBR + subq $-128,%rsi + xorq %rax,%rax + movq -128(%rsi),%r8 + movq -120(%rsi),%r9 + movq -112(%rsi),%r10 + shlq $0,%r8 + shlq $29,%r9 + movq %r10,%r11 + shlq $58,%r10 + shrq $6,%r11 + addq %r8,%rax + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,0(%rdi) + movq %r11,%rax + movq -104(%rsi),%r8 + movq -96(%rsi),%r9 + shlq $23,%r8 + movq %r9,%r10 + shlq $52,%r9 + shrq $12,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,8(%rdi) + movq %r10,%rax + movq -88(%rsi),%r11 + movq -80(%rsi),%r8 + shlq $17,%r11 + movq %r8,%r9 + shlq $46,%r8 + shrq $18,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,16(%rdi) + movq %r9,%rax + movq -72(%rsi),%r10 + movq -64(%rsi),%r11 + shlq $11,%r10 + movq %r11,%r8 + shlq $40,%r11 + shrq $24,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,24(%rdi) + movq %r8,%rax + movq -56(%rsi),%r9 + movq -48(%rsi),%r10 + movq -40(%rsi),%r11 + shlq $5,%r9 + shlq $34,%r10 + movq %r11,%r8 + shlq $63,%r11 + shrq $1,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,32(%rdi) + movq %r8,%rax + movq -32(%rsi),%r9 + movq -24(%rsi),%r10 + shlq $28,%r9 + movq %r10,%r11 + shlq $57,%r10 + shrq $7,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,40(%rdi) + movq %r11,%rax + movq -16(%rsi),%r8 + movq -8(%rsi),%r9 + shlq $22,%r8 + movq %r9,%r10 + shlq $51,%r9 + shrq $13,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,48(%rdi) + movq %r10,%rax + movq 0(%rsi),%r11 + movq 8(%rsi),%r8 + shlq $16,%r11 + movq %r8,%r9 + shlq $45,%r8 + shrq $19,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,56(%rdi) + movq %r9,%rax + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + shlq $10,%r10 + movq %r11,%r8 + shlq $39,%r11 + shrq $25,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,64(%rdi) + movq %r8,%rax + movq 32(%rsi),%r9 + movq 40(%rsi),%r10 + movq 48(%rsi),%r11 + shlq $4,%r9 + shlq $33,%r10 + movq %r11,%r8 + shlq $62,%r11 + shrq $2,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,72(%rdi) + movq %r8,%rax + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + shlq $27,%r9 + movq %r10,%r11 + shlq $56,%r10 + shrq $8,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,80(%rdi) + movq %r11,%rax + movq 72(%rsi),%r8 + movq 80(%rsi),%r9 + shlq $21,%r8 + movq %r9,%r10 + shlq $50,%r9 + shrq $14,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,88(%rdi) + movq %r10,%rax + movq 88(%rsi),%r11 + movq 96(%rsi),%r8 + shlq $15,%r11 + movq %r8,%r9 + shlq $44,%r8 + shrq $20,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,96(%rdi) + movq %r9,%rax + movq 104(%rsi),%r10 + movq 112(%rsi),%r11 + shlq $9,%r10 + movq %r11,%r8 + shlq $38,%r11 + shrq $26,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,104(%rdi) + movq %r8,%rax + movq 120(%rsi),%r9 + movq 128(%rsi),%r10 + movq 136(%rsi),%r11 + shlq $3,%r9 + shlq $32,%r10 + movq %r11,%r8 + shlq $61,%r11 + shrq $3,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,112(%rdi) + movq %r8,%rax + movq 144(%rsi),%r9 + movq 152(%rsi),%r10 + shlq $26,%r9 + movq %r10,%r11 + shlq $55,%r10 + shrq $9,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,120(%rdi) + movq %r11,%rax + ret +.cfi_endproc +.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2 + +.globl rsaz_1024_norm2red_avx2 +.hidden rsaz_1024_norm2red_avx2 +.type rsaz_1024_norm2red_avx2,@function +.align 32 +rsaz_1024_norm2red_avx2: +.cfi_startproc +_CET_ENDBR + subq $-128,%rdi + movq (%rsi),%r8 + movl $0x1fffffff,%eax + movq 8(%rsi),%r9 + movq %r8,%r11 + shrq $0,%r11 + andq %rax,%r11 + movq %r11,-128(%rdi) + movq %r8,%r10 + shrq $29,%r10 + andq %rax,%r10 + movq %r10,-120(%rdi) + shrdq $58,%r9,%r8 + andq %rax,%r8 + movq %r8,-112(%rdi) + movq 16(%rsi),%r10 + movq %r9,%r8 + shrq $23,%r8 + andq %rax,%r8 + movq %r8,-104(%rdi) + shrdq $52,%r10,%r9 + andq %rax,%r9 + movq %r9,-96(%rdi) + movq 24(%rsi),%r11 + movq %r10,%r9 + shrq $17,%r9 + andq %rax,%r9 + movq %r9,-88(%rdi) + shrdq $46,%r11,%r10 + andq %rax,%r10 + movq %r10,-80(%rdi) + movq 32(%rsi),%r8 + movq %r11,%r10 + shrq $11,%r10 + andq %rax,%r10 + movq %r10,-72(%rdi) + shrdq $40,%r8,%r11 + andq %rax,%r11 + movq %r11,-64(%rdi) + movq 40(%rsi),%r9 + movq %r8,%r11 + shrq $5,%r11 + andq %rax,%r11 + movq %r11,-56(%rdi) + movq %r8,%r10 + shrq $34,%r10 + andq %rax,%r10 + movq %r10,-48(%rdi) + shrdq $63,%r9,%r8 + andq %rax,%r8 + movq %r8,-40(%rdi) + movq 48(%rsi),%r10 + movq %r9,%r8 + shrq $28,%r8 + andq %rax,%r8 + movq %r8,-32(%rdi) + shrdq $57,%r10,%r9 + andq %rax,%r9 + movq %r9,-24(%rdi) + movq 56(%rsi),%r11 + movq %r10,%r9 + shrq $22,%r9 + andq %rax,%r9 + movq %r9,-16(%rdi) + shrdq $51,%r11,%r10 + andq %rax,%r10 + movq %r10,-8(%rdi) + movq 64(%rsi),%r8 + movq %r11,%r10 + shrq $16,%r10 + andq %rax,%r10 + movq %r10,0(%rdi) + shrdq $45,%r8,%r11 + andq %rax,%r11 + movq %r11,8(%rdi) + movq 72(%rsi),%r9 + movq %r8,%r11 + shrq $10,%r11 + andq %rax,%r11 + movq %r11,16(%rdi) + shrdq $39,%r9,%r8 + andq %rax,%r8 + movq %r8,24(%rdi) + movq 80(%rsi),%r10 + movq %r9,%r8 + shrq $4,%r8 + andq %rax,%r8 + movq %r8,32(%rdi) + movq %r9,%r11 + shrq $33,%r11 + andq %rax,%r11 + movq %r11,40(%rdi) + shrdq $62,%r10,%r9 + andq %rax,%r9 + movq %r9,48(%rdi) + movq 88(%rsi),%r11 + movq %r10,%r9 + shrq $27,%r9 + andq %rax,%r9 + movq %r9,56(%rdi) + shrdq $56,%r11,%r10 + andq %rax,%r10 + movq %r10,64(%rdi) + movq 96(%rsi),%r8 + movq %r11,%r10 + shrq $21,%r10 + andq %rax,%r10 + movq %r10,72(%rdi) + shrdq $50,%r8,%r11 + andq %rax,%r11 + movq %r11,80(%rdi) + movq 104(%rsi),%r9 + movq %r8,%r11 + shrq $15,%r11 + andq %rax,%r11 + movq %r11,88(%rdi) + shrdq $44,%r9,%r8 + andq %rax,%r8 + movq %r8,96(%rdi) + movq 112(%rsi),%r10 + movq %r9,%r8 + shrq $9,%r8 + andq %rax,%r8 + movq %r8,104(%rdi) + shrdq $38,%r10,%r9 + andq %rax,%r9 + movq %r9,112(%rdi) + movq 120(%rsi),%r11 + movq %r10,%r9 + shrq $3,%r9 + andq %rax,%r9 + movq %r9,120(%rdi) + movq %r10,%r8 + shrq $32,%r8 + andq %rax,%r8 + movq %r8,128(%rdi) + shrdq $61,%r11,%r10 + andq %rax,%r10 + movq %r10,136(%rdi) + xorq %r8,%r8 + movq %r11,%r10 + shrq $26,%r10 + andq %rax,%r10 + movq %r10,144(%rdi) + shrdq $55,%r8,%r11 + andq %rax,%r11 + movq %r11,152(%rdi) + movq %r8,160(%rdi) + movq %r8,168(%rdi) + movq %r8,176(%rdi) + movq %r8,184(%rdi) + ret +.cfi_endproc +.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2 +.globl rsaz_1024_scatter5_avx2 +.hidden rsaz_1024_scatter5_avx2 +.type rsaz_1024_scatter5_avx2,@function +.align 32 +rsaz_1024_scatter5_avx2: +.cfi_startproc +_CET_ENDBR + vzeroupper + vmovdqu .Lscatter_permd(%rip),%ymm5 + shll $4,%edx + leaq (%rdi,%rdx,1),%rdi + movl $9,%eax + jmp .Loop_scatter_1024 + +.align 32 +.Loop_scatter_1024: + vmovdqu (%rsi),%ymm0 + leaq 32(%rsi),%rsi + vpermd %ymm0,%ymm5,%ymm0 + vmovdqu %xmm0,(%rdi) + leaq 512(%rdi),%rdi + decl %eax + jnz .Loop_scatter_1024 + + vzeroupper + ret +.cfi_endproc +.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2 + +.globl rsaz_1024_gather5_avx2 +.hidden rsaz_1024_gather5_avx2 +.type rsaz_1024_gather5_avx2,@function +.align 32 +rsaz_1024_gather5_avx2: +.cfi_startproc +_CET_ENDBR + vzeroupper + movq %rsp,%r11 +.cfi_def_cfa_register %r11 + leaq -256(%rsp),%rsp + andq $-32,%rsp + leaq .Linc(%rip),%r10 + leaq -128(%rsp),%rax + + vmovd %edx,%xmm4 + vmovdqa (%r10),%ymm0 + vmovdqa 32(%r10),%ymm1 + vmovdqa 64(%r10),%ymm5 + vpbroadcastd %xmm4,%ymm4 + + vpaddd %ymm5,%ymm0,%ymm2 + vpcmpeqd %ymm4,%ymm0,%ymm0 + vpaddd %ymm5,%ymm1,%ymm3 + vpcmpeqd %ymm4,%ymm1,%ymm1 + vmovdqa %ymm0,0+128(%rax) + vpaddd %ymm5,%ymm2,%ymm0 + vpcmpeqd %ymm4,%ymm2,%ymm2 + vmovdqa %ymm1,32+128(%rax) + vpaddd %ymm5,%ymm3,%ymm1 + vpcmpeqd %ymm4,%ymm3,%ymm3 + vmovdqa %ymm2,64+128(%rax) + vpaddd %ymm5,%ymm0,%ymm2 + vpcmpeqd %ymm4,%ymm0,%ymm0 + vmovdqa %ymm3,96+128(%rax) + vpaddd %ymm5,%ymm1,%ymm3 + vpcmpeqd %ymm4,%ymm1,%ymm1 + vmovdqa %ymm0,128+128(%rax) + vpaddd %ymm5,%ymm2,%ymm8 + vpcmpeqd %ymm4,%ymm2,%ymm2 + vmovdqa %ymm1,160+128(%rax) + vpaddd %ymm5,%ymm3,%ymm9 + vpcmpeqd %ymm4,%ymm3,%ymm3 + vmovdqa %ymm2,192+128(%rax) + vpaddd %ymm5,%ymm8,%ymm10 + vpcmpeqd %ymm4,%ymm8,%ymm8 + vmovdqa %ymm3,224+128(%rax) + vpaddd %ymm5,%ymm9,%ymm11 + vpcmpeqd %ymm4,%ymm9,%ymm9 + vpaddd %ymm5,%ymm10,%ymm12 + vpcmpeqd %ymm4,%ymm10,%ymm10 + vpaddd %ymm5,%ymm11,%ymm13 + vpcmpeqd %ymm4,%ymm11,%ymm11 + vpaddd %ymm5,%ymm12,%ymm14 + vpcmpeqd %ymm4,%ymm12,%ymm12 + vpaddd %ymm5,%ymm13,%ymm15 + vpcmpeqd %ymm4,%ymm13,%ymm13 + vpcmpeqd %ymm4,%ymm14,%ymm14 + vpcmpeqd %ymm4,%ymm15,%ymm15 + + vmovdqa -32(%r10),%ymm7 + leaq 128(%rsi),%rsi + movl $9,%edx + +.Loop_gather_1024: + vmovdqa 0-128(%rsi),%ymm0 + vmovdqa 32-128(%rsi),%ymm1 + vmovdqa 64-128(%rsi),%ymm2 + vmovdqa 96-128(%rsi),%ymm3 + vpand 0+128(%rax),%ymm0,%ymm0 + vpand 32+128(%rax),%ymm1,%ymm1 + vpand 64+128(%rax),%ymm2,%ymm2 + vpor %ymm0,%ymm1,%ymm4 + vpand 96+128(%rax),%ymm3,%ymm3 + vmovdqa 128-128(%rsi),%ymm0 + vmovdqa 160-128(%rsi),%ymm1 + vpor %ymm2,%ymm3,%ymm5 + vmovdqa 192-128(%rsi),%ymm2 + vmovdqa 224-128(%rsi),%ymm3 + vpand 128+128(%rax),%ymm0,%ymm0 + vpand 160+128(%rax),%ymm1,%ymm1 + vpand 192+128(%rax),%ymm2,%ymm2 + vpor %ymm0,%ymm4,%ymm4 + vpand 224+128(%rax),%ymm3,%ymm3 + vpand 256-128(%rsi),%ymm8,%ymm0 + vpor %ymm1,%ymm5,%ymm5 + vpand 288-128(%rsi),%ymm9,%ymm1 + vpor %ymm2,%ymm4,%ymm4 + vpand 320-128(%rsi),%ymm10,%ymm2 + vpor %ymm3,%ymm5,%ymm5 + vpand 352-128(%rsi),%ymm11,%ymm3 + vpor %ymm0,%ymm4,%ymm4 + vpand 384-128(%rsi),%ymm12,%ymm0 + vpor %ymm1,%ymm5,%ymm5 + vpand 416-128(%rsi),%ymm13,%ymm1 + vpor %ymm2,%ymm4,%ymm4 + vpand 448-128(%rsi),%ymm14,%ymm2 + vpor %ymm3,%ymm5,%ymm5 + vpand 480-128(%rsi),%ymm15,%ymm3 + leaq 512(%rsi),%rsi + vpor %ymm0,%ymm4,%ymm4 + vpor %ymm1,%ymm5,%ymm5 + vpor %ymm2,%ymm4,%ymm4 + vpor %ymm3,%ymm5,%ymm5 + + vpor %ymm5,%ymm4,%ymm4 + vextracti128 $1,%ymm4,%xmm5 + vpor %xmm4,%xmm5,%xmm5 + vpermd %ymm5,%ymm7,%ymm5 + vmovdqu %ymm5,(%rdi) + leaq 32(%rdi),%rdi + decl %edx + jnz .Loop_gather_1024 + + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + vzeroupper + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp + ret +.cfi_endproc +.LSEH_end_rsaz_1024_gather5: +.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 +.section .rodata +.align 64 +.Land_mask: +.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff +.Lscatter_permd: +.long 0,2,4,6,7,7,7,7 +.Lgather_permd: +.long 0,7,1,7,2,7,3,7 +.Linc: +.long 0,0,0,0, 1,1,1,1 +.long 2,2,2,2, 3,3,3,3 +.long 4,4,4,4, 4,4,4,4 +.align 64 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-win.asm b/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-win.asm new file mode 100644 index 0000000000..beadbdde04 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/rsaz-avx2-win.asm @@ -0,0 +1,1987 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + +global rsaz_1024_sqr_avx2 + +ALIGN 64 +rsaz_1024_sqr_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_1024_sqr_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + lea rax,[rsp] + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + vzeroupper + lea rsp,[((-168))+rsp] + vmovaps XMMWORD[(-216)+rax],xmm6 + vmovaps XMMWORD[(-200)+rax],xmm7 + vmovaps XMMWORD[(-184)+rax],xmm8 + vmovaps XMMWORD[(-168)+rax],xmm9 + vmovaps XMMWORD[(-152)+rax],xmm10 + vmovaps XMMWORD[(-136)+rax],xmm11 + vmovaps XMMWORD[(-120)+rax],xmm12 + vmovaps XMMWORD[(-104)+rax],xmm13 + vmovaps XMMWORD[(-88)+rax],xmm14 + vmovaps XMMWORD[(-72)+rax],xmm15 +$L$sqr_1024_body: + mov rbp,rax + + mov r13,rdx + sub rsp,832 + mov r15,r13 + sub rdi,-128 + sub rsi,-128 + sub r13,-128 + + and r15,4095 + add r15,32*10 + shr r15,12 + vpxor ymm9,ymm9,ymm9 + jz NEAR $L$sqr_1024_no_n_copy + + + + + + sub rsp,32*10 + vmovdqu ymm0,YMMWORD[((0-128))+r13] + and rsp,-2048 + vmovdqu ymm1,YMMWORD[((32-128))+r13] + vmovdqu ymm2,YMMWORD[((64-128))+r13] + vmovdqu ymm3,YMMWORD[((96-128))+r13] + vmovdqu ymm4,YMMWORD[((128-128))+r13] + vmovdqu ymm5,YMMWORD[((160-128))+r13] + vmovdqu ymm6,YMMWORD[((192-128))+r13] + vmovdqu ymm7,YMMWORD[((224-128))+r13] + vmovdqu ymm8,YMMWORD[((256-128))+r13] + lea r13,[((832+128))+rsp] + vmovdqu YMMWORD[(0-128)+r13],ymm0 + vmovdqu YMMWORD[(32-128)+r13],ymm1 + vmovdqu YMMWORD[(64-128)+r13],ymm2 + vmovdqu YMMWORD[(96-128)+r13],ymm3 + vmovdqu YMMWORD[(128-128)+r13],ymm4 + vmovdqu YMMWORD[(160-128)+r13],ymm5 + vmovdqu YMMWORD[(192-128)+r13],ymm6 + vmovdqu YMMWORD[(224-128)+r13],ymm7 + vmovdqu YMMWORD[(256-128)+r13],ymm8 + vmovdqu YMMWORD[(288-128)+r13],ymm9 + +$L$sqr_1024_no_n_copy: + and rsp,-1024 + + vmovdqu ymm1,YMMWORD[((32-128))+rsi] + vmovdqu ymm2,YMMWORD[((64-128))+rsi] + vmovdqu ymm3,YMMWORD[((96-128))+rsi] + vmovdqu ymm4,YMMWORD[((128-128))+rsi] + vmovdqu ymm5,YMMWORD[((160-128))+rsi] + vmovdqu ymm6,YMMWORD[((192-128))+rsi] + vmovdqu ymm7,YMMWORD[((224-128))+rsi] + vmovdqu ymm8,YMMWORD[((256-128))+rsi] + + lea rbx,[192+rsp] + vmovdqu ymm15,YMMWORD[$L$and_mask] + jmp NEAR $L$OOP_GRANDE_SQR_1024 + +ALIGN 32 +$L$OOP_GRANDE_SQR_1024: + lea r9,[((576+128))+rsp] + lea r12,[448+rsp] + + + + + vpaddq ymm1,ymm1,ymm1 + vpbroadcastq ymm10,QWORD[((0-128))+rsi] + vpaddq ymm2,ymm2,ymm2 + vmovdqa YMMWORD[(0-128)+r9],ymm1 + vpaddq ymm3,ymm3,ymm3 + vmovdqa YMMWORD[(32-128)+r9],ymm2 + vpaddq ymm4,ymm4,ymm4 + vmovdqa YMMWORD[(64-128)+r9],ymm3 + vpaddq ymm5,ymm5,ymm5 + vmovdqa YMMWORD[(96-128)+r9],ymm4 + vpaddq ymm6,ymm6,ymm6 + vmovdqa YMMWORD[(128-128)+r9],ymm5 + vpaddq ymm7,ymm7,ymm7 + vmovdqa YMMWORD[(160-128)+r9],ymm6 + vpaddq ymm8,ymm8,ymm8 + vmovdqa YMMWORD[(192-128)+r9],ymm7 + vpxor ymm9,ymm9,ymm9 + vmovdqa YMMWORD[(224-128)+r9],ymm8 + + vpmuludq ymm0,ymm10,YMMWORD[((0-128))+rsi] + vpbroadcastq ymm11,QWORD[((32-128))+rsi] + vmovdqu YMMWORD[(288-192)+rbx],ymm9 + vpmuludq ymm1,ymm1,ymm10 + vmovdqu YMMWORD[(320-448)+r12],ymm9 + vpmuludq ymm2,ymm2,ymm10 + vmovdqu YMMWORD[(352-448)+r12],ymm9 + vpmuludq ymm3,ymm3,ymm10 + vmovdqu YMMWORD[(384-448)+r12],ymm9 + vpmuludq ymm4,ymm4,ymm10 + vmovdqu YMMWORD[(416-448)+r12],ymm9 + vpmuludq ymm5,ymm5,ymm10 + vmovdqu YMMWORD[(448-448)+r12],ymm9 + vpmuludq ymm6,ymm6,ymm10 + vmovdqu YMMWORD[(480-448)+r12],ymm9 + vpmuludq ymm7,ymm7,ymm10 + vmovdqu YMMWORD[(512-448)+r12],ymm9 + vpmuludq ymm8,ymm8,ymm10 + vpbroadcastq ymm10,QWORD[((64-128))+rsi] + vmovdqu YMMWORD[(544-448)+r12],ymm9 + + mov r15,rsi + mov r14d,4 + jmp NEAR $L$sqr_entry_1024 +ALIGN 32 +$L$OOP_SQR_1024: + vpbroadcastq ymm11,QWORD[((32-128))+r15] + vpmuludq ymm0,ymm10,YMMWORD[((0-128))+rsi] + vpaddq ymm0,ymm0,YMMWORD[((0-192))+rbx] + vpmuludq ymm1,ymm10,YMMWORD[((0-128))+r9] + vpaddq ymm1,ymm1,YMMWORD[((32-192))+rbx] + vpmuludq ymm2,ymm10,YMMWORD[((32-128))+r9] + vpaddq ymm2,ymm2,YMMWORD[((64-192))+rbx] + vpmuludq ymm3,ymm10,YMMWORD[((64-128))+r9] + vpaddq ymm3,ymm3,YMMWORD[((96-192))+rbx] + vpmuludq ymm4,ymm10,YMMWORD[((96-128))+r9] + vpaddq ymm4,ymm4,YMMWORD[((128-192))+rbx] + vpmuludq ymm5,ymm10,YMMWORD[((128-128))+r9] + vpaddq ymm5,ymm5,YMMWORD[((160-192))+rbx] + vpmuludq ymm6,ymm10,YMMWORD[((160-128))+r9] + vpaddq ymm6,ymm6,YMMWORD[((192-192))+rbx] + vpmuludq ymm7,ymm10,YMMWORD[((192-128))+r9] + vpaddq ymm7,ymm7,YMMWORD[((224-192))+rbx] + vpmuludq ymm8,ymm10,YMMWORD[((224-128))+r9] + vpbroadcastq ymm10,QWORD[((64-128))+r15] + vpaddq ymm8,ymm8,YMMWORD[((256-192))+rbx] +$L$sqr_entry_1024: + vmovdqu YMMWORD[(0-192)+rbx],ymm0 + vmovdqu YMMWORD[(32-192)+rbx],ymm1 + + vpmuludq ymm12,ymm11,YMMWORD[((32-128))+rsi] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm14,ymm11,YMMWORD[((32-128))+r9] + vpaddq ymm3,ymm3,ymm14 + vpmuludq ymm13,ymm11,YMMWORD[((64-128))+r9] + vpaddq ymm4,ymm4,ymm13 + vpmuludq ymm12,ymm11,YMMWORD[((96-128))+r9] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm14,ymm11,YMMWORD[((128-128))+r9] + vpaddq ymm6,ymm6,ymm14 + vpmuludq ymm13,ymm11,YMMWORD[((160-128))+r9] + vpaddq ymm7,ymm7,ymm13 + vpmuludq ymm12,ymm11,YMMWORD[((192-128))+r9] + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm0,ymm11,YMMWORD[((224-128))+r9] + vpbroadcastq ymm11,QWORD[((96-128))+r15] + vpaddq ymm0,ymm0,YMMWORD[((288-192))+rbx] + + vmovdqu YMMWORD[(64-192)+rbx],ymm2 + vmovdqu YMMWORD[(96-192)+rbx],ymm3 + + vpmuludq ymm13,ymm10,YMMWORD[((64-128))+rsi] + vpaddq ymm4,ymm4,ymm13 + vpmuludq ymm12,ymm10,YMMWORD[((64-128))+r9] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm14,ymm10,YMMWORD[((96-128))+r9] + vpaddq ymm6,ymm6,ymm14 + vpmuludq ymm13,ymm10,YMMWORD[((128-128))+r9] + vpaddq ymm7,ymm7,ymm13 + vpmuludq ymm12,ymm10,YMMWORD[((160-128))+r9] + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm14,ymm10,YMMWORD[((192-128))+r9] + vpaddq ymm0,ymm0,ymm14 + vpmuludq ymm1,ymm10,YMMWORD[((224-128))+r9] + vpbroadcastq ymm10,QWORD[((128-128))+r15] + vpaddq ymm1,ymm1,YMMWORD[((320-448))+r12] + + vmovdqu YMMWORD[(128-192)+rbx],ymm4 + vmovdqu YMMWORD[(160-192)+rbx],ymm5 + + vpmuludq ymm12,ymm11,YMMWORD[((96-128))+rsi] + vpaddq ymm6,ymm6,ymm12 + vpmuludq ymm14,ymm11,YMMWORD[((96-128))+r9] + vpaddq ymm7,ymm7,ymm14 + vpmuludq ymm13,ymm11,YMMWORD[((128-128))+r9] + vpaddq ymm8,ymm8,ymm13 + vpmuludq ymm12,ymm11,YMMWORD[((160-128))+r9] + vpaddq ymm0,ymm0,ymm12 + vpmuludq ymm14,ymm11,YMMWORD[((192-128))+r9] + vpaddq ymm1,ymm1,ymm14 + vpmuludq ymm2,ymm11,YMMWORD[((224-128))+r9] + vpbroadcastq ymm11,QWORD[((160-128))+r15] + vpaddq ymm2,ymm2,YMMWORD[((352-448))+r12] + + vmovdqu YMMWORD[(192-192)+rbx],ymm6 + vmovdqu YMMWORD[(224-192)+rbx],ymm7 + + vpmuludq ymm12,ymm10,YMMWORD[((128-128))+rsi] + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm14,ymm10,YMMWORD[((128-128))+r9] + vpaddq ymm0,ymm0,ymm14 + vpmuludq ymm13,ymm10,YMMWORD[((160-128))+r9] + vpaddq ymm1,ymm1,ymm13 + vpmuludq ymm12,ymm10,YMMWORD[((192-128))+r9] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm3,ymm10,YMMWORD[((224-128))+r9] + vpbroadcastq ymm10,QWORD[((192-128))+r15] + vpaddq ymm3,ymm3,YMMWORD[((384-448))+r12] + + vmovdqu YMMWORD[(256-192)+rbx],ymm8 + vmovdqu YMMWORD[(288-192)+rbx],ymm0 + lea rbx,[8+rbx] + + vpmuludq ymm13,ymm11,YMMWORD[((160-128))+rsi] + vpaddq ymm1,ymm1,ymm13 + vpmuludq ymm12,ymm11,YMMWORD[((160-128))+r9] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm14,ymm11,YMMWORD[((192-128))+r9] + vpaddq ymm3,ymm3,ymm14 + vpmuludq ymm4,ymm11,YMMWORD[((224-128))+r9] + vpbroadcastq ymm11,QWORD[((224-128))+r15] + vpaddq ymm4,ymm4,YMMWORD[((416-448))+r12] + + vmovdqu YMMWORD[(320-448)+r12],ymm1 + vmovdqu YMMWORD[(352-448)+r12],ymm2 + + vpmuludq ymm12,ymm10,YMMWORD[((192-128))+rsi] + vpaddq ymm3,ymm3,ymm12 + vpmuludq ymm14,ymm10,YMMWORD[((192-128))+r9] + vpbroadcastq ymm0,QWORD[((256-128))+r15] + vpaddq ymm4,ymm4,ymm14 + vpmuludq ymm5,ymm10,YMMWORD[((224-128))+r9] + vpbroadcastq ymm10,QWORD[((0+8-128))+r15] + vpaddq ymm5,ymm5,YMMWORD[((448-448))+r12] + + vmovdqu YMMWORD[(384-448)+r12],ymm3 + vmovdqu YMMWORD[(416-448)+r12],ymm4 + lea r15,[8+r15] + + vpmuludq ymm12,ymm11,YMMWORD[((224-128))+rsi] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm6,ymm11,YMMWORD[((224-128))+r9] + vpaddq ymm6,ymm6,YMMWORD[((480-448))+r12] + + vpmuludq ymm7,ymm0,YMMWORD[((256-128))+rsi] + vmovdqu YMMWORD[(448-448)+r12],ymm5 + vpaddq ymm7,ymm7,YMMWORD[((512-448))+r12] + vmovdqu YMMWORD[(480-448)+r12],ymm6 + vmovdqu YMMWORD[(512-448)+r12],ymm7 + lea r12,[8+r12] + + dec r14d + jnz NEAR $L$OOP_SQR_1024 + + vmovdqu ymm8,YMMWORD[256+rsp] + vmovdqu ymm1,YMMWORD[288+rsp] + vmovdqu ymm2,YMMWORD[320+rsp] + lea rbx,[192+rsp] + + vpsrlq ymm14,ymm8,29 + vpand ymm8,ymm8,ymm15 + vpsrlq ymm11,ymm1,29 + vpand ymm1,ymm1,ymm15 + + vpermq ymm14,ymm14,0x93 + vpxor ymm9,ymm9,ymm9 + vpermq ymm11,ymm11,0x93 + + vpblendd ymm10,ymm14,ymm9,3 + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm8,ymm8,ymm10 + vpblendd ymm11,ymm9,ymm11,3 + vpaddq ymm1,ymm1,ymm14 + vpaddq ymm2,ymm2,ymm11 + vmovdqu YMMWORD[(288-192)+rbx],ymm1 + vmovdqu YMMWORD[(320-192)+rbx],ymm2 + + mov rax,QWORD[rsp] + mov r10,QWORD[8+rsp] + mov r11,QWORD[16+rsp] + mov r12,QWORD[24+rsp] + vmovdqu ymm1,YMMWORD[32+rsp] + vmovdqu ymm2,YMMWORD[((64-192))+rbx] + vmovdqu ymm3,YMMWORD[((96-192))+rbx] + vmovdqu ymm4,YMMWORD[((128-192))+rbx] + vmovdqu ymm5,YMMWORD[((160-192))+rbx] + vmovdqu ymm6,YMMWORD[((192-192))+rbx] + vmovdqu ymm7,YMMWORD[((224-192))+rbx] + + mov r9,rax + imul eax,ecx + and eax,0x1fffffff + vmovd xmm12,eax + + mov rdx,rax + imul rax,QWORD[((-128))+r13] + vpbroadcastq ymm12,xmm12 + add r9,rax + mov rax,rdx + imul rax,QWORD[((8-128))+r13] + shr r9,29 + add r10,rax + mov rax,rdx + imul rax,QWORD[((16-128))+r13] + add r10,r9 + add r11,rax + imul rdx,QWORD[((24-128))+r13] + add r12,rdx + + mov rax,r10 + imul eax,ecx + and eax,0x1fffffff + + mov r14d,9 + jmp NEAR $L$OOP_REDUCE_1024 + +ALIGN 32 +$L$OOP_REDUCE_1024: + vmovd xmm13,eax + vpbroadcastq ymm13,xmm13 + + vpmuludq ymm10,ymm12,YMMWORD[((32-128))+r13] + mov rdx,rax + imul rax,QWORD[((-128))+r13] + vpaddq ymm1,ymm1,ymm10 + add r10,rax + vpmuludq ymm14,ymm12,YMMWORD[((64-128))+r13] + mov rax,rdx + imul rax,QWORD[((8-128))+r13] + vpaddq ymm2,ymm2,ymm14 + vpmuludq ymm11,ymm12,YMMWORD[((96-128))+r13] + DB 0x67 + add r11,rax + DB 0x67 + mov rax,rdx + imul rax,QWORD[((16-128))+r13] + shr r10,29 + vpaddq ymm3,ymm3,ymm11 + vpmuludq ymm10,ymm12,YMMWORD[((128-128))+r13] + add r12,rax + add r11,r10 + vpaddq ymm4,ymm4,ymm10 + vpmuludq ymm14,ymm12,YMMWORD[((160-128))+r13] + mov rax,r11 + imul eax,ecx + vpaddq ymm5,ymm5,ymm14 + vpmuludq ymm11,ymm12,YMMWORD[((192-128))+r13] + and eax,0x1fffffff + vpaddq ymm6,ymm6,ymm11 + vpmuludq ymm10,ymm12,YMMWORD[((224-128))+r13] + vpaddq ymm7,ymm7,ymm10 + vpmuludq ymm14,ymm12,YMMWORD[((256-128))+r13] + vmovd xmm12,eax + + vpaddq ymm8,ymm8,ymm14 + + vpbroadcastq ymm12,xmm12 + + vpmuludq ymm11,ymm13,YMMWORD[((32-8-128))+r13] + vmovdqu ymm14,YMMWORD[((96-8-128))+r13] + mov rdx,rax + imul rax,QWORD[((-128))+r13] + vpaddq ymm1,ymm1,ymm11 + vpmuludq ymm10,ymm13,YMMWORD[((64-8-128))+r13] + vmovdqu ymm11,YMMWORD[((128-8-128))+r13] + add r11,rax + mov rax,rdx + imul rax,QWORD[((8-128))+r13] + vpaddq ymm2,ymm2,ymm10 + add rax,r12 + shr r11,29 + vpmuludq ymm14,ymm14,ymm13 + vmovdqu ymm10,YMMWORD[((160-8-128))+r13] + add rax,r11 + vpaddq ymm3,ymm3,ymm14 + vpmuludq ymm11,ymm11,ymm13 + vmovdqu ymm14,YMMWORD[((192-8-128))+r13] + DB 0x67 + mov r12,rax + imul eax,ecx + vpaddq ymm4,ymm4,ymm11 + vpmuludq ymm10,ymm10,ymm13 + DB 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 + and eax,0x1fffffff + vpaddq ymm5,ymm5,ymm10 + vpmuludq ymm14,ymm14,ymm13 + vmovdqu ymm10,YMMWORD[((256-8-128))+r13] + vpaddq ymm6,ymm6,ymm14 + vpmuludq ymm11,ymm11,ymm13 + vmovdqu ymm9,YMMWORD[((288-8-128))+r13] + vmovd xmm0,eax + imul rax,QWORD[((-128))+r13] + vpaddq ymm7,ymm7,ymm11 + vpmuludq ymm10,ymm10,ymm13 + vmovdqu ymm14,YMMWORD[((32-16-128))+r13] + vpbroadcastq ymm0,xmm0 + vpaddq ymm8,ymm8,ymm10 + vpmuludq ymm9,ymm9,ymm13 + vmovdqu ymm11,YMMWORD[((64-16-128))+r13] + add r12,rax + + vmovdqu ymm13,YMMWORD[((32-24-128))+r13] + vpmuludq ymm14,ymm14,ymm12 + vmovdqu ymm10,YMMWORD[((96-16-128))+r13] + vpaddq ymm1,ymm1,ymm14 + vpmuludq ymm13,ymm13,ymm0 + vpmuludq ymm11,ymm11,ymm12 + DB 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff + vpaddq ymm13,ymm13,ymm1 + vpaddq ymm2,ymm2,ymm11 + vpmuludq ymm10,ymm10,ymm12 + vmovdqu ymm11,YMMWORD[((160-16-128))+r13] + DB 0x67 + vmovq rax,xmm13 + vmovdqu YMMWORD[rsp],ymm13 + vpaddq ymm3,ymm3,ymm10 + vpmuludq ymm14,ymm14,ymm12 + vmovdqu ymm10,YMMWORD[((192-16-128))+r13] + vpaddq ymm4,ymm4,ymm14 + vpmuludq ymm11,ymm11,ymm12 + vmovdqu ymm14,YMMWORD[((224-16-128))+r13] + vpaddq ymm5,ymm5,ymm11 + vpmuludq ymm10,ymm10,ymm12 + vmovdqu ymm11,YMMWORD[((256-16-128))+r13] + vpaddq ymm6,ymm6,ymm10 + vpmuludq ymm14,ymm14,ymm12 + shr r12,29 + vmovdqu ymm10,YMMWORD[((288-16-128))+r13] + add rax,r12 + vpaddq ymm7,ymm7,ymm14 + vpmuludq ymm11,ymm11,ymm12 + + mov r9,rax + imul eax,ecx + vpaddq ymm8,ymm8,ymm11 + vpmuludq ymm10,ymm10,ymm12 + and eax,0x1fffffff + vmovd xmm12,eax + vmovdqu ymm11,YMMWORD[((96-24-128))+r13] + DB 0x67 + vpaddq ymm9,ymm9,ymm10 + vpbroadcastq ymm12,xmm12 + + vpmuludq ymm14,ymm0,YMMWORD[((64-24-128))+r13] + vmovdqu ymm10,YMMWORD[((128-24-128))+r13] + mov rdx,rax + imul rax,QWORD[((-128))+r13] + mov r10,QWORD[8+rsp] + vpaddq ymm1,ymm2,ymm14 + vpmuludq ymm11,ymm11,ymm0 + vmovdqu ymm14,YMMWORD[((160-24-128))+r13] + add r9,rax + mov rax,rdx + imul rax,QWORD[((8-128))+r13] + DB 0x67 + shr r9,29 + mov r11,QWORD[16+rsp] + vpaddq ymm2,ymm3,ymm11 + vpmuludq ymm10,ymm10,ymm0 + vmovdqu ymm11,YMMWORD[((192-24-128))+r13] + add r10,rax + mov rax,rdx + imul rax,QWORD[((16-128))+r13] + vpaddq ymm3,ymm4,ymm10 + vpmuludq ymm14,ymm14,ymm0 + vmovdqu ymm10,YMMWORD[((224-24-128))+r13] + imul rdx,QWORD[((24-128))+r13] + add r11,rax + lea rax,[r10*1+r9] + vpaddq ymm4,ymm5,ymm14 + vpmuludq ymm11,ymm11,ymm0 + vmovdqu ymm14,YMMWORD[((256-24-128))+r13] + mov r10,rax + imul eax,ecx + vpmuludq ymm10,ymm10,ymm0 + vpaddq ymm5,ymm6,ymm11 + vmovdqu ymm11,YMMWORD[((288-24-128))+r13] + and eax,0x1fffffff + vpaddq ymm6,ymm7,ymm10 + vpmuludq ymm14,ymm14,ymm0 + add rdx,QWORD[24+rsp] + vpaddq ymm7,ymm8,ymm14 + vpmuludq ymm11,ymm11,ymm0 + vpaddq ymm8,ymm9,ymm11 + vmovq xmm9,r12 + mov r12,rdx + + dec r14d + jnz NEAR $L$OOP_REDUCE_1024 + lea r12,[448+rsp] + vpaddq ymm0,ymm13,ymm9 + vpxor ymm9,ymm9,ymm9 + + vpaddq ymm0,ymm0,YMMWORD[((288-192))+rbx] + vpaddq ymm1,ymm1,YMMWORD[((320-448))+r12] + vpaddq ymm2,ymm2,YMMWORD[((352-448))+r12] + vpaddq ymm3,ymm3,YMMWORD[((384-448))+r12] + vpaddq ymm4,ymm4,YMMWORD[((416-448))+r12] + vpaddq ymm5,ymm5,YMMWORD[((448-448))+r12] + vpaddq ymm6,ymm6,YMMWORD[((480-448))+r12] + vpaddq ymm7,ymm7,YMMWORD[((512-448))+r12] + vpaddq ymm8,ymm8,YMMWORD[((544-448))+r12] + + vpsrlq ymm14,ymm0,29 + vpand ymm0,ymm0,ymm15 + vpsrlq ymm11,ymm1,29 + vpand ymm1,ymm1,ymm15 + vpsrlq ymm12,ymm2,29 + vpermq ymm14,ymm14,0x93 + vpand ymm2,ymm2,ymm15 + vpsrlq ymm13,ymm3,29 + vpermq ymm11,ymm11,0x93 + vpand ymm3,ymm3,ymm15 + vpermq ymm12,ymm12,0x93 + + vpblendd ymm10,ymm14,ymm9,3 + vpermq ymm13,ymm13,0x93 + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm0,ymm0,ymm10 + vpblendd ymm11,ymm12,ymm11,3 + vpaddq ymm1,ymm1,ymm14 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm2,ymm2,ymm11 + vpblendd ymm13,ymm9,ymm13,3 + vpaddq ymm3,ymm3,ymm12 + vpaddq ymm4,ymm4,ymm13 + + vpsrlq ymm14,ymm0,29 + vpand ymm0,ymm0,ymm15 + vpsrlq ymm11,ymm1,29 + vpand ymm1,ymm1,ymm15 + vpsrlq ymm12,ymm2,29 + vpermq ymm14,ymm14,0x93 + vpand ymm2,ymm2,ymm15 + vpsrlq ymm13,ymm3,29 + vpermq ymm11,ymm11,0x93 + vpand ymm3,ymm3,ymm15 + vpermq ymm12,ymm12,0x93 + + vpblendd ymm10,ymm14,ymm9,3 + vpermq ymm13,ymm13,0x93 + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm0,ymm0,ymm10 + vpblendd ymm11,ymm12,ymm11,3 + vpaddq ymm1,ymm1,ymm14 + vmovdqu YMMWORD[(0-128)+rdi],ymm0 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm2,ymm2,ymm11 + vmovdqu YMMWORD[(32-128)+rdi],ymm1 + vpblendd ymm13,ymm9,ymm13,3 + vpaddq ymm3,ymm3,ymm12 + vmovdqu YMMWORD[(64-128)+rdi],ymm2 + vpaddq ymm4,ymm4,ymm13 + vmovdqu YMMWORD[(96-128)+rdi],ymm3 + vpsrlq ymm14,ymm4,29 + vpand ymm4,ymm4,ymm15 + vpsrlq ymm11,ymm5,29 + vpand ymm5,ymm5,ymm15 + vpsrlq ymm12,ymm6,29 + vpermq ymm14,ymm14,0x93 + vpand ymm6,ymm6,ymm15 + vpsrlq ymm13,ymm7,29 + vpermq ymm11,ymm11,0x93 + vpand ymm7,ymm7,ymm15 + vpsrlq ymm0,ymm8,29 + vpermq ymm12,ymm12,0x93 + vpand ymm8,ymm8,ymm15 + vpermq ymm13,ymm13,0x93 + + vpblendd ymm10,ymm14,ymm9,3 + vpermq ymm0,ymm0,0x93 + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm4,ymm4,ymm10 + vpblendd ymm11,ymm12,ymm11,3 + vpaddq ymm5,ymm5,ymm14 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm6,ymm6,ymm11 + vpblendd ymm13,ymm0,ymm13,3 + vpaddq ymm7,ymm7,ymm12 + vpaddq ymm8,ymm8,ymm13 + + vpsrlq ymm14,ymm4,29 + vpand ymm4,ymm4,ymm15 + vpsrlq ymm11,ymm5,29 + vpand ymm5,ymm5,ymm15 + vpsrlq ymm12,ymm6,29 + vpermq ymm14,ymm14,0x93 + vpand ymm6,ymm6,ymm15 + vpsrlq ymm13,ymm7,29 + vpermq ymm11,ymm11,0x93 + vpand ymm7,ymm7,ymm15 + vpsrlq ymm0,ymm8,29 + vpermq ymm12,ymm12,0x93 + vpand ymm8,ymm8,ymm15 + vpermq ymm13,ymm13,0x93 + + vpblendd ymm10,ymm14,ymm9,3 + vpermq ymm0,ymm0,0x93 + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm4,ymm4,ymm10 + vpblendd ymm11,ymm12,ymm11,3 + vpaddq ymm5,ymm5,ymm14 + vmovdqu YMMWORD[(128-128)+rdi],ymm4 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm6,ymm6,ymm11 + vmovdqu YMMWORD[(160-128)+rdi],ymm5 + vpblendd ymm13,ymm0,ymm13,3 + vpaddq ymm7,ymm7,ymm12 + vmovdqu YMMWORD[(192-128)+rdi],ymm6 + vpaddq ymm8,ymm8,ymm13 + vmovdqu YMMWORD[(224-128)+rdi],ymm7 + vmovdqu YMMWORD[(256-128)+rdi],ymm8 + + mov rsi,rdi + dec r8d + jne NEAR $L$OOP_GRANDE_SQR_1024 + + vzeroall + mov rax,rbp + +$L$sqr_1024_in_tail: + movaps xmm6,XMMWORD[((-216))+rax] + movaps xmm7,XMMWORD[((-200))+rax] + movaps xmm8,XMMWORD[((-184))+rax] + movaps xmm9,XMMWORD[((-168))+rax] + movaps xmm10,XMMWORD[((-152))+rax] + movaps xmm11,XMMWORD[((-136))+rax] + movaps xmm12,XMMWORD[((-120))+rax] + movaps xmm13,XMMWORD[((-104))+rax] + movaps xmm14,XMMWORD[((-88))+rax] + movaps xmm15,XMMWORD[((-72))+rax] + mov r15,QWORD[((-48))+rax] + + mov r14,QWORD[((-40))+rax] + + mov r13,QWORD[((-32))+rax] + + mov r12,QWORD[((-24))+rax] + + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$sqr_1024_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_rsaz_1024_sqr_avx2: +global rsaz_1024_mul_avx2 + +ALIGN 64 +rsaz_1024_mul_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_1024_mul_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + lea rax,[rsp] + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + vzeroupper + lea rsp,[((-168))+rsp] + vmovaps XMMWORD[(-216)+rax],xmm6 + vmovaps XMMWORD[(-200)+rax],xmm7 + vmovaps XMMWORD[(-184)+rax],xmm8 + vmovaps XMMWORD[(-168)+rax],xmm9 + vmovaps XMMWORD[(-152)+rax],xmm10 + vmovaps XMMWORD[(-136)+rax],xmm11 + vmovaps XMMWORD[(-120)+rax],xmm12 + vmovaps XMMWORD[(-104)+rax],xmm13 + vmovaps XMMWORD[(-88)+rax],xmm14 + vmovaps XMMWORD[(-72)+rax],xmm15 +$L$mul_1024_body: + mov rbp,rax + + vzeroall + mov r13,rdx + sub rsp,64 + + + + + + + DB 0x67,0x67 + mov r15,rsi + and r15,4095 + add r15,32*10 + shr r15,12 + mov r15,rsi + cmovnz rsi,r13 + cmovnz r13,r15 + + mov r15,rcx + sub rsi,-128 + sub rcx,-128 + sub rdi,-128 + + and r15,4095 + add r15,32*10 + DB 0x67,0x67 + shr r15,12 + jz NEAR $L$mul_1024_no_n_copy + + + + + + sub rsp,32*10 + vmovdqu ymm0,YMMWORD[((0-128))+rcx] + and rsp,-512 + vmovdqu ymm1,YMMWORD[((32-128))+rcx] + vmovdqu ymm2,YMMWORD[((64-128))+rcx] + vmovdqu ymm3,YMMWORD[((96-128))+rcx] + vmovdqu ymm4,YMMWORD[((128-128))+rcx] + vmovdqu ymm5,YMMWORD[((160-128))+rcx] + vmovdqu ymm6,YMMWORD[((192-128))+rcx] + vmovdqu ymm7,YMMWORD[((224-128))+rcx] + vmovdqu ymm8,YMMWORD[((256-128))+rcx] + lea rcx,[((64+128))+rsp] + vmovdqu YMMWORD[(0-128)+rcx],ymm0 + vpxor ymm0,ymm0,ymm0 + vmovdqu YMMWORD[(32-128)+rcx],ymm1 + vpxor ymm1,ymm1,ymm1 + vmovdqu YMMWORD[(64-128)+rcx],ymm2 + vpxor ymm2,ymm2,ymm2 + vmovdqu YMMWORD[(96-128)+rcx],ymm3 + vpxor ymm3,ymm3,ymm3 + vmovdqu YMMWORD[(128-128)+rcx],ymm4 + vpxor ymm4,ymm4,ymm4 + vmovdqu YMMWORD[(160-128)+rcx],ymm5 + vpxor ymm5,ymm5,ymm5 + vmovdqu YMMWORD[(192-128)+rcx],ymm6 + vpxor ymm6,ymm6,ymm6 + vmovdqu YMMWORD[(224-128)+rcx],ymm7 + vpxor ymm7,ymm7,ymm7 + vmovdqu YMMWORD[(256-128)+rcx],ymm8 + vmovdqa ymm8,ymm0 + vmovdqu YMMWORD[(288-128)+rcx],ymm9 +$L$mul_1024_no_n_copy: + and rsp,-64 + + mov rbx,QWORD[r13] + vpbroadcastq ymm10,QWORD[r13] + vmovdqu YMMWORD[rsp],ymm0 + xor r9,r9 + DB 0x67 + xor r10,r10 + xor r11,r11 + xor r12,r12 + + vmovdqu ymm15,YMMWORD[$L$and_mask] + mov r14d,9 + vmovdqu YMMWORD[(288-128)+rdi],ymm9 + jmp NEAR $L$oop_mul_1024 + +ALIGN 32 +$L$oop_mul_1024: + vpsrlq ymm9,ymm3,29 + mov rax,rbx + imul rax,QWORD[((-128))+rsi] + add rax,r9 + mov r10,rbx + imul r10,QWORD[((8-128))+rsi] + add r10,QWORD[8+rsp] + + mov r9,rax + imul eax,r8d + and eax,0x1fffffff + + mov r11,rbx + imul r11,QWORD[((16-128))+rsi] + add r11,QWORD[16+rsp] + + mov r12,rbx + imul r12,QWORD[((24-128))+rsi] + add r12,QWORD[24+rsp] + vpmuludq ymm0,ymm10,YMMWORD[((32-128))+rsi] + vmovd xmm11,eax + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm10,YMMWORD[((64-128))+rsi] + vpbroadcastq ymm11,xmm11 + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm10,YMMWORD[((96-128))+rsi] + vpand ymm3,ymm3,ymm15 + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm10,YMMWORD[((128-128))+rsi] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm10,YMMWORD[((160-128))+rsi] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm10,YMMWORD[((192-128))+rsi] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm10,YMMWORD[((224-128))+rsi] + vpermq ymm9,ymm9,0x93 + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm10,YMMWORD[((256-128))+rsi] + vpbroadcastq ymm10,QWORD[8+r13] + vpaddq ymm8,ymm8,ymm12 + + mov rdx,rax + imul rax,QWORD[((-128))+rcx] + add r9,rax + mov rax,rdx + imul rax,QWORD[((8-128))+rcx] + add r10,rax + mov rax,rdx + imul rax,QWORD[((16-128))+rcx] + add r11,rax + shr r9,29 + imul rdx,QWORD[((24-128))+rcx] + add r12,rdx + add r10,r9 + + vpmuludq ymm13,ymm11,YMMWORD[((32-128))+rcx] + vmovq rbx,xmm10 + vpaddq ymm1,ymm1,ymm13 + vpmuludq ymm0,ymm11,YMMWORD[((64-128))+rcx] + vpaddq ymm2,ymm2,ymm0 + vpmuludq ymm12,ymm11,YMMWORD[((96-128))+rcx] + vpaddq ymm3,ymm3,ymm12 + vpmuludq ymm13,ymm11,YMMWORD[((128-128))+rcx] + vpaddq ymm4,ymm4,ymm13 + vpmuludq ymm0,ymm11,YMMWORD[((160-128))+rcx] + vpaddq ymm5,ymm5,ymm0 + vpmuludq ymm12,ymm11,YMMWORD[((192-128))+rcx] + vpaddq ymm6,ymm6,ymm12 + vpmuludq ymm13,ymm11,YMMWORD[((224-128))+rcx] + vpblendd ymm12,ymm9,ymm14,3 + vpaddq ymm7,ymm7,ymm13 + vpmuludq ymm0,ymm11,YMMWORD[((256-128))+rcx] + vpaddq ymm3,ymm3,ymm12 + vpaddq ymm8,ymm8,ymm0 + + mov rax,rbx + imul rax,QWORD[((-128))+rsi] + add r10,rax + vmovdqu ymm12,YMMWORD[((-8+32-128))+rsi] + mov rax,rbx + imul rax,QWORD[((8-128))+rsi] + add r11,rax + vmovdqu ymm13,YMMWORD[((-8+64-128))+rsi] + + mov rax,r10 + vpblendd ymm9,ymm9,ymm14,0xfc + imul eax,r8d + vpaddq ymm4,ymm4,ymm9 + and eax,0x1fffffff + + imul rbx,QWORD[((16-128))+rsi] + add r12,rbx + vpmuludq ymm12,ymm12,ymm10 + vmovd xmm11,eax + vmovdqu ymm0,YMMWORD[((-8+96-128))+rsi] + vpaddq ymm1,ymm1,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vpbroadcastq ymm11,xmm11 + vmovdqu ymm12,YMMWORD[((-8+128-128))+rsi] + vpaddq ymm2,ymm2,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD[((-8+160-128))+rsi] + vpaddq ymm3,ymm3,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vmovdqu ymm0,YMMWORD[((-8+192-128))+rsi] + vpaddq ymm4,ymm4,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD[((-8+224-128))+rsi] + vpaddq ymm5,ymm5,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD[((-8+256-128))+rsi] + vpaddq ymm6,ymm6,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vmovdqu ymm9,YMMWORD[((-8+288-128))+rsi] + vpaddq ymm7,ymm7,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vpaddq ymm8,ymm8,ymm13 + vpmuludq ymm9,ymm9,ymm10 + vpbroadcastq ymm10,QWORD[16+r13] + + mov rdx,rax + imul rax,QWORD[((-128))+rcx] + add r10,rax + vmovdqu ymm0,YMMWORD[((-8+32-128))+rcx] + mov rax,rdx + imul rax,QWORD[((8-128))+rcx] + add r11,rax + vmovdqu ymm12,YMMWORD[((-8+64-128))+rcx] + shr r10,29 + imul rdx,QWORD[((16-128))+rcx] + add r12,rdx + add r11,r10 + + vpmuludq ymm0,ymm0,ymm11 + vmovq rbx,xmm10 + vmovdqu ymm13,YMMWORD[((-8+96-128))+rcx] + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD[((-8+128-128))+rcx] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD[((-8+160-128))+rcx] + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD[((-8+192-128))+rcx] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD[((-8+224-128))+rcx] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD[((-8+256-128))+rcx] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD[((-8+288-128))+rcx] + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vpaddq ymm9,ymm9,ymm13 + + vmovdqu ymm0,YMMWORD[((-16+32-128))+rsi] + mov rax,rbx + imul rax,QWORD[((-128))+rsi] + add rax,r11 + + vmovdqu ymm12,YMMWORD[((-16+64-128))+rsi] + mov r11,rax + imul eax,r8d + and eax,0x1fffffff + + imul rbx,QWORD[((8-128))+rsi] + add r12,rbx + vpmuludq ymm0,ymm0,ymm10 + vmovd xmm11,eax + vmovdqu ymm13,YMMWORD[((-16+96-128))+rsi] + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vpbroadcastq ymm11,xmm11 + vmovdqu ymm0,YMMWORD[((-16+128-128))+rsi] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD[((-16+160-128))+rsi] + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD[((-16+192-128))+rsi] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vmovdqu ymm0,YMMWORD[((-16+224-128))+rsi] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD[((-16+256-128))+rsi] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD[((-16+288-128))+rsi] + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vpbroadcastq ymm10,QWORD[24+r13] + vpaddq ymm9,ymm9,ymm13 + + vmovdqu ymm0,YMMWORD[((-16+32-128))+rcx] + mov rdx,rax + imul rax,QWORD[((-128))+rcx] + add r11,rax + vmovdqu ymm12,YMMWORD[((-16+64-128))+rcx] + imul rdx,QWORD[((8-128))+rcx] + add r12,rdx + shr r11,29 + + vpmuludq ymm0,ymm0,ymm11 + vmovq rbx,xmm10 + vmovdqu ymm13,YMMWORD[((-16+96-128))+rcx] + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD[((-16+128-128))+rcx] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD[((-16+160-128))+rcx] + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD[((-16+192-128))+rcx] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD[((-16+224-128))+rcx] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD[((-16+256-128))+rcx] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD[((-16+288-128))+rcx] + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD[((-24+32-128))+rsi] + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD[((-24+64-128))+rsi] + vpaddq ymm9,ymm9,ymm13 + + add r12,r11 + imul rbx,QWORD[((-128))+rsi] + add r12,rbx + + mov rax,r12 + imul eax,r8d + and eax,0x1fffffff + + vpmuludq ymm0,ymm0,ymm10 + vmovd xmm11,eax + vmovdqu ymm13,YMMWORD[((-24+96-128))+rsi] + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vpbroadcastq ymm11,xmm11 + vmovdqu ymm0,YMMWORD[((-24+128-128))+rsi] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD[((-24+160-128))+rsi] + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD[((-24+192-128))+rsi] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vmovdqu ymm0,YMMWORD[((-24+224-128))+rsi] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD[((-24+256-128))+rsi] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD[((-24+288-128))+rsi] + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vpbroadcastq ymm10,QWORD[32+r13] + vpaddq ymm9,ymm9,ymm13 + add r13,32 + + vmovdqu ymm0,YMMWORD[((-24+32-128))+rcx] + imul rax,QWORD[((-128))+rcx] + add r12,rax + shr r12,29 + + vmovdqu ymm12,YMMWORD[((-24+64-128))+rcx] + vpmuludq ymm0,ymm0,ymm11 + vmovq rbx,xmm10 + vmovdqu ymm13,YMMWORD[((-24+96-128))+rcx] + vpaddq ymm0,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu YMMWORD[rsp],ymm0 + vpaddq ymm1,ymm2,ymm12 + vmovdqu ymm0,YMMWORD[((-24+128-128))+rcx] + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD[((-24+160-128))+rcx] + vpaddq ymm2,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD[((-24+192-128))+rcx] + vpaddq ymm3,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD[((-24+224-128))+rcx] + vpaddq ymm4,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD[((-24+256-128))+rcx] + vpaddq ymm5,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD[((-24+288-128))+rcx] + mov r9,r12 + vpaddq ymm6,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm11 + add r9,QWORD[rsp] + vpaddq ymm7,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovq xmm12,r12 + vpaddq ymm8,ymm9,ymm13 + + dec r14d + jnz NEAR $L$oop_mul_1024 + vpaddq ymm0,ymm12,YMMWORD[rsp] + + vpsrlq ymm12,ymm0,29 + vpand ymm0,ymm0,ymm15 + vpsrlq ymm13,ymm1,29 + vpand ymm1,ymm1,ymm15 + vpsrlq ymm10,ymm2,29 + vpermq ymm12,ymm12,0x93 + vpand ymm2,ymm2,ymm15 + vpsrlq ymm11,ymm3,29 + vpermq ymm13,ymm13,0x93 + vpand ymm3,ymm3,ymm15 + + vpblendd ymm9,ymm12,ymm14,3 + vpermq ymm10,ymm10,0x93 + vpblendd ymm12,ymm13,ymm12,3 + vpermq ymm11,ymm11,0x93 + vpaddq ymm0,ymm0,ymm9 + vpblendd ymm13,ymm10,ymm13,3 + vpaddq ymm1,ymm1,ymm12 + vpblendd ymm10,ymm11,ymm10,3 + vpaddq ymm2,ymm2,ymm13 + vpblendd ymm11,ymm14,ymm11,3 + vpaddq ymm3,ymm3,ymm10 + vpaddq ymm4,ymm4,ymm11 + + vpsrlq ymm12,ymm0,29 + vpand ymm0,ymm0,ymm15 + vpsrlq ymm13,ymm1,29 + vpand ymm1,ymm1,ymm15 + vpsrlq ymm10,ymm2,29 + vpermq ymm12,ymm12,0x93 + vpand ymm2,ymm2,ymm15 + vpsrlq ymm11,ymm3,29 + vpermq ymm13,ymm13,0x93 + vpand ymm3,ymm3,ymm15 + vpermq ymm10,ymm10,0x93 + + vpblendd ymm9,ymm12,ymm14,3 + vpermq ymm11,ymm11,0x93 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm0,ymm0,ymm9 + vpblendd ymm13,ymm10,ymm13,3 + vpaddq ymm1,ymm1,ymm12 + vpblendd ymm10,ymm11,ymm10,3 + vpaddq ymm2,ymm2,ymm13 + vpblendd ymm11,ymm14,ymm11,3 + vpaddq ymm3,ymm3,ymm10 + vpaddq ymm4,ymm4,ymm11 + + vmovdqu YMMWORD[(0-128)+rdi],ymm0 + vmovdqu YMMWORD[(32-128)+rdi],ymm1 + vmovdqu YMMWORD[(64-128)+rdi],ymm2 + vmovdqu YMMWORD[(96-128)+rdi],ymm3 + vpsrlq ymm12,ymm4,29 + vpand ymm4,ymm4,ymm15 + vpsrlq ymm13,ymm5,29 + vpand ymm5,ymm5,ymm15 + vpsrlq ymm10,ymm6,29 + vpermq ymm12,ymm12,0x93 + vpand ymm6,ymm6,ymm15 + vpsrlq ymm11,ymm7,29 + vpermq ymm13,ymm13,0x93 + vpand ymm7,ymm7,ymm15 + vpsrlq ymm0,ymm8,29 + vpermq ymm10,ymm10,0x93 + vpand ymm8,ymm8,ymm15 + vpermq ymm11,ymm11,0x93 + + vpblendd ymm9,ymm12,ymm14,3 + vpermq ymm0,ymm0,0x93 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm4,ymm4,ymm9 + vpblendd ymm13,ymm10,ymm13,3 + vpaddq ymm5,ymm5,ymm12 + vpblendd ymm10,ymm11,ymm10,3 + vpaddq ymm6,ymm6,ymm13 + vpblendd ymm11,ymm0,ymm11,3 + vpaddq ymm7,ymm7,ymm10 + vpaddq ymm8,ymm8,ymm11 + + vpsrlq ymm12,ymm4,29 + vpand ymm4,ymm4,ymm15 + vpsrlq ymm13,ymm5,29 + vpand ymm5,ymm5,ymm15 + vpsrlq ymm10,ymm6,29 + vpermq ymm12,ymm12,0x93 + vpand ymm6,ymm6,ymm15 + vpsrlq ymm11,ymm7,29 + vpermq ymm13,ymm13,0x93 + vpand ymm7,ymm7,ymm15 + vpsrlq ymm0,ymm8,29 + vpermq ymm10,ymm10,0x93 + vpand ymm8,ymm8,ymm15 + vpermq ymm11,ymm11,0x93 + + vpblendd ymm9,ymm12,ymm14,3 + vpermq ymm0,ymm0,0x93 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm4,ymm4,ymm9 + vpblendd ymm13,ymm10,ymm13,3 + vpaddq ymm5,ymm5,ymm12 + vpblendd ymm10,ymm11,ymm10,3 + vpaddq ymm6,ymm6,ymm13 + vpblendd ymm11,ymm0,ymm11,3 + vpaddq ymm7,ymm7,ymm10 + vpaddq ymm8,ymm8,ymm11 + + vmovdqu YMMWORD[(128-128)+rdi],ymm4 + vmovdqu YMMWORD[(160-128)+rdi],ymm5 + vmovdqu YMMWORD[(192-128)+rdi],ymm6 + vmovdqu YMMWORD[(224-128)+rdi],ymm7 + vmovdqu YMMWORD[(256-128)+rdi],ymm8 + vzeroupper + + mov rax,rbp + +$L$mul_1024_in_tail: + movaps xmm6,XMMWORD[((-216))+rax] + movaps xmm7,XMMWORD[((-200))+rax] + movaps xmm8,XMMWORD[((-184))+rax] + movaps xmm9,XMMWORD[((-168))+rax] + movaps xmm10,XMMWORD[((-152))+rax] + movaps xmm11,XMMWORD[((-136))+rax] + movaps xmm12,XMMWORD[((-120))+rax] + movaps xmm13,XMMWORD[((-104))+rax] + movaps xmm14,XMMWORD[((-88))+rax] + movaps xmm15,XMMWORD[((-72))+rax] + mov r15,QWORD[((-48))+rax] + + mov r14,QWORD[((-40))+rax] + + mov r13,QWORD[((-32))+rax] + + mov r12,QWORD[((-24))+rax] + + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$mul_1024_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_rsaz_1024_mul_avx2: +global rsaz_1024_red2norm_avx2 + +ALIGN 32 +rsaz_1024_red2norm_avx2: + +_CET_ENDBR + sub rdx,-128 + xor rax,rax + mov r8,QWORD[((-128))+rdx] + mov r9,QWORD[((-120))+rdx] + mov r10,QWORD[((-112))+rdx] + shl r8,0 + shl r9,29 + mov r11,r10 + shl r10,58 + shr r11,6 + add rax,r8 + add rax,r9 + add rax,r10 + adc r11,0 + mov QWORD[rcx],rax + mov rax,r11 + mov r8,QWORD[((-104))+rdx] + mov r9,QWORD[((-96))+rdx] + shl r8,23 + mov r10,r9 + shl r9,52 + shr r10,12 + add rax,r8 + add rax,r9 + adc r10,0 + mov QWORD[8+rcx],rax + mov rax,r10 + mov r11,QWORD[((-88))+rdx] + mov r8,QWORD[((-80))+rdx] + shl r11,17 + mov r9,r8 + shl r8,46 + shr r9,18 + add rax,r11 + add rax,r8 + adc r9,0 + mov QWORD[16+rcx],rax + mov rax,r9 + mov r10,QWORD[((-72))+rdx] + mov r11,QWORD[((-64))+rdx] + shl r10,11 + mov r8,r11 + shl r11,40 + shr r8,24 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD[24+rcx],rax + mov rax,r8 + mov r9,QWORD[((-56))+rdx] + mov r10,QWORD[((-48))+rdx] + mov r11,QWORD[((-40))+rdx] + shl r9,5 + shl r10,34 + mov r8,r11 + shl r11,63 + shr r8,1 + add rax,r9 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD[32+rcx],rax + mov rax,r8 + mov r9,QWORD[((-32))+rdx] + mov r10,QWORD[((-24))+rdx] + shl r9,28 + mov r11,r10 + shl r10,57 + shr r11,7 + add rax,r9 + add rax,r10 + adc r11,0 + mov QWORD[40+rcx],rax + mov rax,r11 + mov r8,QWORD[((-16))+rdx] + mov r9,QWORD[((-8))+rdx] + shl r8,22 + mov r10,r9 + shl r9,51 + shr r10,13 + add rax,r8 + add rax,r9 + adc r10,0 + mov QWORD[48+rcx],rax + mov rax,r10 + mov r11,QWORD[rdx] + mov r8,QWORD[8+rdx] + shl r11,16 + mov r9,r8 + shl r8,45 + shr r9,19 + add rax,r11 + add rax,r8 + adc r9,0 + mov QWORD[56+rcx],rax + mov rax,r9 + mov r10,QWORD[16+rdx] + mov r11,QWORD[24+rdx] + shl r10,10 + mov r8,r11 + shl r11,39 + shr r8,25 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD[64+rcx],rax + mov rax,r8 + mov r9,QWORD[32+rdx] + mov r10,QWORD[40+rdx] + mov r11,QWORD[48+rdx] + shl r9,4 + shl r10,33 + mov r8,r11 + shl r11,62 + shr r8,2 + add rax,r9 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD[72+rcx],rax + mov rax,r8 + mov r9,QWORD[56+rdx] + mov r10,QWORD[64+rdx] + shl r9,27 + mov r11,r10 + shl r10,56 + shr r11,8 + add rax,r9 + add rax,r10 + adc r11,0 + mov QWORD[80+rcx],rax + mov rax,r11 + mov r8,QWORD[72+rdx] + mov r9,QWORD[80+rdx] + shl r8,21 + mov r10,r9 + shl r9,50 + shr r10,14 + add rax,r8 + add rax,r9 + adc r10,0 + mov QWORD[88+rcx],rax + mov rax,r10 + mov r11,QWORD[88+rdx] + mov r8,QWORD[96+rdx] + shl r11,15 + mov r9,r8 + shl r8,44 + shr r9,20 + add rax,r11 + add rax,r8 + adc r9,0 + mov QWORD[96+rcx],rax + mov rax,r9 + mov r10,QWORD[104+rdx] + mov r11,QWORD[112+rdx] + shl r10,9 + mov r8,r11 + shl r11,38 + shr r8,26 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD[104+rcx],rax + mov rax,r8 + mov r9,QWORD[120+rdx] + mov r10,QWORD[128+rdx] + mov r11,QWORD[136+rdx] + shl r9,3 + shl r10,32 + mov r8,r11 + shl r11,61 + shr r8,3 + add rax,r9 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD[112+rcx],rax + mov rax,r8 + mov r9,QWORD[144+rdx] + mov r10,QWORD[152+rdx] + shl r9,26 + mov r11,r10 + shl r10,55 + shr r11,9 + add rax,r9 + add rax,r10 + adc r11,0 + mov QWORD[120+rcx],rax + mov rax,r11 + ret + + + +global rsaz_1024_norm2red_avx2 + +ALIGN 32 +rsaz_1024_norm2red_avx2: + +_CET_ENDBR + sub rcx,-128 + mov r8,QWORD[rdx] + mov eax,0x1fffffff + mov r9,QWORD[8+rdx] + mov r11,r8 + shr r11,0 + and r11,rax + mov QWORD[((-128))+rcx],r11 + mov r10,r8 + shr r10,29 + and r10,rax + mov QWORD[((-120))+rcx],r10 + shrd r8,r9,58 + and r8,rax + mov QWORD[((-112))+rcx],r8 + mov r10,QWORD[16+rdx] + mov r8,r9 + shr r8,23 + and r8,rax + mov QWORD[((-104))+rcx],r8 + shrd r9,r10,52 + and r9,rax + mov QWORD[((-96))+rcx],r9 + mov r11,QWORD[24+rdx] + mov r9,r10 + shr r9,17 + and r9,rax + mov QWORD[((-88))+rcx],r9 + shrd r10,r11,46 + and r10,rax + mov QWORD[((-80))+rcx],r10 + mov r8,QWORD[32+rdx] + mov r10,r11 + shr r10,11 + and r10,rax + mov QWORD[((-72))+rcx],r10 + shrd r11,r8,40 + and r11,rax + mov QWORD[((-64))+rcx],r11 + mov r9,QWORD[40+rdx] + mov r11,r8 + shr r11,5 + and r11,rax + mov QWORD[((-56))+rcx],r11 + mov r10,r8 + shr r10,34 + and r10,rax + mov QWORD[((-48))+rcx],r10 + shrd r8,r9,63 + and r8,rax + mov QWORD[((-40))+rcx],r8 + mov r10,QWORD[48+rdx] + mov r8,r9 + shr r8,28 + and r8,rax + mov QWORD[((-32))+rcx],r8 + shrd r9,r10,57 + and r9,rax + mov QWORD[((-24))+rcx],r9 + mov r11,QWORD[56+rdx] + mov r9,r10 + shr r9,22 + and r9,rax + mov QWORD[((-16))+rcx],r9 + shrd r10,r11,51 + and r10,rax + mov QWORD[((-8))+rcx],r10 + mov r8,QWORD[64+rdx] + mov r10,r11 + shr r10,16 + and r10,rax + mov QWORD[rcx],r10 + shrd r11,r8,45 + and r11,rax + mov QWORD[8+rcx],r11 + mov r9,QWORD[72+rdx] + mov r11,r8 + shr r11,10 + and r11,rax + mov QWORD[16+rcx],r11 + shrd r8,r9,39 + and r8,rax + mov QWORD[24+rcx],r8 + mov r10,QWORD[80+rdx] + mov r8,r9 + shr r8,4 + and r8,rax + mov QWORD[32+rcx],r8 + mov r11,r9 + shr r11,33 + and r11,rax + mov QWORD[40+rcx],r11 + shrd r9,r10,62 + and r9,rax + mov QWORD[48+rcx],r9 + mov r11,QWORD[88+rdx] + mov r9,r10 + shr r9,27 + and r9,rax + mov QWORD[56+rcx],r9 + shrd r10,r11,56 + and r10,rax + mov QWORD[64+rcx],r10 + mov r8,QWORD[96+rdx] + mov r10,r11 + shr r10,21 + and r10,rax + mov QWORD[72+rcx],r10 + shrd r11,r8,50 + and r11,rax + mov QWORD[80+rcx],r11 + mov r9,QWORD[104+rdx] + mov r11,r8 + shr r11,15 + and r11,rax + mov QWORD[88+rcx],r11 + shrd r8,r9,44 + and r8,rax + mov QWORD[96+rcx],r8 + mov r10,QWORD[112+rdx] + mov r8,r9 + shr r8,9 + and r8,rax + mov QWORD[104+rcx],r8 + shrd r9,r10,38 + and r9,rax + mov QWORD[112+rcx],r9 + mov r11,QWORD[120+rdx] + mov r9,r10 + shr r9,3 + and r9,rax + mov QWORD[120+rcx],r9 + mov r8,r10 + shr r8,32 + and r8,rax + mov QWORD[128+rcx],r8 + shrd r10,r11,61 + and r10,rax + mov QWORD[136+rcx],r10 + xor r8,r8 + mov r10,r11 + shr r10,26 + and r10,rax + mov QWORD[144+rcx],r10 + shrd r11,r8,55 + and r11,rax + mov QWORD[152+rcx],r11 + mov QWORD[160+rcx],r8 + mov QWORD[168+rcx],r8 + mov QWORD[176+rcx],r8 + mov QWORD[184+rcx],r8 + ret + + +global rsaz_1024_scatter5_avx2 + +ALIGN 32 +rsaz_1024_scatter5_avx2: + +_CET_ENDBR + vzeroupper + vmovdqu ymm5,YMMWORD[$L$scatter_permd] + shl r8d,4 + lea rcx,[r8*1+rcx] + mov eax,9 + jmp NEAR $L$oop_scatter_1024 + +ALIGN 32 +$L$oop_scatter_1024: + vmovdqu ymm0,YMMWORD[rdx] + lea rdx,[32+rdx] + vpermd ymm0,ymm5,ymm0 + vmovdqu XMMWORD[rcx],xmm0 + lea rcx,[512+rcx] + dec eax + jnz NEAR $L$oop_scatter_1024 + + vzeroupper + ret + + + +global rsaz_1024_gather5_avx2 + +ALIGN 32 +rsaz_1024_gather5_avx2: + +_CET_ENDBR + vzeroupper + mov r11,rsp + + lea rax,[((-136))+rsp] +$L$SEH_begin_rsaz_1024_gather5: + + DB 0x48,0x8d,0x60,0xe0 + DB 0xc5,0xf8,0x29,0x70,0xe0 + DB 0xc5,0xf8,0x29,0x78,0xf0 + DB 0xc5,0x78,0x29,0x40,0x00 + DB 0xc5,0x78,0x29,0x48,0x10 + DB 0xc5,0x78,0x29,0x50,0x20 + DB 0xc5,0x78,0x29,0x58,0x30 + DB 0xc5,0x78,0x29,0x60,0x40 + DB 0xc5,0x78,0x29,0x68,0x50 + DB 0xc5,0x78,0x29,0x70,0x60 + DB 0xc5,0x78,0x29,0x78,0x70 + lea rsp,[((-256))+rsp] + and rsp,-32 + lea r10,[$L$inc] + lea rax,[((-128))+rsp] + + vmovd xmm4,r8d + vmovdqa ymm0,YMMWORD[r10] + vmovdqa ymm1,YMMWORD[32+r10] + vmovdqa ymm5,YMMWORD[64+r10] + vpbroadcastd ymm4,xmm4 + + vpaddd ymm2,ymm0,ymm5 + vpcmpeqd ymm0,ymm0,ymm4 + vpaddd ymm3,ymm1,ymm5 + vpcmpeqd ymm1,ymm1,ymm4 + vmovdqa YMMWORD[(0+128)+rax],ymm0 + vpaddd ymm0,ymm2,ymm5 + vpcmpeqd ymm2,ymm2,ymm4 + vmovdqa YMMWORD[(32+128)+rax],ymm1 + vpaddd ymm1,ymm3,ymm5 + vpcmpeqd ymm3,ymm3,ymm4 + vmovdqa YMMWORD[(64+128)+rax],ymm2 + vpaddd ymm2,ymm0,ymm5 + vpcmpeqd ymm0,ymm0,ymm4 + vmovdqa YMMWORD[(96+128)+rax],ymm3 + vpaddd ymm3,ymm1,ymm5 + vpcmpeqd ymm1,ymm1,ymm4 + vmovdqa YMMWORD[(128+128)+rax],ymm0 + vpaddd ymm8,ymm2,ymm5 + vpcmpeqd ymm2,ymm2,ymm4 + vmovdqa YMMWORD[(160+128)+rax],ymm1 + vpaddd ymm9,ymm3,ymm5 + vpcmpeqd ymm3,ymm3,ymm4 + vmovdqa YMMWORD[(192+128)+rax],ymm2 + vpaddd ymm10,ymm8,ymm5 + vpcmpeqd ymm8,ymm8,ymm4 + vmovdqa YMMWORD[(224+128)+rax],ymm3 + vpaddd ymm11,ymm9,ymm5 + vpcmpeqd ymm9,ymm9,ymm4 + vpaddd ymm12,ymm10,ymm5 + vpcmpeqd ymm10,ymm10,ymm4 + vpaddd ymm13,ymm11,ymm5 + vpcmpeqd ymm11,ymm11,ymm4 + vpaddd ymm14,ymm12,ymm5 + vpcmpeqd ymm12,ymm12,ymm4 + vpaddd ymm15,ymm13,ymm5 + vpcmpeqd ymm13,ymm13,ymm4 + vpcmpeqd ymm14,ymm14,ymm4 + vpcmpeqd ymm15,ymm15,ymm4 + + vmovdqa ymm7,YMMWORD[((-32))+r10] + lea rdx,[128+rdx] + mov r8d,9 + +$L$oop_gather_1024: + vmovdqa ymm0,YMMWORD[((0-128))+rdx] + vmovdqa ymm1,YMMWORD[((32-128))+rdx] + vmovdqa ymm2,YMMWORD[((64-128))+rdx] + vmovdqa ymm3,YMMWORD[((96-128))+rdx] + vpand ymm0,ymm0,YMMWORD[((0+128))+rax] + vpand ymm1,ymm1,YMMWORD[((32+128))+rax] + vpand ymm2,ymm2,YMMWORD[((64+128))+rax] + vpor ymm4,ymm1,ymm0 + vpand ymm3,ymm3,YMMWORD[((96+128))+rax] + vmovdqa ymm0,YMMWORD[((128-128))+rdx] + vmovdqa ymm1,YMMWORD[((160-128))+rdx] + vpor ymm5,ymm3,ymm2 + vmovdqa ymm2,YMMWORD[((192-128))+rdx] + vmovdqa ymm3,YMMWORD[((224-128))+rdx] + vpand ymm0,ymm0,YMMWORD[((128+128))+rax] + vpand ymm1,ymm1,YMMWORD[((160+128))+rax] + vpand ymm2,ymm2,YMMWORD[((192+128))+rax] + vpor ymm4,ymm4,ymm0 + vpand ymm3,ymm3,YMMWORD[((224+128))+rax] + vpand ymm0,ymm8,YMMWORD[((256-128))+rdx] + vpor ymm5,ymm5,ymm1 + vpand ymm1,ymm9,YMMWORD[((288-128))+rdx] + vpor ymm4,ymm4,ymm2 + vpand ymm2,ymm10,YMMWORD[((320-128))+rdx] + vpor ymm5,ymm5,ymm3 + vpand ymm3,ymm11,YMMWORD[((352-128))+rdx] + vpor ymm4,ymm4,ymm0 + vpand ymm0,ymm12,YMMWORD[((384-128))+rdx] + vpor ymm5,ymm5,ymm1 + vpand ymm1,ymm13,YMMWORD[((416-128))+rdx] + vpor ymm4,ymm4,ymm2 + vpand ymm2,ymm14,YMMWORD[((448-128))+rdx] + vpor ymm5,ymm5,ymm3 + vpand ymm3,ymm15,YMMWORD[((480-128))+rdx] + lea rdx,[512+rdx] + vpor ymm4,ymm4,ymm0 + vpor ymm5,ymm5,ymm1 + vpor ymm4,ymm4,ymm2 + vpor ymm5,ymm5,ymm3 + + vpor ymm4,ymm4,ymm5 + vextracti128 xmm5,ymm4,1 + vpor xmm5,xmm5,xmm4 + vpermd ymm5,ymm7,ymm5 + vmovdqu YMMWORD[rcx],ymm5 + lea rcx,[32+rcx] + dec r8d + jnz NEAR $L$oop_gather_1024 + + vpxor ymm0,ymm0,ymm0 + vmovdqu YMMWORD[rcx],ymm0 + vzeroupper + movaps xmm6,XMMWORD[((-168))+r11] + movaps xmm7,XMMWORD[((-152))+r11] + movaps xmm8,XMMWORD[((-136))+r11] + movaps xmm9,XMMWORD[((-120))+r11] + movaps xmm10,XMMWORD[((-104))+r11] + movaps xmm11,XMMWORD[((-88))+r11] + movaps xmm12,XMMWORD[((-72))+r11] + movaps xmm13,XMMWORD[((-56))+r11] + movaps xmm14,XMMWORD[((-40))+r11] + movaps xmm15,XMMWORD[((-24))+r11] + lea rsp,[r11] + + ret + +$L$SEH_end_rsaz_1024_gather5: + +section .rdata rdata align=8 +ALIGN 64 +$L$and_mask: + DQ 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff +$L$scatter_permd: + DD 0,2,4,6,7,7,7,7 +$L$gather_permd: + DD 0,7,1,7,2,7,3,7 +$L$inc: + DD 0,0,0,0,1,1,1,1 + DD 2,2,2,2,3,3,3,3 + DD 4,4,4,4,4,4,4,4 +ALIGN 64 +section .text + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +rsaz_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rbp,QWORD[160+r8] + + mov r10d,DWORD[8+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + cmovc rax,rbp + + mov r15,QWORD[((-48))+rax] + mov r14,QWORD[((-40))+rax] + mov r13,QWORD[((-32))+rax] + mov r12,QWORD[((-24))+rax] + mov rbp,QWORD[((-16))+rax] + mov rbx,QWORD[((-8))+rax] + mov QWORD[240+r8],r15 + mov QWORD[232+r8],r14 + mov QWORD[224+r8],r13 + mov QWORD[216+r8],r12 + mov QWORD[160+r8],rbp + mov QWORD[144+r8],rbx + + lea rsi,[((-216))+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_rsaz_1024_sqr_avx2 wrt ..imagebase + DD $L$SEH_end_rsaz_1024_sqr_avx2 wrt ..imagebase + DD $L$SEH_info_rsaz_1024_sqr_avx2 wrt ..imagebase + + DD $L$SEH_begin_rsaz_1024_mul_avx2 wrt ..imagebase + DD $L$SEH_end_rsaz_1024_mul_avx2 wrt ..imagebase + DD $L$SEH_info_rsaz_1024_mul_avx2 wrt ..imagebase + + DD $L$SEH_begin_rsaz_1024_gather5 wrt ..imagebase + DD $L$SEH_end_rsaz_1024_gather5 wrt ..imagebase + DD $L$SEH_info_rsaz_1024_gather5 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_rsaz_1024_sqr_avx2: + DB 9,0,0,0 + DD rsaz_se_handler wrt ..imagebase + DD $L$sqr_1024_body wrt ..imagebase,$L$sqr_1024_epilogue wrt ..imagebase,$L$sqr_1024_in_tail wrt ..imagebase + DD 0 +$L$SEH_info_rsaz_1024_mul_avx2: + DB 9,0,0,0 + DD rsaz_se_handler wrt ..imagebase + DD $L$mul_1024_body wrt ..imagebase,$L$mul_1024_epilogue wrt ..imagebase,$L$mul_1024_in_tail wrt ..imagebase + DD 0 +$L$SEH_info_rsaz_1024_gather5: + DB 0x01,0x36,0x17,0x0b + DB 0x36,0xf8,0x09,0x00 + DB 0x31,0xe8,0x08,0x00 + DB 0x2c,0xd8,0x07,0x00 + DB 0x27,0xc8,0x06,0x00 + DB 0x22,0xb8,0x05,0x00 + DB 0x1d,0xa8,0x04,0x00 + DB 0x18,0x98,0x03,0x00 + DB 0x13,0x88,0x02,0x00 + DB 0x0e,0x78,0x01,0x00 + DB 0x09,0x68,0x00,0x00 + DB 0x04,0x01,0x15,0x00 + DB 0x00,0xb3,0x00,0x00 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-586-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha1-586-apple.S new file mode 100644 index 0000000000..f0ab02be58 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-586-apple.S @@ -0,0 +1,3782 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _sha1_block_data_order_nohw +.private_extern _sha1_block_data_order_nohw +.align 4 +_sha1_block_data_order_nohw: +L_sha1_block_data_order_nohw_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp L000loop +.align 4,0x90 +L000loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + # 00_15 0 + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 1 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 2 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 3 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 4 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 5 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 6 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 7 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 8 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 9 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 10 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 11 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 12 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 13 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 14 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 15 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + # 16_19 16 + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 16_19 17 + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 16_19 18 + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 16_19 19 + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 20 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 21 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 22 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 23 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 24 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 25 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 26 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 27 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 28 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 29 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 30 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 31 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + # 20_39 32 + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + # 20_39 33 + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + # 20_39 34 + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + # 20_39 35 + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + # 20_39 36 + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + # 20_39 37 + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + # 20_39 38 + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + # 20_39 39 + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + # 40_59 40 + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + # 40_59 41 + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + # 40_59 42 + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + # 40_59 43 + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + # 40_59 44 + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + # 40_59 45 + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + # 40_59 46 + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + # 40_59 47 + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + # 40_59 48 + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + # 40_59 49 + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + # 40_59 50 + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + # 40_59 51 + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + # 40_59 52 + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + # 40_59 53 + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + # 40_59 54 + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + # 40_59 55 + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + # 40_59 56 + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + # 40_59 57 + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + # 40_59 58 + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + # 40_59 59 + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + # 20_39 60 + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + # 20_39 61 + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + # 20_39 62 + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + # 20_39 63 + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + # 20_39 64 + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 20_39 65 + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 20_39 66 + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 20_39 67 + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 68 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 69 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 70 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 71 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 72 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 73 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 74 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 75 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 76 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 77 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 78 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 79 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb L000loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _sha1_block_data_order_ssse3 +.private_extern _sha1_block_data_order_ssse3 +.align 4 +_sha1_block_data_order_ssse3: +L_sha1_block_data_order_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L001pic_point +L001pic_point: + popl %ebp + leal LK_XX_XX-L001pic_point(%ebp),%ebp + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%ebp + psubd %xmm7,%xmm2 + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp L002loop +.align 4,0x90 +L002loop: + rorl $2,%ebx + xorl %edx,%esi + movl %eax,%ebp + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + roll $5,%eax + addl %esi,%edi + psrldq $4,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%edi + rorl $7,%eax + pxor %xmm2,%xmm6 + xorl %ecx,%ebp + movl %edi,%esi + addl 4(%esp),%edx + pxor %xmm6,%xmm4 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm7,48(%esp) + addl %ebp,%edx + andl %eax,%esi + movdqa %xmm4,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + psrld $31,%xmm6 + xorl %eax,%edi + roll $5,%edx + movdqa %xmm0,%xmm7 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx + por %xmm6,%xmm4 + xorl %eax,%ebp + movl %ecx,%esi + addl 12(%esp),%ebx + pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx + pxor %xmm0,%xmm4 + movdqa 96(%esp),%xmm0 + addl %ebp,%ebx + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi + movl %ebx,%ebp + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx + paddd %xmm4,%xmm0 + movdqa %xmm1,80(%esp) + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm7 + xorl %edx,%ebp + movl %eax,%esi + addl 20(%esp),%edi + pxor %xmm7,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm0,(%esp) + addl %ebp,%edi + andl %ebx,%esi + movdqa %xmm5,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + psrld $31,%xmm7 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi + por %xmm7,%xmm5 + xorl %ebx,%ebp + movl %edx,%esi + addl 28(%esp),%ecx + pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx + pxor %xmm1,%xmm5 + movdqa 112(%esp),%xmm1 + addl %ebp,%ecx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 32(%esp),%ebx + xorl %edi,%edx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm0 + andl %edx,%ebp + xorl %edi,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm0 + xorl %edi,%ebp + movl %ebx,%esi + addl 36(%esp),%eax + pxor %xmm0,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm1,16(%esp) + addl %ebp,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi + pslldq $12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + psrld $31,%xmm0 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm2,%xmm1 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax + por %xmm0,%xmm6 + xorl %ecx,%ebp + movdqa 64(%esp),%xmm0 + movl %edi,%esi + addl 44(%esp),%edx + pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi + pxor %xmm2,%xmm6 + movdqa 112(%esp),%xmm2 + addl %ebp,%edx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi + movl %edx,%ebp + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm1 + andl %edi,%ebp + xorl %eax,%edi + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm1 + xorl %eax,%ebp + movl %ecx,%esi + addl 52(%esp),%ebx + pxor %xmm1,%xmm7 + xorl %edi,%edx + roll $5,%ecx + movdqa %xmm2,32(%esp) + addl %ebp,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + psrld $31,%xmm1 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm3,%xmm2 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + por %xmm1,%xmm7 + xorl %edx,%ebp + movdqa 80(%esp),%xmm1 + movl %eax,%esi + addl 60(%esp),%edi + pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax + pxor %xmm3,%xmm7 + movdqa 112(%esp),%xmm3 + addl %ebp,%edi + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm3,%xmm4 + addl %esi,%edx + paddd %xmm7,%xmm3 + andl %eax,%ebp + pxor %xmm2,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + roll $5,%edx + pslld $2,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + psrld $30,%xmm2 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx + roll $5,%ecx + por %xmm2,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + movdqa 96(%esp),%xmm2 + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm4,%xmm5 + rorl $7,%ebx + paddd %xmm0,%xmm4 + addl %eax,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm3 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + movdqa 128(%esp),%xmm6 + rorl $7,%ecx + paddd %xmm1,%xmm5 + addl %ebx,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%edx + paddd %xmm2,%xmm6 + addl %ecx,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + movdqa %xmm7,%xmm0 + rorl $7,%edi + paddd %xmm3,%xmm7 + addl %edx,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm6 + movdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + movdqa %xmm0,%xmm1 + rorl $7,%eax + paddd %xmm4,%xmm0 + addl %edi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + movdqa 80(%esp),%xmm7 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%esp),%edi + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + roll $5,%eax + movdqa %xmm1,%xmm2 + addl %esi,%edi + paddd %xmm5,%xmm1 + xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + pslld $2,%xmm6 + addl %ebp,%edx + xorl %eax,%esi + psrld $30,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + por %xmm0,%xmm6 + movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 + roll $5,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 48(%esp),%eax + pxor %xmm3,%xmm7 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + roll $5,%ebx + movdqa 144(%esp),%xmm3 + addl %esi,%eax + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + pslld $2,%xmm7 + addl %ebp,%edi + xorl %ebx,%esi + psrld $30,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + por %xmm1,%xmm7 + movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 + roll $5,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl (%esp),%ebx + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + roll $5,%ecx + movdqa %xmm3,%xmm4 + addl %esi,%ebx + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pslld $2,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + psrld $30,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + por %xmm2,%xmm0 + movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 + roll $5,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 16(%esp),%ecx + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + roll $5,%edx + movdqa %xmm4,%xmm5 + addl %esi,%ecx + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor %xmm3,%xmm1 + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + pslld $2,%xmm1 + addl %ebp,%ebx + xorl %edx,%esi + psrld $30,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + por %xmm3,%xmm1 + movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 32(%esp),%edx + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + roll $5,%edi + movdqa %xmm5,%xmm6 + addl %esi,%edx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm2 + addl %ebp,%ecx + xorl %edi,%esi + psrld $30,%xmm4 + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + por %xmm4,%xmm2 + movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%ebx + paddd %xmm2,%xmm6 + addl %eax,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + movdqa %xmm7,48(%esp) + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je L003done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + paddd %xmm7,%xmm0 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + psubd %xmm7,%xmm0 + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + paddd %xmm7,%xmm1 + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + psubd %xmm7,%xmm1 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + paddd %xmm7,%xmm2 + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + psubd %xmm7,%xmm2 + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %ecx,%ebx + movl %edx,12(%ebp) + xorl %edx,%ebx + movl %edi,16(%ebp) + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp L002loop +.align 4,0x90 +L003done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _sha1_block_data_order_avx +.private_extern _sha1_block_data_order_avx +.align 4 +_sha1_block_data_order_avx: +L_sha1_block_data_order_avx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L004pic_point +L004pic_point: + popl %ebp + leal LK_XX_XX-L004pic_point(%ebp),%ebp + vzeroall + vmovdqa (%ebp),%xmm7 + vmovdqa 16(%ebp),%xmm0 + vmovdqa 32(%ebp),%xmm1 + vmovdqa 48(%ebp),%xmm2 + vmovdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + vmovdqa %xmm0,112(%esp) + vmovdqa %xmm1,128(%esp) + vmovdqa %xmm2,144(%esp) + shll $6,%edx + vmovdqa %xmm7,160(%esp) + addl %ebp,%edx + vmovdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + vmovdqu -64(%ebp),%xmm0 + vmovdqu -48(%ebp),%xmm1 + vmovdqu -32(%ebp),%xmm2 + vmovdqu -16(%ebp),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vmovdqa %xmm7,96(%esp) + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm7,%xmm0,%xmm4 + vpaddd %xmm7,%xmm1,%xmm5 + vpaddd %xmm7,%xmm2,%xmm6 + vmovdqa %xmm4,(%esp) + movl %ecx,%ebp + vmovdqa %xmm5,16(%esp) + xorl %edx,%ebp + vmovdqa %xmm6,32(%esp) + andl %ebp,%esi + jmp L005loop +.align 4,0x90 +L005loop: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%ebp + addl (%esp),%edi + vpaddd %xmm3,%xmm7,%xmm7 + vmovdqa %xmm0,64(%esp) + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%edi + vpxor %xmm2,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vmovdqa %xmm7,48(%esp) + movl %edi,%esi + addl 4(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%edi,%edi + addl %ebp,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm6 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm0 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrld $30,%xmm0,%xmm7 + vpor %xmm6,%xmm4,%xmm4 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + vpslld $2,%xmm0,%xmm0 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vpxor %xmm7,%xmm4,%xmm4 + movl %ecx,%esi + addl 12(%esp),%ebx + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpxor %xmm0,%xmm4,%xmm4 + addl %ebp,%ebx + andl %edx,%esi + vmovdqa 96(%esp),%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%ebp + addl 16(%esp),%eax + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqa %xmm1,80(%esp) + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vmovdqa %xmm0,(%esp) + movl %eax,%esi + addl 20(%esp),%edi + vpxor %xmm7,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %ebp,%edi + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm7 + xorl %ecx,%ebx + addl %eax,%edi + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm1 + vpaddd %xmm5,%xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm0 + vpor %xmm7,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpxor %xmm0,%xmm5,%xmm5 + movl %edx,%esi + addl 28(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpxor %xmm1,%xmm5,%xmm5 + addl %ebp,%ecx + andl %edi,%esi + vmovdqa 112(%esp),%xmm1 + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%ebp + addl 32(%esp),%ebx + vpaddd %xmm5,%xmm1,%xmm1 + vmovdqa %xmm2,96(%esp) + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + vpxor %xmm2,%xmm6,%xmm6 + xorl %edi,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%ecx,%ecx + xorl %edi,%ebp + vmovdqa %xmm1,16(%esp) + movl %ebx,%esi + addl 36(%esp),%eax + vpxor %xmm0,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + addl %ebp,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm2 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm1 + vpor %xmm0,%xmm6,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + vmovdqa 64(%esp),%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vpxor %xmm1,%xmm6,%xmm6 + movl %edi,%esi + addl 44(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpxor %xmm2,%xmm6,%xmm6 + addl %ebp,%edx + andl %eax,%esi + vmovdqa 112(%esp),%xmm2 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%ebp + addl 48(%esp),%ecx + vpaddd %xmm6,%xmm2,%xmm2 + vmovdqa %xmm3,64(%esp) + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm1 + addl %esi,%ecx + andl %edi,%ebp + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%edi + addl %edx,%ecx + vpxor %xmm5,%xmm1,%xmm1 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vmovdqa %xmm2,32(%esp) + movl %ecx,%esi + addl 52(%esp),%ebx + vpxor %xmm1,%xmm7,%xmm7 + xorl %edi,%edx + shldl $5,%ecx,%ecx + addl %ebp,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm1 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpslldq $12,%xmm7,%xmm3 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm2 + vpor %xmm1,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + vmovdqa 80(%esp),%xmm1 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vpxor %xmm2,%xmm7,%xmm7 + movl %eax,%esi + addl 60(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpxor %xmm3,%xmm7,%xmm7 + addl %ebp,%edi + andl %ebx,%esi + vmovdqa 112(%esp),%xmm3 + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,80(%esp) + xorl %ebx,%eax + shldl $5,%edi,%edi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + addl %esi,%edx + andl %eax,%ebp + vpxor %xmm2,%xmm0,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + vpor %xmm2,%xmm0,%xmm0 + xorl %edi,%edx + shldl $5,%ecx,%ecx + vmovdqa 96(%esp),%xmm2 + addl %esi,%ebx + andl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm3,%xmm1,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm3,%xmm1,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + vmovdqa 64(%esp),%xmm3 + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + vmovdqa 128(%esp),%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm4,%xmm2,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vpor %xmm4,%xmm2,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + vmovdqa 80(%esp),%xmm4 + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + vmovdqa 96(%esp),%xmm5 + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpalignr $8,%xmm2,%xmm3,%xmm6 + vpxor %xmm0,%xmm4,%xmm4 + addl (%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + vmovdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + vmovdqa %xmm7,%xmm0 + vpaddd %xmm3,%xmm7,%xmm7 + shrdl $7,%edi,%edi + addl %edx,%ecx + vpxor %xmm6,%xmm4,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm6 + vmovdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm6,%xmm4,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + vmovdqa 64(%esp),%xmm6 + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpalignr $8,%xmm3,%xmm4,%xmm7 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + vpxor %xmm6,%xmm5,%xmm5 + vmovdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + vmovdqa %xmm0,%xmm1 + vpaddd %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + addl %edi,%edx + vpxor %xmm7,%xmm5,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm7 + vmovdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm7,%xmm5,%xmm5 + addl 28(%esp),%eax + vmovdqa 80(%esp),%xmm7 + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + vmovdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + vmovdqa %xmm1,%xmm2 + vpaddd %xmm5,%xmm1,%xmm1 + shldl $5,%eax,%eax + addl %esi,%edi + vpxor %xmm0,%xmm6,%xmm6 + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + vpsrld $30,%xmm6,%xmm0 + vmovdqa %xmm1,16(%esp) + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + vpor %xmm0,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%edi,%edi + vmovdqa 96(%esp),%xmm0 + movl %edx,%ebp + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm1 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + vmovdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + vmovdqa 144(%esp),%xmm3 + vpaddd %xmm6,%xmm2,%xmm2 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + vpsrld $30,%xmm7,%xmm1 + vmovdqa %xmm2,32(%esp) + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + vpor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vmovdqa 64(%esp),%xmm1 + movl %edi,%ebp + xorl %ebx,%esi + shldl $5,%edi,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + addl (%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm2,%xmm0,%xmm0 + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + vpor %xmm2,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vmovdqa 80(%esp),%xmm2 + movl %eax,%ebp + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%edi,%edi + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm3,%xmm1,%xmm1 + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + vpor %xmm3,%xmm1,%xmm1 + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vmovdqa 96(%esp),%xmm3 + movl %ebx,%ebp + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + vmovdqa %xmm5,%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shldl $5,%edi,%edi + addl %esi,%edx + vpxor %xmm4,%xmm2,%xmm2 + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + vpor %xmm4,%xmm2,%xmm2 + xorl %eax,%edi + shrdl $7,%edx,%edx + vmovdqa 64(%esp),%xmm4 + movl %ecx,%ebp + xorl %edi,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl (%esp),%eax + vpaddd %xmm3,%xmm7,%xmm7 + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm7,48(%esp) + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je L006done + vmovdqa 160(%esp),%xmm7 + vmovdqa 176(%esp),%xmm6 + vmovdqu (%ebp),%xmm0 + vmovdqu 16(%ebp),%xmm1 + vmovdqu 32(%ebp),%xmm2 + vmovdqu 48(%ebp),%xmm3 + addl $64,%ebp + vpshufb %xmm6,%xmm0,%xmm0 + movl %ebp,196(%esp) + vmovdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpaddd %xmm7,%xmm0,%xmm4 + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,(%esp) + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%ebp + shldl $5,%edx,%edx + vpaddd %xmm7,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vmovdqa %xmm5,16(%esp) + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %edi,%ebp + shldl $5,%edi,%edi + vpaddd %xmm7,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vmovdqa %xmm6,32(%esp) + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,%ebx + movl %ecx,8(%ebp) + xorl %edx,%ebx + movl %edx,12(%ebp) + movl %edi,16(%ebp) + movl %esi,%ebp + andl %ebx,%esi + movl %ebp,%ebx + jmp L005loop +.align 4,0x90 +L006done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroall + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-586-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha1-586-linux.S new file mode 100644 index 0000000000..0e5754fe20 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-586-linux.S @@ -0,0 +1,3788 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,@function +.align 16 +sha1_block_data_order_nohw: +.L_sha1_block_data_order_nohw_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp .L000loop +.align 16 +.L000loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb .L000loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha1_block_data_order_nohw,.-.L_sha1_block_data_order_nohw_begin +.globl sha1_block_data_order_ssse3 +.hidden sha1_block_data_order_ssse3 +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +.L_sha1_block_data_order_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L001pic_point +.L001pic_point: + popl %ebp + leal .LK_XX_XX-.L001pic_point(%ebp),%ebp + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%ebp + psubd %xmm7,%xmm2 + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp .L002loop +.align 16 +.L002loop: + rorl $2,%ebx + xorl %edx,%esi + movl %eax,%ebp + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + roll $5,%eax + addl %esi,%edi + psrldq $4,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%edi + rorl $7,%eax + pxor %xmm2,%xmm6 + xorl %ecx,%ebp + movl %edi,%esi + addl 4(%esp),%edx + pxor %xmm6,%xmm4 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm7,48(%esp) + addl %ebp,%edx + andl %eax,%esi + movdqa %xmm4,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + psrld $31,%xmm6 + xorl %eax,%edi + roll $5,%edx + movdqa %xmm0,%xmm7 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx + por %xmm6,%xmm4 + xorl %eax,%ebp + movl %ecx,%esi + addl 12(%esp),%ebx + pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx + pxor %xmm0,%xmm4 + movdqa 96(%esp),%xmm0 + addl %ebp,%ebx + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi + movl %ebx,%ebp + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx + paddd %xmm4,%xmm0 + movdqa %xmm1,80(%esp) + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm7 + xorl %edx,%ebp + movl %eax,%esi + addl 20(%esp),%edi + pxor %xmm7,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm0,(%esp) + addl %ebp,%edi + andl %ebx,%esi + movdqa %xmm5,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + psrld $31,%xmm7 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi + por %xmm7,%xmm5 + xorl %ebx,%ebp + movl %edx,%esi + addl 28(%esp),%ecx + pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx + pxor %xmm1,%xmm5 + movdqa 112(%esp),%xmm1 + addl %ebp,%ecx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 32(%esp),%ebx + xorl %edi,%edx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm0 + andl %edx,%ebp + xorl %edi,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm0 + xorl %edi,%ebp + movl %ebx,%esi + addl 36(%esp),%eax + pxor %xmm0,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm1,16(%esp) + addl %ebp,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi + pslldq $12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + psrld $31,%xmm0 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm2,%xmm1 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax + por %xmm0,%xmm6 + xorl %ecx,%ebp + movdqa 64(%esp),%xmm0 + movl %edi,%esi + addl 44(%esp),%edx + pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi + pxor %xmm2,%xmm6 + movdqa 112(%esp),%xmm2 + addl %ebp,%edx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi + movl %edx,%ebp + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm1 + andl %edi,%ebp + xorl %eax,%edi + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm1 + xorl %eax,%ebp + movl %ecx,%esi + addl 52(%esp),%ebx + pxor %xmm1,%xmm7 + xorl %edi,%edx + roll $5,%ecx + movdqa %xmm2,32(%esp) + addl %ebp,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + psrld $31,%xmm1 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm3,%xmm2 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + por %xmm1,%xmm7 + xorl %edx,%ebp + movdqa 80(%esp),%xmm1 + movl %eax,%esi + addl 60(%esp),%edi + pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax + pxor %xmm3,%xmm7 + movdqa 112(%esp),%xmm3 + addl %ebp,%edi + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm3,%xmm4 + addl %esi,%edx + paddd %xmm7,%xmm3 + andl %eax,%ebp + pxor %xmm2,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + roll $5,%edx + pslld $2,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + psrld $30,%xmm2 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx + roll $5,%ecx + por %xmm2,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + movdqa 96(%esp),%xmm2 + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm4,%xmm5 + rorl $7,%ebx + paddd %xmm0,%xmm4 + addl %eax,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm3 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + movdqa 128(%esp),%xmm6 + rorl $7,%ecx + paddd %xmm1,%xmm5 + addl %ebx,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%edx + paddd %xmm2,%xmm6 + addl %ecx,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + movdqa %xmm7,%xmm0 + rorl $7,%edi + paddd %xmm3,%xmm7 + addl %edx,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm6 + movdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + movdqa %xmm0,%xmm1 + rorl $7,%eax + paddd %xmm4,%xmm0 + addl %edi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + movdqa 80(%esp),%xmm7 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%esp),%edi + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + roll $5,%eax + movdqa %xmm1,%xmm2 + addl %esi,%edi + paddd %xmm5,%xmm1 + xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + pslld $2,%xmm6 + addl %ebp,%edx + xorl %eax,%esi + psrld $30,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + por %xmm0,%xmm6 + movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 + roll $5,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 48(%esp),%eax + pxor %xmm3,%xmm7 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + roll $5,%ebx + movdqa 144(%esp),%xmm3 + addl %esi,%eax + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + pslld $2,%xmm7 + addl %ebp,%edi + xorl %ebx,%esi + psrld $30,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + por %xmm1,%xmm7 + movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 + roll $5,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl (%esp),%ebx + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + roll $5,%ecx + movdqa %xmm3,%xmm4 + addl %esi,%ebx + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pslld $2,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + psrld $30,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + por %xmm2,%xmm0 + movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 + roll $5,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 16(%esp),%ecx + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + roll $5,%edx + movdqa %xmm4,%xmm5 + addl %esi,%ecx + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor %xmm3,%xmm1 + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + pslld $2,%xmm1 + addl %ebp,%ebx + xorl %edx,%esi + psrld $30,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + por %xmm3,%xmm1 + movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 32(%esp),%edx + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + roll $5,%edi + movdqa %xmm5,%xmm6 + addl %esi,%edx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm2 + addl %ebp,%ecx + xorl %edi,%esi + psrld $30,%xmm4 + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + por %xmm4,%xmm2 + movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%ebx + paddd %xmm2,%xmm6 + addl %eax,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + movdqa %xmm7,48(%esp) + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L003done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + paddd %xmm7,%xmm0 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + psubd %xmm7,%xmm0 + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + paddd %xmm7,%xmm1 + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + psubd %xmm7,%xmm1 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + paddd %xmm7,%xmm2 + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + psubd %xmm7,%xmm2 + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %ecx,%ebx + movl %edx,12(%ebp) + xorl %edx,%ebx + movl %edi,16(%ebp) + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp .L002loop +.align 16 +.L003done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha1_block_data_order_ssse3,.-.L_sha1_block_data_order_ssse3_begin +.globl sha1_block_data_order_avx +.hidden sha1_block_data_order_avx +.type sha1_block_data_order_avx,@function +.align 16 +sha1_block_data_order_avx: +.L_sha1_block_data_order_avx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L004pic_point +.L004pic_point: + popl %ebp + leal .LK_XX_XX-.L004pic_point(%ebp),%ebp + vzeroall + vmovdqa (%ebp),%xmm7 + vmovdqa 16(%ebp),%xmm0 + vmovdqa 32(%ebp),%xmm1 + vmovdqa 48(%ebp),%xmm2 + vmovdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + vmovdqa %xmm0,112(%esp) + vmovdqa %xmm1,128(%esp) + vmovdqa %xmm2,144(%esp) + shll $6,%edx + vmovdqa %xmm7,160(%esp) + addl %ebp,%edx + vmovdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + vmovdqu -64(%ebp),%xmm0 + vmovdqu -48(%ebp),%xmm1 + vmovdqu -32(%ebp),%xmm2 + vmovdqu -16(%ebp),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vmovdqa %xmm7,96(%esp) + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm7,%xmm0,%xmm4 + vpaddd %xmm7,%xmm1,%xmm5 + vpaddd %xmm7,%xmm2,%xmm6 + vmovdqa %xmm4,(%esp) + movl %ecx,%ebp + vmovdqa %xmm5,16(%esp) + xorl %edx,%ebp + vmovdqa %xmm6,32(%esp) + andl %ebp,%esi + jmp .L005loop +.align 16 +.L005loop: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%ebp + addl (%esp),%edi + vpaddd %xmm3,%xmm7,%xmm7 + vmovdqa %xmm0,64(%esp) + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%edi + vpxor %xmm2,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vmovdqa %xmm7,48(%esp) + movl %edi,%esi + addl 4(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%edi,%edi + addl %ebp,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm6 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm0 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrld $30,%xmm0,%xmm7 + vpor %xmm6,%xmm4,%xmm4 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + vpslld $2,%xmm0,%xmm0 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vpxor %xmm7,%xmm4,%xmm4 + movl %ecx,%esi + addl 12(%esp),%ebx + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpxor %xmm0,%xmm4,%xmm4 + addl %ebp,%ebx + andl %edx,%esi + vmovdqa 96(%esp),%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%ebp + addl 16(%esp),%eax + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqa %xmm1,80(%esp) + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vmovdqa %xmm0,(%esp) + movl %eax,%esi + addl 20(%esp),%edi + vpxor %xmm7,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %ebp,%edi + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm7 + xorl %ecx,%ebx + addl %eax,%edi + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm1 + vpaddd %xmm5,%xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm0 + vpor %xmm7,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpxor %xmm0,%xmm5,%xmm5 + movl %edx,%esi + addl 28(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpxor %xmm1,%xmm5,%xmm5 + addl %ebp,%ecx + andl %edi,%esi + vmovdqa 112(%esp),%xmm1 + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%ebp + addl 32(%esp),%ebx + vpaddd %xmm5,%xmm1,%xmm1 + vmovdqa %xmm2,96(%esp) + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + vpxor %xmm2,%xmm6,%xmm6 + xorl %edi,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%ecx,%ecx + xorl %edi,%ebp + vmovdqa %xmm1,16(%esp) + movl %ebx,%esi + addl 36(%esp),%eax + vpxor %xmm0,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + addl %ebp,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm2 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm1 + vpor %xmm0,%xmm6,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + vmovdqa 64(%esp),%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vpxor %xmm1,%xmm6,%xmm6 + movl %edi,%esi + addl 44(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpxor %xmm2,%xmm6,%xmm6 + addl %ebp,%edx + andl %eax,%esi + vmovdqa 112(%esp),%xmm2 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%ebp + addl 48(%esp),%ecx + vpaddd %xmm6,%xmm2,%xmm2 + vmovdqa %xmm3,64(%esp) + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm1 + addl %esi,%ecx + andl %edi,%ebp + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%edi + addl %edx,%ecx + vpxor %xmm5,%xmm1,%xmm1 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vmovdqa %xmm2,32(%esp) + movl %ecx,%esi + addl 52(%esp),%ebx + vpxor %xmm1,%xmm7,%xmm7 + xorl %edi,%edx + shldl $5,%ecx,%ecx + addl %ebp,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm1 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpslldq $12,%xmm7,%xmm3 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm2 + vpor %xmm1,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + vmovdqa 80(%esp),%xmm1 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vpxor %xmm2,%xmm7,%xmm7 + movl %eax,%esi + addl 60(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpxor %xmm3,%xmm7,%xmm7 + addl %ebp,%edi + andl %ebx,%esi + vmovdqa 112(%esp),%xmm3 + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,80(%esp) + xorl %ebx,%eax + shldl $5,%edi,%edi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + addl %esi,%edx + andl %eax,%ebp + vpxor %xmm2,%xmm0,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + vpor %xmm2,%xmm0,%xmm0 + xorl %edi,%edx + shldl $5,%ecx,%ecx + vmovdqa 96(%esp),%xmm2 + addl %esi,%ebx + andl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm3,%xmm1,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm3,%xmm1,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + vmovdqa 64(%esp),%xmm3 + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + vmovdqa 128(%esp),%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm4,%xmm2,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vpor %xmm4,%xmm2,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + vmovdqa 80(%esp),%xmm4 + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + vmovdqa 96(%esp),%xmm5 + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpalignr $8,%xmm2,%xmm3,%xmm6 + vpxor %xmm0,%xmm4,%xmm4 + addl (%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + vmovdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + vmovdqa %xmm7,%xmm0 + vpaddd %xmm3,%xmm7,%xmm7 + shrdl $7,%edi,%edi + addl %edx,%ecx + vpxor %xmm6,%xmm4,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm6 + vmovdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm6,%xmm4,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + vmovdqa 64(%esp),%xmm6 + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpalignr $8,%xmm3,%xmm4,%xmm7 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + vpxor %xmm6,%xmm5,%xmm5 + vmovdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + vmovdqa %xmm0,%xmm1 + vpaddd %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + addl %edi,%edx + vpxor %xmm7,%xmm5,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm7 + vmovdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm7,%xmm5,%xmm5 + addl 28(%esp),%eax + vmovdqa 80(%esp),%xmm7 + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + vmovdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + vmovdqa %xmm1,%xmm2 + vpaddd %xmm5,%xmm1,%xmm1 + shldl $5,%eax,%eax + addl %esi,%edi + vpxor %xmm0,%xmm6,%xmm6 + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + vpsrld $30,%xmm6,%xmm0 + vmovdqa %xmm1,16(%esp) + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + vpor %xmm0,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%edi,%edi + vmovdqa 96(%esp),%xmm0 + movl %edx,%ebp + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm1 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + vmovdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + vmovdqa 144(%esp),%xmm3 + vpaddd %xmm6,%xmm2,%xmm2 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + vpsrld $30,%xmm7,%xmm1 + vmovdqa %xmm2,32(%esp) + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + vpor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vmovdqa 64(%esp),%xmm1 + movl %edi,%ebp + xorl %ebx,%esi + shldl $5,%edi,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + addl (%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm2,%xmm0,%xmm0 + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + vpor %xmm2,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vmovdqa 80(%esp),%xmm2 + movl %eax,%ebp + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%edi,%edi + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm3,%xmm1,%xmm1 + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + vpor %xmm3,%xmm1,%xmm1 + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vmovdqa 96(%esp),%xmm3 + movl %ebx,%ebp + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + vmovdqa %xmm5,%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shldl $5,%edi,%edi + addl %esi,%edx + vpxor %xmm4,%xmm2,%xmm2 + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + vpor %xmm4,%xmm2,%xmm2 + xorl %eax,%edi + shrdl $7,%edx,%edx + vmovdqa 64(%esp),%xmm4 + movl %ecx,%ebp + xorl %edi,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl (%esp),%eax + vpaddd %xmm3,%xmm7,%xmm7 + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm7,48(%esp) + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L006done + vmovdqa 160(%esp),%xmm7 + vmovdqa 176(%esp),%xmm6 + vmovdqu (%ebp),%xmm0 + vmovdqu 16(%ebp),%xmm1 + vmovdqu 32(%ebp),%xmm2 + vmovdqu 48(%ebp),%xmm3 + addl $64,%ebp + vpshufb %xmm6,%xmm0,%xmm0 + movl %ebp,196(%esp) + vmovdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpaddd %xmm7,%xmm0,%xmm4 + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,(%esp) + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%ebp + shldl $5,%edx,%edx + vpaddd %xmm7,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vmovdqa %xmm5,16(%esp) + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %edi,%ebp + shldl $5,%edi,%edi + vpaddd %xmm7,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vmovdqa %xmm6,32(%esp) + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,%ebx + movl %ecx,8(%ebp) + xorl %edx,%ebx + movl %edx,12(%ebp) + movl %edi,16(%ebp) + movl %esi,%ebp + andl %ebx,%esi + movl %ebp,%ebx + jmp .L005loop +.align 16 +.L006done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroall + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha1_block_data_order_avx,.-.L_sha1_block_data_order_avx_begin +.align 64 +.LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-586-win.asm b/yass/third_party/boringssl/src/gen/bcm/sha1-586-win.asm new file mode 100644 index 0000000000..c8823a9c37 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-586-win.asm @@ -0,0 +1,3790 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _sha1_block_data_order_nohw +align 16 +_sha1_block_data_order_nohw: +L$_sha1_block_data_order_nohw_begin: + push ebp + push ebx + push esi + push edi + mov ebp,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov eax,DWORD [28+esp] + sub esp,76 + shl eax,6 + add eax,esi + mov DWORD [104+esp],eax + mov edi,DWORD [16+ebp] + jmp NEAR L$000loop +align 16 +L$000loop: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + mov DWORD [8+esp],ecx + mov DWORD [12+esp],edx + mov eax,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov edx,DWORD [28+esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD [16+esp],eax + mov DWORD [20+esp],ebx + mov DWORD [24+esp],ecx + mov DWORD [28+esp],edx + mov eax,DWORD [32+esi] + mov ebx,DWORD [36+esi] + mov ecx,DWORD [40+esi] + mov edx,DWORD [44+esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD [32+esp],eax + mov DWORD [36+esp],ebx + mov DWORD [40+esp],ecx + mov DWORD [44+esp],edx + mov eax,DWORD [48+esi] + mov ebx,DWORD [52+esi] + mov ecx,DWORD [56+esi] + mov edx,DWORD [60+esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD [48+esp],eax + mov DWORD [52+esp],ebx + mov DWORD [56+esp],ecx + mov DWORD [60+esp],edx + mov DWORD [100+esp],esi + mov eax,DWORD [ebp] + mov ebx,DWORD [4+ebp] + mov ecx,DWORD [8+ebp] + mov edx,DWORD [12+ebp] + ; 00_15 0 + mov esi,ecx + mov ebp,eax + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD [esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,[1518500249+edi*1+ebp] + add ebp,esi + ; 00_15 1 + mov edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD [4+esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,[1518500249+edx*1+ebp] + add ebp,edi + ; 00_15 2 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD [8+esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,[1518500249+ecx*1+ebp] + add ebp,edx + ; 00_15 3 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD [12+esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,[1518500249+ebx*1+ebp] + add ebp,ecx + ; 00_15 4 + mov ebx,edi + mov ecx,ebp + rol ebp,5 + xor ebx,esi + add ebp,eax + mov eax,DWORD [16+esp] + and ebx,edx + ror edx,2 + xor ebx,esi + lea ebp,[1518500249+eax*1+ebp] + add ebp,ebx + ; 00_15 5 + mov eax,edx + mov ebx,ebp + rol ebp,5 + xor eax,edi + add ebp,esi + mov esi,DWORD [20+esp] + and eax,ecx + ror ecx,2 + xor eax,edi + lea ebp,[1518500249+esi*1+ebp] + add ebp,eax + ; 00_15 6 + mov esi,ecx + mov eax,ebp + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD [24+esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,[1518500249+edi*1+ebp] + add ebp,esi + ; 00_15 7 + mov edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD [28+esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,[1518500249+edx*1+ebp] + add ebp,edi + ; 00_15 8 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD [32+esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,[1518500249+ecx*1+ebp] + add ebp,edx + ; 00_15 9 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD [36+esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,[1518500249+ebx*1+ebp] + add ebp,ecx + ; 00_15 10 + mov ebx,edi + mov ecx,ebp + rol ebp,5 + xor ebx,esi + add ebp,eax + mov eax,DWORD [40+esp] + and ebx,edx + ror edx,2 + xor ebx,esi + lea ebp,[1518500249+eax*1+ebp] + add ebp,ebx + ; 00_15 11 + mov eax,edx + mov ebx,ebp + rol ebp,5 + xor eax,edi + add ebp,esi + mov esi,DWORD [44+esp] + and eax,ecx + ror ecx,2 + xor eax,edi + lea ebp,[1518500249+esi*1+ebp] + add ebp,eax + ; 00_15 12 + mov esi,ecx + mov eax,ebp + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD [48+esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,[1518500249+edi*1+ebp] + add ebp,esi + ; 00_15 13 + mov edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD [52+esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,[1518500249+edx*1+ebp] + add ebp,edi + ; 00_15 14 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD [56+esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,[1518500249+ecx*1+ebp] + add ebp,edx + ; 00_15 15 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD [60+esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,[1518500249+ebx*1+ebp] + mov ebx,DWORD [esp] + add ecx,ebp + ; 16_19 16 + mov ebp,edi + xor ebx,DWORD [8+esp] + xor ebp,esi + xor ebx,DWORD [32+esp] + and ebp,edx + xor ebx,DWORD [52+esp] + rol ebx,1 + xor ebp,esi + add eax,ebp + mov ebp,ecx + ror edx,2 + mov DWORD [esp],ebx + rol ebp,5 + lea ebx,[1518500249+eax*1+ebx] + mov eax,DWORD [4+esp] + add ebx,ebp + ; 16_19 17 + mov ebp,edx + xor eax,DWORD [12+esp] + xor ebp,edi + xor eax,DWORD [36+esp] + and ebp,ecx + xor eax,DWORD [56+esp] + rol eax,1 + xor ebp,edi + add esi,ebp + mov ebp,ebx + ror ecx,2 + mov DWORD [4+esp],eax + rol ebp,5 + lea eax,[1518500249+esi*1+eax] + mov esi,DWORD [8+esp] + add eax,ebp + ; 16_19 18 + mov ebp,ecx + xor esi,DWORD [16+esp] + xor ebp,edx + xor esi,DWORD [40+esp] + and ebp,ebx + xor esi,DWORD [60+esp] + rol esi,1 + xor ebp,edx + add edi,ebp + mov ebp,eax + ror ebx,2 + mov DWORD [8+esp],esi + rol ebp,5 + lea esi,[1518500249+edi*1+esi] + mov edi,DWORD [12+esp] + add esi,ebp + ; 16_19 19 + mov ebp,ebx + xor edi,DWORD [20+esp] + xor ebp,ecx + xor edi,DWORD [44+esp] + and ebp,eax + xor edi,DWORD [esp] + rol edi,1 + xor ebp,ecx + add edx,ebp + mov ebp,esi + ror eax,2 + mov DWORD [12+esp],edi + rol ebp,5 + lea edi,[1518500249+edx*1+edi] + mov edx,DWORD [16+esp] + add edi,ebp + ; 20_39 20 + mov ebp,esi + xor edx,DWORD [24+esp] + xor ebp,eax + xor edx,DWORD [48+esp] + xor ebp,ebx + xor edx,DWORD [4+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [16+esp],edx + lea edx,[1859775393+ecx*1+edx] + mov ecx,DWORD [20+esp] + add edx,ebp + ; 20_39 21 + mov ebp,edi + xor ecx,DWORD [28+esp] + xor ebp,esi + xor ecx,DWORD [52+esp] + xor ebp,eax + xor ecx,DWORD [8+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [20+esp],ecx + lea ecx,[1859775393+ebx*1+ecx] + mov ebx,DWORD [24+esp] + add ecx,ebp + ; 20_39 22 + mov ebp,edx + xor ebx,DWORD [32+esp] + xor ebp,edi + xor ebx,DWORD [56+esp] + xor ebp,esi + xor ebx,DWORD [12+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [24+esp],ebx + lea ebx,[1859775393+eax*1+ebx] + mov eax,DWORD [28+esp] + add ebx,ebp + ; 20_39 23 + mov ebp,ecx + xor eax,DWORD [36+esp] + xor ebp,edx + xor eax,DWORD [60+esp] + xor ebp,edi + xor eax,DWORD [16+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [28+esp],eax + lea eax,[1859775393+esi*1+eax] + mov esi,DWORD [32+esp] + add eax,ebp + ; 20_39 24 + mov ebp,ebx + xor esi,DWORD [40+esp] + xor ebp,ecx + xor esi,DWORD [esp] + xor ebp,edx + xor esi,DWORD [20+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [32+esp],esi + lea esi,[1859775393+edi*1+esi] + mov edi,DWORD [36+esp] + add esi,ebp + ; 20_39 25 + mov ebp,eax + xor edi,DWORD [44+esp] + xor ebp,ebx + xor edi,DWORD [4+esp] + xor ebp,ecx + xor edi,DWORD [24+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [36+esp],edi + lea edi,[1859775393+edx*1+edi] + mov edx,DWORD [40+esp] + add edi,ebp + ; 20_39 26 + mov ebp,esi + xor edx,DWORD [48+esp] + xor ebp,eax + xor edx,DWORD [8+esp] + xor ebp,ebx + xor edx,DWORD [28+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [40+esp],edx + lea edx,[1859775393+ecx*1+edx] + mov ecx,DWORD [44+esp] + add edx,ebp + ; 20_39 27 + mov ebp,edi + xor ecx,DWORD [52+esp] + xor ebp,esi + xor ecx,DWORD [12+esp] + xor ebp,eax + xor ecx,DWORD [32+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [44+esp],ecx + lea ecx,[1859775393+ebx*1+ecx] + mov ebx,DWORD [48+esp] + add ecx,ebp + ; 20_39 28 + mov ebp,edx + xor ebx,DWORD [56+esp] + xor ebp,edi + xor ebx,DWORD [16+esp] + xor ebp,esi + xor ebx,DWORD [36+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [48+esp],ebx + lea ebx,[1859775393+eax*1+ebx] + mov eax,DWORD [52+esp] + add ebx,ebp + ; 20_39 29 + mov ebp,ecx + xor eax,DWORD [60+esp] + xor ebp,edx + xor eax,DWORD [20+esp] + xor ebp,edi + xor eax,DWORD [40+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [52+esp],eax + lea eax,[1859775393+esi*1+eax] + mov esi,DWORD [56+esp] + add eax,ebp + ; 20_39 30 + mov ebp,ebx + xor esi,DWORD [esp] + xor ebp,ecx + xor esi,DWORD [24+esp] + xor ebp,edx + xor esi,DWORD [44+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [56+esp],esi + lea esi,[1859775393+edi*1+esi] + mov edi,DWORD [60+esp] + add esi,ebp + ; 20_39 31 + mov ebp,eax + xor edi,DWORD [4+esp] + xor ebp,ebx + xor edi,DWORD [28+esp] + xor ebp,ecx + xor edi,DWORD [48+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [60+esp],edi + lea edi,[1859775393+edx*1+edi] + mov edx,DWORD [esp] + add edi,ebp + ; 20_39 32 + mov ebp,esi + xor edx,DWORD [8+esp] + xor ebp,eax + xor edx,DWORD [32+esp] + xor ebp,ebx + xor edx,DWORD [52+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [esp],edx + lea edx,[1859775393+ecx*1+edx] + mov ecx,DWORD [4+esp] + add edx,ebp + ; 20_39 33 + mov ebp,edi + xor ecx,DWORD [12+esp] + xor ebp,esi + xor ecx,DWORD [36+esp] + xor ebp,eax + xor ecx,DWORD [56+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [4+esp],ecx + lea ecx,[1859775393+ebx*1+ecx] + mov ebx,DWORD [8+esp] + add ecx,ebp + ; 20_39 34 + mov ebp,edx + xor ebx,DWORD [16+esp] + xor ebp,edi + xor ebx,DWORD [40+esp] + xor ebp,esi + xor ebx,DWORD [60+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [8+esp],ebx + lea ebx,[1859775393+eax*1+ebx] + mov eax,DWORD [12+esp] + add ebx,ebp + ; 20_39 35 + mov ebp,ecx + xor eax,DWORD [20+esp] + xor ebp,edx + xor eax,DWORD [44+esp] + xor ebp,edi + xor eax,DWORD [esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [12+esp],eax + lea eax,[1859775393+esi*1+eax] + mov esi,DWORD [16+esp] + add eax,ebp + ; 20_39 36 + mov ebp,ebx + xor esi,DWORD [24+esp] + xor ebp,ecx + xor esi,DWORD [48+esp] + xor ebp,edx + xor esi,DWORD [4+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [16+esp],esi + lea esi,[1859775393+edi*1+esi] + mov edi,DWORD [20+esp] + add esi,ebp + ; 20_39 37 + mov ebp,eax + xor edi,DWORD [28+esp] + xor ebp,ebx + xor edi,DWORD [52+esp] + xor ebp,ecx + xor edi,DWORD [8+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [20+esp],edi + lea edi,[1859775393+edx*1+edi] + mov edx,DWORD [24+esp] + add edi,ebp + ; 20_39 38 + mov ebp,esi + xor edx,DWORD [32+esp] + xor ebp,eax + xor edx,DWORD [56+esp] + xor ebp,ebx + xor edx,DWORD [12+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [24+esp],edx + lea edx,[1859775393+ecx*1+edx] + mov ecx,DWORD [28+esp] + add edx,ebp + ; 20_39 39 + mov ebp,edi + xor ecx,DWORD [36+esp] + xor ebp,esi + xor ecx,DWORD [60+esp] + xor ebp,eax + xor ecx,DWORD [16+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [28+esp],ecx + lea ecx,[1859775393+ebx*1+ecx] + mov ebx,DWORD [32+esp] + add ecx,ebp + ; 40_59 40 + mov ebp,edi + xor ebx,DWORD [40+esp] + xor ebp,esi + xor ebx,DWORD [esp] + and ebp,edx + xor ebx,DWORD [20+esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD [32+esp],ebx + lea ebx,[2400959708+ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD [36+esp] + add ebx,ebp + ; 40_59 41 + mov ebp,edx + xor eax,DWORD [44+esp] + xor ebp,edi + xor eax,DWORD [4+esp] + and ebp,ecx + xor eax,DWORD [24+esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD [36+esp],eax + lea eax,[2400959708+ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD [40+esp] + add eax,ebp + ; 40_59 42 + mov ebp,ecx + xor esi,DWORD [48+esp] + xor ebp,edx + xor esi,DWORD [8+esp] + and ebp,ebx + xor esi,DWORD [28+esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD [40+esp],esi + lea esi,[2400959708+ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD [44+esp] + add esi,ebp + ; 40_59 43 + mov ebp,ebx + xor edi,DWORD [52+esp] + xor ebp,ecx + xor edi,DWORD [12+esp] + and ebp,eax + xor edi,DWORD [32+esp] + rol edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD [44+esp],edi + lea edi,[2400959708+ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD [48+esp] + add edi,ebp + ; 40_59 44 + mov ebp,eax + xor edx,DWORD [56+esp] + xor ebp,ebx + xor edx,DWORD [16+esp] + and ebp,esi + xor edx,DWORD [36+esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD [48+esp],edx + lea edx,[2400959708+ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD [52+esp] + add edx,ebp + ; 40_59 45 + mov ebp,esi + xor ecx,DWORD [60+esp] + xor ebp,eax + xor ecx,DWORD [20+esp] + and ebp,edi + xor ecx,DWORD [40+esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD [52+esp],ecx + lea ecx,[2400959708+ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD [56+esp] + add ecx,ebp + ; 40_59 46 + mov ebp,edi + xor ebx,DWORD [esp] + xor ebp,esi + xor ebx,DWORD [24+esp] + and ebp,edx + xor ebx,DWORD [44+esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD [56+esp],ebx + lea ebx,[2400959708+ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD [60+esp] + add ebx,ebp + ; 40_59 47 + mov ebp,edx + xor eax,DWORD [4+esp] + xor ebp,edi + xor eax,DWORD [28+esp] + and ebp,ecx + xor eax,DWORD [48+esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD [60+esp],eax + lea eax,[2400959708+ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD [esp] + add eax,ebp + ; 40_59 48 + mov ebp,ecx + xor esi,DWORD [8+esp] + xor ebp,edx + xor esi,DWORD [32+esp] + and ebp,ebx + xor esi,DWORD [52+esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD [esp],esi + lea esi,[2400959708+ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD [4+esp] + add esi,ebp + ; 40_59 49 + mov ebp,ebx + xor edi,DWORD [12+esp] + xor ebp,ecx + xor edi,DWORD [36+esp] + and ebp,eax + xor edi,DWORD [56+esp] + rol edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD [4+esp],edi + lea edi,[2400959708+ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD [8+esp] + add edi,ebp + ; 40_59 50 + mov ebp,eax + xor edx,DWORD [16+esp] + xor ebp,ebx + xor edx,DWORD [40+esp] + and ebp,esi + xor edx,DWORD [60+esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD [8+esp],edx + lea edx,[2400959708+ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD [12+esp] + add edx,ebp + ; 40_59 51 + mov ebp,esi + xor ecx,DWORD [20+esp] + xor ebp,eax + xor ecx,DWORD [44+esp] + and ebp,edi + xor ecx,DWORD [esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD [12+esp],ecx + lea ecx,[2400959708+ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD [16+esp] + add ecx,ebp + ; 40_59 52 + mov ebp,edi + xor ebx,DWORD [24+esp] + xor ebp,esi + xor ebx,DWORD [48+esp] + and ebp,edx + xor ebx,DWORD [4+esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD [16+esp],ebx + lea ebx,[2400959708+ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD [20+esp] + add ebx,ebp + ; 40_59 53 + mov ebp,edx + xor eax,DWORD [28+esp] + xor ebp,edi + xor eax,DWORD [52+esp] + and ebp,ecx + xor eax,DWORD [8+esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD [20+esp],eax + lea eax,[2400959708+ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD [24+esp] + add eax,ebp + ; 40_59 54 + mov ebp,ecx + xor esi,DWORD [32+esp] + xor ebp,edx + xor esi,DWORD [56+esp] + and ebp,ebx + xor esi,DWORD [12+esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD [24+esp],esi + lea esi,[2400959708+ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD [28+esp] + add esi,ebp + ; 40_59 55 + mov ebp,ebx + xor edi,DWORD [36+esp] + xor ebp,ecx + xor edi,DWORD [60+esp] + and ebp,eax + xor edi,DWORD [16+esp] + rol edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD [28+esp],edi + lea edi,[2400959708+ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD [32+esp] + add edi,ebp + ; 40_59 56 + mov ebp,eax + xor edx,DWORD [40+esp] + xor ebp,ebx + xor edx,DWORD [esp] + and ebp,esi + xor edx,DWORD [20+esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD [32+esp],edx + lea edx,[2400959708+ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD [36+esp] + add edx,ebp + ; 40_59 57 + mov ebp,esi + xor ecx,DWORD [44+esp] + xor ebp,eax + xor ecx,DWORD [4+esp] + and ebp,edi + xor ecx,DWORD [24+esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD [36+esp],ecx + lea ecx,[2400959708+ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD [40+esp] + add ecx,ebp + ; 40_59 58 + mov ebp,edi + xor ebx,DWORD [48+esp] + xor ebp,esi + xor ebx,DWORD [8+esp] + and ebp,edx + xor ebx,DWORD [28+esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD [40+esp],ebx + lea ebx,[2400959708+ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD [44+esp] + add ebx,ebp + ; 40_59 59 + mov ebp,edx + xor eax,DWORD [52+esp] + xor ebp,edi + xor eax,DWORD [12+esp] + and ebp,ecx + xor eax,DWORD [32+esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD [44+esp],eax + lea eax,[2400959708+ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD [48+esp] + add eax,ebp + ; 20_39 60 + mov ebp,ebx + xor esi,DWORD [56+esp] + xor ebp,ecx + xor esi,DWORD [16+esp] + xor ebp,edx + xor esi,DWORD [36+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [48+esp],esi + lea esi,[3395469782+edi*1+esi] + mov edi,DWORD [52+esp] + add esi,ebp + ; 20_39 61 + mov ebp,eax + xor edi,DWORD [60+esp] + xor ebp,ebx + xor edi,DWORD [20+esp] + xor ebp,ecx + xor edi,DWORD [40+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [52+esp],edi + lea edi,[3395469782+edx*1+edi] + mov edx,DWORD [56+esp] + add edi,ebp + ; 20_39 62 + mov ebp,esi + xor edx,DWORD [esp] + xor ebp,eax + xor edx,DWORD [24+esp] + xor ebp,ebx + xor edx,DWORD [44+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [56+esp],edx + lea edx,[3395469782+ecx*1+edx] + mov ecx,DWORD [60+esp] + add edx,ebp + ; 20_39 63 + mov ebp,edi + xor ecx,DWORD [4+esp] + xor ebp,esi + xor ecx,DWORD [28+esp] + xor ebp,eax + xor ecx,DWORD [48+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [60+esp],ecx + lea ecx,[3395469782+ebx*1+ecx] + mov ebx,DWORD [esp] + add ecx,ebp + ; 20_39 64 + mov ebp,edx + xor ebx,DWORD [8+esp] + xor ebp,edi + xor ebx,DWORD [32+esp] + xor ebp,esi + xor ebx,DWORD [52+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [esp],ebx + lea ebx,[3395469782+eax*1+ebx] + mov eax,DWORD [4+esp] + add ebx,ebp + ; 20_39 65 + mov ebp,ecx + xor eax,DWORD [12+esp] + xor ebp,edx + xor eax,DWORD [36+esp] + xor ebp,edi + xor eax,DWORD [56+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [4+esp],eax + lea eax,[3395469782+esi*1+eax] + mov esi,DWORD [8+esp] + add eax,ebp + ; 20_39 66 + mov ebp,ebx + xor esi,DWORD [16+esp] + xor ebp,ecx + xor esi,DWORD [40+esp] + xor ebp,edx + xor esi,DWORD [60+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [8+esp],esi + lea esi,[3395469782+edi*1+esi] + mov edi,DWORD [12+esp] + add esi,ebp + ; 20_39 67 + mov ebp,eax + xor edi,DWORD [20+esp] + xor ebp,ebx + xor edi,DWORD [44+esp] + xor ebp,ecx + xor edi,DWORD [esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [12+esp],edi + lea edi,[3395469782+edx*1+edi] + mov edx,DWORD [16+esp] + add edi,ebp + ; 20_39 68 + mov ebp,esi + xor edx,DWORD [24+esp] + xor ebp,eax + xor edx,DWORD [48+esp] + xor ebp,ebx + xor edx,DWORD [4+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [16+esp],edx + lea edx,[3395469782+ecx*1+edx] + mov ecx,DWORD [20+esp] + add edx,ebp + ; 20_39 69 + mov ebp,edi + xor ecx,DWORD [28+esp] + xor ebp,esi + xor ecx,DWORD [52+esp] + xor ebp,eax + xor ecx,DWORD [8+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [20+esp],ecx + lea ecx,[3395469782+ebx*1+ecx] + mov ebx,DWORD [24+esp] + add ecx,ebp + ; 20_39 70 + mov ebp,edx + xor ebx,DWORD [32+esp] + xor ebp,edi + xor ebx,DWORD [56+esp] + xor ebp,esi + xor ebx,DWORD [12+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [24+esp],ebx + lea ebx,[3395469782+eax*1+ebx] + mov eax,DWORD [28+esp] + add ebx,ebp + ; 20_39 71 + mov ebp,ecx + xor eax,DWORD [36+esp] + xor ebp,edx + xor eax,DWORD [60+esp] + xor ebp,edi + xor eax,DWORD [16+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [28+esp],eax + lea eax,[3395469782+esi*1+eax] + mov esi,DWORD [32+esp] + add eax,ebp + ; 20_39 72 + mov ebp,ebx + xor esi,DWORD [40+esp] + xor ebp,ecx + xor esi,DWORD [esp] + xor ebp,edx + xor esi,DWORD [20+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [32+esp],esi + lea esi,[3395469782+edi*1+esi] + mov edi,DWORD [36+esp] + add esi,ebp + ; 20_39 73 + mov ebp,eax + xor edi,DWORD [44+esp] + xor ebp,ebx + xor edi,DWORD [4+esp] + xor ebp,ecx + xor edi,DWORD [24+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [36+esp],edi + lea edi,[3395469782+edx*1+edi] + mov edx,DWORD [40+esp] + add edi,ebp + ; 20_39 74 + mov ebp,esi + xor edx,DWORD [48+esp] + xor ebp,eax + xor edx,DWORD [8+esp] + xor ebp,ebx + xor edx,DWORD [28+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [40+esp],edx + lea edx,[3395469782+ecx*1+edx] + mov ecx,DWORD [44+esp] + add edx,ebp + ; 20_39 75 + mov ebp,edi + xor ecx,DWORD [52+esp] + xor ebp,esi + xor ecx,DWORD [12+esp] + xor ebp,eax + xor ecx,DWORD [32+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [44+esp],ecx + lea ecx,[3395469782+ebx*1+ecx] + mov ebx,DWORD [48+esp] + add ecx,ebp + ; 20_39 76 + mov ebp,edx + xor ebx,DWORD [56+esp] + xor ebp,edi + xor ebx,DWORD [16+esp] + xor ebp,esi + xor ebx,DWORD [36+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [48+esp],ebx + lea ebx,[3395469782+eax*1+ebx] + mov eax,DWORD [52+esp] + add ebx,ebp + ; 20_39 77 + mov ebp,ecx + xor eax,DWORD [60+esp] + xor ebp,edx + xor eax,DWORD [20+esp] + xor ebp,edi + xor eax,DWORD [40+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + lea eax,[3395469782+esi*1+eax] + mov esi,DWORD [56+esp] + add eax,ebp + ; 20_39 78 + mov ebp,ebx + xor esi,DWORD [esp] + xor ebp,ecx + xor esi,DWORD [24+esp] + xor ebp,edx + xor esi,DWORD [44+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + lea esi,[3395469782+edi*1+esi] + mov edi,DWORD [60+esp] + add esi,ebp + ; 20_39 79 + mov ebp,eax + xor edi,DWORD [4+esp] + xor ebp,ebx + xor edi,DWORD [28+esp] + xor ebp,ecx + xor edi,DWORD [48+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + lea edi,[3395469782+edx*1+edi] + add edi,ebp + mov ebp,DWORD [96+esp] + mov edx,DWORD [100+esp] + add edi,DWORD [ebp] + add esi,DWORD [4+ebp] + add eax,DWORD [8+ebp] + add ebx,DWORD [12+ebp] + add ecx,DWORD [16+ebp] + mov DWORD [ebp],edi + add edx,64 + mov DWORD [4+ebp],esi + cmp edx,DWORD [104+esp] + mov DWORD [8+ebp],eax + mov edi,ecx + mov DWORD [12+ebp],ebx + mov esi,edx + mov DWORD [16+ebp],ecx + jb NEAR L$000loop + add esp,76 + pop edi + pop esi + pop ebx + pop ebp + ret +global _sha1_block_data_order_ssse3 +align 16 +_sha1_block_data_order_ssse3: +L$_sha1_block_data_order_ssse3_begin: + push ebp + push ebx + push esi + push edi + call L$001pic_point +L$001pic_point: + pop ebp + lea ebp,[(L$K_XX_XX-L$001pic_point)+ebp] + movdqa xmm7,[ebp] + movdqa xmm0,[16+ebp] + movdqa xmm1,[32+ebp] + movdqa xmm2,[48+ebp] + movdqa xmm6,[64+ebp] + mov edi,DWORD [20+esp] + mov ebp,DWORD [24+esp] + mov edx,DWORD [28+esp] + mov esi,esp + sub esp,208 + and esp,-64 + movdqa [112+esp],xmm0 + movdqa [128+esp],xmm1 + movdqa [144+esp],xmm2 + shl edx,6 + movdqa [160+esp],xmm7 + add edx,ebp + movdqa [176+esp],xmm6 + add ebp,64 + mov DWORD [192+esp],edi + mov DWORD [196+esp],ebp + mov DWORD [200+esp],edx + mov DWORD [204+esp],esi + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + mov edi,DWORD [16+edi] + mov esi,ebx + movdqu xmm0,[ebp-64] + movdqu xmm1,[ebp-48] + movdqu xmm2,[ebp-32] + movdqu xmm3,[ebp-16] +db 102,15,56,0,198 +db 102,15,56,0,206 +db 102,15,56,0,214 + movdqa [96+esp],xmm7 +db 102,15,56,0,222 + paddd xmm0,xmm7 + paddd xmm1,xmm7 + paddd xmm2,xmm7 + movdqa [esp],xmm0 + psubd xmm0,xmm7 + movdqa [16+esp],xmm1 + psubd xmm1,xmm7 + movdqa [32+esp],xmm2 + mov ebp,ecx + psubd xmm2,xmm7 + xor ebp,edx + pshufd xmm4,xmm0,238 + and esi,ebp + jmp NEAR L$002loop +align 16 +L$002loop: + ror ebx,2 + xor esi,edx + mov ebp,eax + punpcklqdq xmm4,xmm1 + movdqa xmm6,xmm3 + add edi,DWORD [esp] + xor ebx,ecx + paddd xmm7,xmm3 + movdqa [64+esp],xmm0 + rol eax,5 + add edi,esi + psrldq xmm6,4 + and ebp,ebx + xor ebx,ecx + pxor xmm4,xmm0 + add edi,eax + ror eax,7 + pxor xmm6,xmm2 + xor ebp,ecx + mov esi,edi + add edx,DWORD [4+esp] + pxor xmm4,xmm6 + xor eax,ebx + rol edi,5 + movdqa [48+esp],xmm7 + add edx,ebp + and esi,eax + movdqa xmm0,xmm4 + xor eax,ebx + add edx,edi + ror edi,7 + movdqa xmm6,xmm4 + xor esi,ebx + pslldq xmm0,12 + paddd xmm4,xmm4 + mov ebp,edx + add ecx,DWORD [8+esp] + psrld xmm6,31 + xor edi,eax + rol edx,5 + movdqa xmm7,xmm0 + add ecx,esi + and ebp,edi + xor edi,eax + psrld xmm0,30 + add ecx,edx + ror edx,7 + por xmm4,xmm6 + xor ebp,eax + mov esi,ecx + add ebx,DWORD [12+esp] + pslld xmm7,2 + xor edx,edi + rol ecx,5 + pxor xmm4,xmm0 + movdqa xmm0,[96+esp] + add ebx,ebp + and esi,edx + pxor xmm4,xmm7 + pshufd xmm5,xmm1,238 + xor edx,edi + add ebx,ecx + ror ecx,7 + xor esi,edi + mov ebp,ebx + punpcklqdq xmm5,xmm2 + movdqa xmm7,xmm4 + add eax,DWORD [16+esp] + xor ecx,edx + paddd xmm0,xmm4 + movdqa [80+esp],xmm1 + rol ebx,5 + add eax,esi + psrldq xmm7,4 + and ebp,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm7,xmm3 + xor ebp,edx + mov esi,eax + add edi,DWORD [20+esp] + pxor xmm5,xmm7 + xor ebx,ecx + rol eax,5 + movdqa [esp],xmm0 + add edi,ebp + and esi,ebx + movdqa xmm1,xmm5 + xor ebx,ecx + add edi,eax + ror eax,7 + movdqa xmm7,xmm5 + xor esi,ecx + pslldq xmm1,12 + paddd xmm5,xmm5 + mov ebp,edi + add edx,DWORD [24+esp] + psrld xmm7,31 + xor eax,ebx + rol edi,5 + movdqa xmm0,xmm1 + add edx,esi + and ebp,eax + xor eax,ebx + psrld xmm1,30 + add edx,edi + ror edi,7 + por xmm5,xmm7 + xor ebp,ebx + mov esi,edx + add ecx,DWORD [28+esp] + pslld xmm0,2 + xor edi,eax + rol edx,5 + pxor xmm5,xmm1 + movdqa xmm1,[112+esp] + add ecx,ebp + and esi,edi + pxor xmm5,xmm0 + pshufd xmm6,xmm2,238 + xor edi,eax + add ecx,edx + ror edx,7 + xor esi,eax + mov ebp,ecx + punpcklqdq xmm6,xmm3 + movdqa xmm0,xmm5 + add ebx,DWORD [32+esp] + xor edx,edi + paddd xmm1,xmm5 + movdqa [96+esp],xmm2 + rol ecx,5 + add ebx,esi + psrldq xmm0,4 + and ebp,edx + xor edx,edi + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm0,xmm4 + xor ebp,edi + mov esi,ebx + add eax,DWORD [36+esp] + pxor xmm6,xmm0 + xor ecx,edx + rol ebx,5 + movdqa [16+esp],xmm1 + add eax,ebp + and esi,ecx + movdqa xmm2,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm0,xmm6 + xor esi,edx + pslldq xmm2,12 + paddd xmm6,xmm6 + mov ebp,eax + add edi,DWORD [40+esp] + psrld xmm0,31 + xor ebx,ecx + rol eax,5 + movdqa xmm1,xmm2 + add edi,esi + and ebp,ebx + xor ebx,ecx + psrld xmm2,30 + add edi,eax + ror eax,7 + por xmm6,xmm0 + xor ebp,ecx + movdqa xmm0,[64+esp] + mov esi,edi + add edx,DWORD [44+esp] + pslld xmm1,2 + xor eax,ebx + rol edi,5 + pxor xmm6,xmm2 + movdqa xmm2,[112+esp] + add edx,ebp + and esi,eax + pxor xmm6,xmm1 + pshufd xmm7,xmm3,238 + xor eax,ebx + add edx,edi + ror edi,7 + xor esi,ebx + mov ebp,edx + punpcklqdq xmm7,xmm4 + movdqa xmm1,xmm6 + add ecx,DWORD [48+esp] + xor edi,eax + paddd xmm2,xmm6 + movdqa [64+esp],xmm3 + rol edx,5 + add ecx,esi + psrldq xmm1,4 + and ebp,edi + xor edi,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm1,xmm5 + xor ebp,eax + mov esi,ecx + add ebx,DWORD [52+esp] + pxor xmm7,xmm1 + xor edx,edi + rol ecx,5 + movdqa [32+esp],xmm2 + add ebx,ebp + and esi,edx + movdqa xmm3,xmm7 + xor edx,edi + add ebx,ecx + ror ecx,7 + movdqa xmm1,xmm7 + xor esi,edi + pslldq xmm3,12 + paddd xmm7,xmm7 + mov ebp,ebx + add eax,DWORD [56+esp] + psrld xmm1,31 + xor ecx,edx + rol ebx,5 + movdqa xmm2,xmm3 + add eax,esi + and ebp,ecx + xor ecx,edx + psrld xmm3,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm1 + xor ebp,edx + movdqa xmm1,[80+esp] + mov esi,eax + add edi,DWORD [60+esp] + pslld xmm2,2 + xor ebx,ecx + rol eax,5 + pxor xmm7,xmm3 + movdqa xmm3,[112+esp] + add edi,ebp + and esi,ebx + pxor xmm7,xmm2 + pshufd xmm2,xmm6,238 + xor ebx,ecx + add edi,eax + ror eax,7 + pxor xmm0,xmm4 + punpcklqdq xmm2,xmm7 + xor esi,ecx + mov ebp,edi + add edx,DWORD [esp] + pxor xmm0,xmm1 + movdqa [80+esp],xmm4 + xor eax,ebx + rol edi,5 + movdqa xmm4,xmm3 + add edx,esi + paddd xmm3,xmm7 + and ebp,eax + pxor xmm0,xmm2 + xor eax,ebx + add edx,edi + ror edi,7 + xor ebp,ebx + movdqa xmm2,xmm0 + movdqa [48+esp],xmm3 + mov esi,edx + add ecx,DWORD [4+esp] + xor edi,eax + rol edx,5 + pslld xmm0,2 + add ecx,ebp + and esi,edi + psrld xmm2,30 + xor edi,eax + add ecx,edx + ror edx,7 + xor esi,eax + mov ebp,ecx + add ebx,DWORD [8+esp] + xor edx,edi + rol ecx,5 + por xmm0,xmm2 + add ebx,esi + and ebp,edx + movdqa xmm2,[96+esp] + xor edx,edi + add ebx,ecx + add eax,DWORD [12+esp] + xor ebp,edi + mov esi,ebx + pshufd xmm3,xmm7,238 + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + add edi,DWORD [16+esp] + pxor xmm1,xmm5 + punpcklqdq xmm3,xmm0 + xor esi,ecx + mov ebp,eax + rol eax,5 + pxor xmm1,xmm2 + movdqa [96+esp],xmm5 + add edi,esi + xor ebp,ecx + movdqa xmm5,xmm4 + ror ebx,7 + paddd xmm4,xmm0 + add edi,eax + pxor xmm1,xmm3 + add edx,DWORD [20+esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + movdqa xmm3,xmm1 + movdqa [esp],xmm4 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + pslld xmm1,2 + add ecx,DWORD [24+esp] + xor esi,eax + psrld xmm3,30 + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + por xmm1,xmm3 + add ebx,DWORD [28+esp] + xor ebp,edi + movdqa xmm3,[64+esp] + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + pshufd xmm4,xmm0,238 + add ebx,ecx + add eax,DWORD [32+esp] + pxor xmm2,xmm6 + punpcklqdq xmm4,xmm1 + xor esi,edx + mov ebp,ebx + rol ebx,5 + pxor xmm2,xmm3 + movdqa [64+esp],xmm6 + add eax,esi + xor ebp,edx + movdqa xmm6,[128+esp] + ror ecx,7 + paddd xmm5,xmm1 + add eax,ebx + pxor xmm2,xmm4 + add edi,DWORD [36+esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + movdqa xmm4,xmm2 + movdqa [16+esp],xmm5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + pslld xmm2,2 + add edx,DWORD [40+esp] + xor esi,ebx + psrld xmm4,30 + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + por xmm2,xmm4 + add ecx,DWORD [44+esp] + xor ebp,eax + movdqa xmm4,[80+esp] + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + pshufd xmm5,xmm1,238 + add ecx,edx + add ebx,DWORD [48+esp] + pxor xmm3,xmm7 + punpcklqdq xmm5,xmm2 + xor esi,edi + mov ebp,ecx + rol ecx,5 + pxor xmm3,xmm4 + movdqa [80+esp],xmm7 + add ebx,esi + xor ebp,edi + movdqa xmm7,xmm6 + ror edx,7 + paddd xmm6,xmm2 + add ebx,ecx + pxor xmm3,xmm5 + add eax,DWORD [52+esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + movdqa xmm5,xmm3 + movdqa [32+esp],xmm6 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + pslld xmm3,2 + add edi,DWORD [56+esp] + xor esi,ecx + psrld xmm5,30 + mov ebp,eax + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + por xmm3,xmm5 + add edx,DWORD [60+esp] + xor ebp,ebx + movdqa xmm5,[96+esp] + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + pshufd xmm6,xmm2,238 + add edx,edi + add ecx,DWORD [esp] + pxor xmm4,xmm0 + punpcklqdq xmm6,xmm3 + xor esi,eax + mov ebp,edx + rol edx,5 + pxor xmm4,xmm5 + movdqa [96+esp],xmm0 + add ecx,esi + xor ebp,eax + movdqa xmm0,xmm7 + ror edi,7 + paddd xmm7,xmm3 + add ecx,edx + pxor xmm4,xmm6 + add ebx,DWORD [4+esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + movdqa xmm6,xmm4 + movdqa [48+esp],xmm7 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + pslld xmm4,2 + add eax,DWORD [8+esp] + xor esi,edx + psrld xmm6,30 + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + por xmm4,xmm6 + add edi,DWORD [12+esp] + xor ebp,ecx + movdqa xmm6,[64+esp] + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + pshufd xmm7,xmm3,238 + add edi,eax + add edx,DWORD [16+esp] + pxor xmm5,xmm1 + punpcklqdq xmm7,xmm4 + xor esi,ebx + mov ebp,edi + rol edi,5 + pxor xmm5,xmm6 + movdqa [64+esp],xmm1 + add edx,esi + xor ebp,ebx + movdqa xmm1,xmm0 + ror eax,7 + paddd xmm0,xmm4 + add edx,edi + pxor xmm5,xmm7 + add ecx,DWORD [20+esp] + xor ebp,eax + mov esi,edx + rol edx,5 + movdqa xmm7,xmm5 + movdqa [esp],xmm0 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + pslld xmm5,2 + add ebx,DWORD [24+esp] + xor esi,edi + psrld xmm7,30 + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + por xmm5,xmm7 + add eax,DWORD [28+esp] + movdqa xmm7,[80+esp] + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + pshufd xmm0,xmm4,238 + add eax,ebp + xor esi,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD [32+esp] + pxor xmm6,xmm2 + punpcklqdq xmm0,xmm5 + and esi,ecx + xor ecx,edx + ror ebx,7 + pxor xmm6,xmm7 + movdqa [80+esp],xmm2 + mov ebp,eax + xor esi,ecx + rol eax,5 + movdqa xmm2,xmm1 + add edi,esi + paddd xmm1,xmm5 + xor ebp,ebx + pxor xmm6,xmm0 + xor ebx,ecx + add edi,eax + add edx,DWORD [36+esp] + and ebp,ebx + movdqa xmm0,xmm6 + movdqa [16+esp],xmm1 + xor ebx,ecx + ror eax,7 + mov esi,edi + xor ebp,ebx + rol edi,5 + pslld xmm6,2 + add edx,ebp + xor esi,eax + psrld xmm0,30 + xor eax,ebx + add edx,edi + add ecx,DWORD [40+esp] + and esi,eax + xor eax,ebx + ror edi,7 + por xmm6,xmm0 + mov ebp,edx + xor esi,eax + movdqa xmm0,[96+esp] + rol edx,5 + add ecx,esi + xor ebp,edi + xor edi,eax + add ecx,edx + pshufd xmm1,xmm5,238 + add ebx,DWORD [44+esp] + and ebp,edi + xor edi,eax + ror edx,7 + mov esi,ecx + xor ebp,edi + rol ecx,5 + add ebx,ebp + xor esi,edx + xor edx,edi + add ebx,ecx + add eax,DWORD [48+esp] + pxor xmm7,xmm3 + punpcklqdq xmm1,xmm6 + and esi,edx + xor edx,edi + ror ecx,7 + pxor xmm7,xmm0 + movdqa [96+esp],xmm3 + mov ebp,ebx + xor esi,edx + rol ebx,5 + movdqa xmm3,[144+esp] + add eax,esi + paddd xmm2,xmm6 + xor ebp,ecx + pxor xmm7,xmm1 + xor ecx,edx + add eax,ebx + add edi,DWORD [52+esp] + and ebp,ecx + movdqa xmm1,xmm7 + movdqa [32+esp],xmm2 + xor ecx,edx + ror ebx,7 + mov esi,eax + xor ebp,ecx + rol eax,5 + pslld xmm7,2 + add edi,ebp + xor esi,ebx + psrld xmm1,30 + xor ebx,ecx + add edi,eax + add edx,DWORD [56+esp] + and esi,ebx + xor ebx,ecx + ror eax,7 + por xmm7,xmm1 + mov ebp,edi + xor esi,ebx + movdqa xmm1,[64+esp] + rol edi,5 + add edx,esi + xor ebp,eax + xor eax,ebx + add edx,edi + pshufd xmm2,xmm6,238 + add ecx,DWORD [60+esp] + and ebp,eax + xor eax,ebx + ror edi,7 + mov esi,edx + xor ebp,eax + rol edx,5 + add ecx,ebp + xor esi,edi + xor edi,eax + add ecx,edx + add ebx,DWORD [esp] + pxor xmm0,xmm4 + punpcklqdq xmm2,xmm7 + and esi,edi + xor edi,eax + ror edx,7 + pxor xmm0,xmm1 + movdqa [64+esp],xmm4 + mov ebp,ecx + xor esi,edi + rol ecx,5 + movdqa xmm4,xmm3 + add ebx,esi + paddd xmm3,xmm7 + xor ebp,edx + pxor xmm0,xmm2 + xor edx,edi + add ebx,ecx + add eax,DWORD [4+esp] + and ebp,edx + movdqa xmm2,xmm0 + movdqa [48+esp],xmm3 + xor edx,edi + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + pslld xmm0,2 + add eax,ebp + xor esi,ecx + psrld xmm2,30 + xor ecx,edx + add eax,ebx + add edi,DWORD [8+esp] + and esi,ecx + xor ecx,edx + ror ebx,7 + por xmm0,xmm2 + mov ebp,eax + xor esi,ecx + movdqa xmm2,[80+esp] + rol eax,5 + add edi,esi + xor ebp,ebx + xor ebx,ecx + add edi,eax + pshufd xmm3,xmm7,238 + add edx,DWORD [12+esp] + and ebp,ebx + xor ebx,ecx + ror eax,7 + mov esi,edi + xor ebp,ebx + rol edi,5 + add edx,ebp + xor esi,eax + xor eax,ebx + add edx,edi + add ecx,DWORD [16+esp] + pxor xmm1,xmm5 + punpcklqdq xmm3,xmm0 + and esi,eax + xor eax,ebx + ror edi,7 + pxor xmm1,xmm2 + movdqa [80+esp],xmm5 + mov ebp,edx + xor esi,eax + rol edx,5 + movdqa xmm5,xmm4 + add ecx,esi + paddd xmm4,xmm0 + xor ebp,edi + pxor xmm1,xmm3 + xor edi,eax + add ecx,edx + add ebx,DWORD [20+esp] + and ebp,edi + movdqa xmm3,xmm1 + movdqa [esp],xmm4 + xor edi,eax + ror edx,7 + mov esi,ecx + xor ebp,edi + rol ecx,5 + pslld xmm1,2 + add ebx,ebp + xor esi,edx + psrld xmm3,30 + xor edx,edi + add ebx,ecx + add eax,DWORD [24+esp] + and esi,edx + xor edx,edi + ror ecx,7 + por xmm1,xmm3 + mov ebp,ebx + xor esi,edx + movdqa xmm3,[96+esp] + rol ebx,5 + add eax,esi + xor ebp,ecx + xor ecx,edx + add eax,ebx + pshufd xmm4,xmm0,238 + add edi,DWORD [28+esp] + and ebp,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor ebp,ecx + rol eax,5 + add edi,ebp + xor esi,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD [32+esp] + pxor xmm2,xmm6 + punpcklqdq xmm4,xmm1 + and esi,ebx + xor ebx,ecx + ror eax,7 + pxor xmm2,xmm3 + movdqa [96+esp],xmm6 + mov ebp,edi + xor esi,ebx + rol edi,5 + movdqa xmm6,xmm5 + add edx,esi + paddd xmm5,xmm1 + xor ebp,eax + pxor xmm2,xmm4 + xor eax,ebx + add edx,edi + add ecx,DWORD [36+esp] + and ebp,eax + movdqa xmm4,xmm2 + movdqa [16+esp],xmm5 + xor eax,ebx + ror edi,7 + mov esi,edx + xor ebp,eax + rol edx,5 + pslld xmm2,2 + add ecx,ebp + xor esi,edi + psrld xmm4,30 + xor edi,eax + add ecx,edx + add ebx,DWORD [40+esp] + and esi,edi + xor edi,eax + ror edx,7 + por xmm2,xmm4 + mov ebp,ecx + xor esi,edi + movdqa xmm4,[64+esp] + rol ecx,5 + add ebx,esi + xor ebp,edx + xor edx,edi + add ebx,ecx + pshufd xmm5,xmm1,238 + add eax,DWORD [44+esp] + and ebp,edx + xor edx,edi + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + add eax,ebp + xor esi,edx + add eax,ebx + add edi,DWORD [48+esp] + pxor xmm3,xmm7 + punpcklqdq xmm5,xmm2 + xor esi,ecx + mov ebp,eax + rol eax,5 + pxor xmm3,xmm4 + movdqa [64+esp],xmm7 + add edi,esi + xor ebp,ecx + movdqa xmm7,xmm6 + ror ebx,7 + paddd xmm6,xmm2 + add edi,eax + pxor xmm3,xmm5 + add edx,DWORD [52+esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + movdqa xmm5,xmm3 + movdqa [32+esp],xmm6 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + pslld xmm3,2 + add ecx,DWORD [56+esp] + xor esi,eax + psrld xmm5,30 + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + por xmm3,xmm5 + add ebx,DWORD [60+esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [esp] + xor esi,edx + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + paddd xmm7,xmm3 + add eax,ebx + add edi,DWORD [4+esp] + xor ebp,ecx + mov esi,eax + movdqa [48+esp],xmm7 + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [8+esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + add ecx,DWORD [12+esp] + xor ebp,eax + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + mov ebp,DWORD [196+esp] + cmp ebp,DWORD [200+esp] + je NEAR L$003done + movdqa xmm7,[160+esp] + movdqa xmm6,[176+esp] + movdqu xmm0,[ebp] + movdqu xmm1,[16+ebp] + movdqu xmm2,[32+ebp] + movdqu xmm3,[48+ebp] + add ebp,64 +db 102,15,56,0,198 + mov DWORD [196+esp],ebp + movdqa [96+esp],xmm7 + add ebx,DWORD [16+esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 +db 102,15,56,0,206 + add ebx,ecx + add eax,DWORD [20+esp] + xor ebp,edx + mov esi,ebx + paddd xmm0,xmm7 + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + movdqa [esp],xmm0 + add eax,ebx + add edi,DWORD [24+esp] + xor esi,ecx + mov ebp,eax + psubd xmm0,xmm7 + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [28+esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + add ecx,DWORD [32+esp] + xor esi,eax + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 +db 102,15,56,0,214 + add ecx,edx + add ebx,DWORD [36+esp] + xor ebp,edi + mov esi,ecx + paddd xmm1,xmm7 + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + movdqa [16+esp],xmm1 + add ebx,ecx + add eax,DWORD [40+esp] + xor esi,edx + mov ebp,ebx + psubd xmm1,xmm7 + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + add edi,DWORD [44+esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [48+esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 +db 102,15,56,0,222 + add edx,edi + add ecx,DWORD [52+esp] + xor ebp,eax + mov esi,edx + paddd xmm2,xmm7 + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + movdqa [32+esp],xmm2 + add ecx,edx + add ebx,DWORD [56+esp] + xor esi,edi + mov ebp,ecx + psubd xmm2,xmm7 + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [60+esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + ror ecx,7 + add eax,ebx + mov ebp,DWORD [192+esp] + add eax,DWORD [ebp] + add esi,DWORD [4+ebp] + add ecx,DWORD [8+ebp] + mov DWORD [ebp],eax + add edx,DWORD [12+ebp] + mov DWORD [4+ebp],esi + add edi,DWORD [16+ebp] + mov DWORD [8+ebp],ecx + mov ebx,ecx + mov DWORD [12+ebp],edx + xor ebx,edx + mov DWORD [16+ebp],edi + mov ebp,esi + pshufd xmm4,xmm0,238 + and esi,ebx + mov ebx,ebp + jmp NEAR L$002loop +align 16 +L$003done: + add ebx,DWORD [16+esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [20+esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + add edi,DWORD [24+esp] + xor esi,ecx + mov ebp,eax + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [28+esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + add ecx,DWORD [32+esp] + xor esi,eax + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + add ebx,DWORD [36+esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [40+esp] + xor esi,edx + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + add edi,DWORD [44+esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [48+esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + add ecx,DWORD [52+esp] + xor ebp,eax + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + add ebx,DWORD [56+esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [60+esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + ror ecx,7 + add eax,ebx + mov ebp,DWORD [192+esp] + add eax,DWORD [ebp] + mov esp,DWORD [204+esp] + add esi,DWORD [4+ebp] + add ecx,DWORD [8+ebp] + mov DWORD [ebp],eax + add edx,DWORD [12+ebp] + mov DWORD [4+ebp],esi + add edi,DWORD [16+ebp] + mov DWORD [8+ebp],ecx + mov DWORD [12+ebp],edx + mov DWORD [16+ebp],edi + pop edi + pop esi + pop ebx + pop ebp + ret +global _sha1_block_data_order_avx +align 16 +_sha1_block_data_order_avx: +L$_sha1_block_data_order_avx_begin: + push ebp + push ebx + push esi + push edi + call L$004pic_point +L$004pic_point: + pop ebp + lea ebp,[(L$K_XX_XX-L$004pic_point)+ebp] + vzeroall + vmovdqa xmm7,[ebp] + vmovdqa xmm0,[16+ebp] + vmovdqa xmm1,[32+ebp] + vmovdqa xmm2,[48+ebp] + vmovdqa xmm6,[64+ebp] + mov edi,DWORD [20+esp] + mov ebp,DWORD [24+esp] + mov edx,DWORD [28+esp] + mov esi,esp + sub esp,208 + and esp,-64 + vmovdqa [112+esp],xmm0 + vmovdqa [128+esp],xmm1 + vmovdqa [144+esp],xmm2 + shl edx,6 + vmovdqa [160+esp],xmm7 + add edx,ebp + vmovdqa [176+esp],xmm6 + add ebp,64 + mov DWORD [192+esp],edi + mov DWORD [196+esp],ebp + mov DWORD [200+esp],edx + mov DWORD [204+esp],esi + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + mov edi,DWORD [16+edi] + mov esi,ebx + vmovdqu xmm0,[ebp-64] + vmovdqu xmm1,[ebp-48] + vmovdqu xmm2,[ebp-32] + vmovdqu xmm3,[ebp-16] + vpshufb xmm0,xmm0,xmm6 + vpshufb xmm1,xmm1,xmm6 + vpshufb xmm2,xmm2,xmm6 + vmovdqa [96+esp],xmm7 + vpshufb xmm3,xmm3,xmm6 + vpaddd xmm4,xmm0,xmm7 + vpaddd xmm5,xmm1,xmm7 + vpaddd xmm6,xmm2,xmm7 + vmovdqa [esp],xmm4 + mov ebp,ecx + vmovdqa [16+esp],xmm5 + xor ebp,edx + vmovdqa [32+esp],xmm6 + and esi,ebp + jmp NEAR L$005loop +align 16 +L$005loop: + shrd ebx,ebx,2 + xor esi,edx + vpalignr xmm4,xmm1,xmm0,8 + mov ebp,eax + add edi,DWORD [esp] + vpaddd xmm7,xmm7,xmm3 + vmovdqa [64+esp],xmm0 + xor ebx,ecx + shld eax,eax,5 + vpsrldq xmm6,xmm3,4 + add edi,esi + and ebp,ebx + vpxor xmm4,xmm4,xmm0 + xor ebx,ecx + add edi,eax + vpxor xmm6,xmm6,xmm2 + shrd eax,eax,7 + xor ebp,ecx + vmovdqa [48+esp],xmm7 + mov esi,edi + add edx,DWORD [4+esp] + vpxor xmm4,xmm4,xmm6 + xor eax,ebx + shld edi,edi,5 + add edx,ebp + and esi,eax + vpsrld xmm6,xmm4,31 + xor eax,ebx + add edx,edi + shrd edi,edi,7 + xor esi,ebx + vpslldq xmm0,xmm4,12 + vpaddd xmm4,xmm4,xmm4 + mov ebp,edx + add ecx,DWORD [8+esp] + xor edi,eax + shld edx,edx,5 + vpsrld xmm7,xmm0,30 + vpor xmm4,xmm4,xmm6 + add ecx,esi + and ebp,edi + xor edi,eax + add ecx,edx + vpslld xmm0,xmm0,2 + shrd edx,edx,7 + xor ebp,eax + vpxor xmm4,xmm4,xmm7 + mov esi,ecx + add ebx,DWORD [12+esp] + xor edx,edi + shld ecx,ecx,5 + vpxor xmm4,xmm4,xmm0 + add ebx,ebp + and esi,edx + vmovdqa xmm0,[96+esp] + xor edx,edi + add ebx,ecx + shrd ecx,ecx,7 + xor esi,edi + vpalignr xmm5,xmm2,xmm1,8 + mov ebp,ebx + add eax,DWORD [16+esp] + vpaddd xmm0,xmm0,xmm4 + vmovdqa [80+esp],xmm1 + xor ecx,edx + shld ebx,ebx,5 + vpsrldq xmm7,xmm4,4 + add eax,esi + and ebp,ecx + vpxor xmm5,xmm5,xmm1 + xor ecx,edx + add eax,ebx + vpxor xmm7,xmm7,xmm3 + shrd ebx,ebx,7 + xor ebp,edx + vmovdqa [esp],xmm0 + mov esi,eax + add edi,DWORD [20+esp] + vpxor xmm5,xmm5,xmm7 + xor ebx,ecx + shld eax,eax,5 + add edi,ebp + and esi,ebx + vpsrld xmm7,xmm5,31 + xor ebx,ecx + add edi,eax + shrd eax,eax,7 + xor esi,ecx + vpslldq xmm1,xmm5,12 + vpaddd xmm5,xmm5,xmm5 + mov ebp,edi + add edx,DWORD [24+esp] + xor eax,ebx + shld edi,edi,5 + vpsrld xmm0,xmm1,30 + vpor xmm5,xmm5,xmm7 + add edx,esi + and ebp,eax + xor eax,ebx + add edx,edi + vpslld xmm1,xmm1,2 + shrd edi,edi,7 + xor ebp,ebx + vpxor xmm5,xmm5,xmm0 + mov esi,edx + add ecx,DWORD [28+esp] + xor edi,eax + shld edx,edx,5 + vpxor xmm5,xmm5,xmm1 + add ecx,ebp + and esi,edi + vmovdqa xmm1,[112+esp] + xor edi,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + vpalignr xmm6,xmm3,xmm2,8 + mov ebp,ecx + add ebx,DWORD [32+esp] + vpaddd xmm1,xmm1,xmm5 + vmovdqa [96+esp],xmm2 + xor edx,edi + shld ecx,ecx,5 + vpsrldq xmm0,xmm5,4 + add ebx,esi + and ebp,edx + vpxor xmm6,xmm6,xmm2 + xor edx,edi + add ebx,ecx + vpxor xmm0,xmm0,xmm4 + shrd ecx,ecx,7 + xor ebp,edi + vmovdqa [16+esp],xmm1 + mov esi,ebx + add eax,DWORD [36+esp] + vpxor xmm6,xmm6,xmm0 + xor ecx,edx + shld ebx,ebx,5 + add eax,ebp + and esi,ecx + vpsrld xmm0,xmm6,31 + xor ecx,edx + add eax,ebx + shrd ebx,ebx,7 + xor esi,edx + vpslldq xmm2,xmm6,12 + vpaddd xmm6,xmm6,xmm6 + mov ebp,eax + add edi,DWORD [40+esp] + xor ebx,ecx + shld eax,eax,5 + vpsrld xmm1,xmm2,30 + vpor xmm6,xmm6,xmm0 + add edi,esi + and ebp,ebx + xor ebx,ecx + add edi,eax + vpslld xmm2,xmm2,2 + vmovdqa xmm0,[64+esp] + shrd eax,eax,7 + xor ebp,ecx + vpxor xmm6,xmm6,xmm1 + mov esi,edi + add edx,DWORD [44+esp] + xor eax,ebx + shld edi,edi,5 + vpxor xmm6,xmm6,xmm2 + add edx,ebp + and esi,eax + vmovdqa xmm2,[112+esp] + xor eax,ebx + add edx,edi + shrd edi,edi,7 + xor esi,ebx + vpalignr xmm7,xmm4,xmm3,8 + mov ebp,edx + add ecx,DWORD [48+esp] + vpaddd xmm2,xmm2,xmm6 + vmovdqa [64+esp],xmm3 + xor edi,eax + shld edx,edx,5 + vpsrldq xmm1,xmm6,4 + add ecx,esi + and ebp,edi + vpxor xmm7,xmm7,xmm3 + xor edi,eax + add ecx,edx + vpxor xmm1,xmm1,xmm5 + shrd edx,edx,7 + xor ebp,eax + vmovdqa [32+esp],xmm2 + mov esi,ecx + add ebx,DWORD [52+esp] + vpxor xmm7,xmm7,xmm1 + xor edx,edi + shld ecx,ecx,5 + add ebx,ebp + and esi,edx + vpsrld xmm1,xmm7,31 + xor edx,edi + add ebx,ecx + shrd ecx,ecx,7 + xor esi,edi + vpslldq xmm3,xmm7,12 + vpaddd xmm7,xmm7,xmm7 + mov ebp,ebx + add eax,DWORD [56+esp] + xor ecx,edx + shld ebx,ebx,5 + vpsrld xmm2,xmm3,30 + vpor xmm7,xmm7,xmm1 + add eax,esi + and ebp,ecx + xor ecx,edx + add eax,ebx + vpslld xmm3,xmm3,2 + vmovdqa xmm1,[80+esp] + shrd ebx,ebx,7 + xor ebp,edx + vpxor xmm7,xmm7,xmm2 + mov esi,eax + add edi,DWORD [60+esp] + xor ebx,ecx + shld eax,eax,5 + vpxor xmm7,xmm7,xmm3 + add edi,ebp + and esi,ebx + vmovdqa xmm3,[112+esp] + xor ebx,ecx + add edi,eax + vpalignr xmm2,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + shrd eax,eax,7 + xor esi,ecx + mov ebp,edi + add edx,DWORD [esp] + vpxor xmm0,xmm0,xmm1 + vmovdqa [80+esp],xmm4 + xor eax,ebx + shld edi,edi,5 + vmovdqa xmm4,xmm3 + vpaddd xmm3,xmm3,xmm7 + add edx,esi + and ebp,eax + vpxor xmm0,xmm0,xmm2 + xor eax,ebx + add edx,edi + shrd edi,edi,7 + xor ebp,ebx + vpsrld xmm2,xmm0,30 + vmovdqa [48+esp],xmm3 + mov esi,edx + add ecx,DWORD [4+esp] + xor edi,eax + shld edx,edx,5 + vpslld xmm0,xmm0,2 + add ecx,ebp + and esi,edi + xor edi,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + mov ebp,ecx + add ebx,DWORD [8+esp] + vpor xmm0,xmm0,xmm2 + xor edx,edi + shld ecx,ecx,5 + vmovdqa xmm2,[96+esp] + add ebx,esi + and ebp,edx + xor edx,edi + add ebx,ecx + add eax,DWORD [12+esp] + xor ebp,edi + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpalignr xmm3,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add edi,DWORD [16+esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + vpxor xmm1,xmm1,xmm2 + vmovdqa [96+esp],xmm5 + add edi,esi + xor ebp,ecx + vmovdqa xmm5,xmm4 + vpaddd xmm4,xmm4,xmm0 + shrd ebx,ebx,7 + add edi,eax + vpxor xmm1,xmm1,xmm3 + add edx,DWORD [20+esp] + xor ebp,ebx + mov esi,edi + shld edi,edi,5 + vpsrld xmm3,xmm1,30 + vmovdqa [esp],xmm4 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + vpslld xmm1,xmm1,2 + add ecx,DWORD [24+esp] + xor esi,eax + mov ebp,edx + shld edx,edx,5 + add ecx,esi + xor ebp,eax + shrd edi,edi,7 + add ecx,edx + vpor xmm1,xmm1,xmm3 + add ebx,DWORD [28+esp] + xor ebp,edi + vmovdqa xmm3,[64+esp] + mov esi,ecx + shld ecx,ecx,5 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + vpalignr xmm4,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add eax,DWORD [32+esp] + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + vpxor xmm2,xmm2,xmm3 + vmovdqa [64+esp],xmm6 + add eax,esi + xor ebp,edx + vmovdqa xmm6,[128+esp] + vpaddd xmm5,xmm5,xmm1 + shrd ecx,ecx,7 + add eax,ebx + vpxor xmm2,xmm2,xmm4 + add edi,DWORD [36+esp] + xor ebp,ecx + mov esi,eax + shld eax,eax,5 + vpsrld xmm4,xmm2,30 + vmovdqa [16+esp],xmm5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + vpslld xmm2,xmm2,2 + add edx,DWORD [40+esp] + xor esi,ebx + mov ebp,edi + shld edi,edi,5 + add edx,esi + xor ebp,ebx + shrd eax,eax,7 + add edx,edi + vpor xmm2,xmm2,xmm4 + add ecx,DWORD [44+esp] + xor ebp,eax + vmovdqa xmm4,[80+esp] + mov esi,edx + shld edx,edx,5 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + vpalignr xmm5,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebx,DWORD [48+esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + vpxor xmm3,xmm3,xmm4 + vmovdqa [80+esp],xmm7 + add ebx,esi + xor ebp,edi + vmovdqa xmm7,xmm6 + vpaddd xmm6,xmm6,xmm2 + shrd edx,edx,7 + add ebx,ecx + vpxor xmm3,xmm3,xmm5 + add eax,DWORD [52+esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + vpsrld xmm5,xmm3,30 + vmovdqa [32+esp],xmm6 + add eax,ebp + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpslld xmm3,xmm3,2 + add edi,DWORD [56+esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + add edi,esi + xor ebp,ecx + shrd ebx,ebx,7 + add edi,eax + vpor xmm3,xmm3,xmm5 + add edx,DWORD [60+esp] + xor ebp,ebx + vmovdqa xmm5,[96+esp] + mov esi,edi + shld edi,edi,5 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + vpalignr xmm6,xmm3,xmm2,8 + vpxor xmm4,xmm4,xmm0 + add ecx,DWORD [esp] + xor esi,eax + mov ebp,edx + shld edx,edx,5 + vpxor xmm4,xmm4,xmm5 + vmovdqa [96+esp],xmm0 + add ecx,esi + xor ebp,eax + vmovdqa xmm0,xmm7 + vpaddd xmm7,xmm7,xmm3 + shrd edi,edi,7 + add ecx,edx + vpxor xmm4,xmm4,xmm6 + add ebx,DWORD [4+esp] + xor ebp,edi + mov esi,ecx + shld ecx,ecx,5 + vpsrld xmm6,xmm4,30 + vmovdqa [48+esp],xmm7 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + vpslld xmm4,xmm4,2 + add eax,DWORD [8+esp] + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + add eax,esi + xor ebp,edx + shrd ecx,ecx,7 + add eax,ebx + vpor xmm4,xmm4,xmm6 + add edi,DWORD [12+esp] + xor ebp,ecx + vmovdqa xmm6,[64+esp] + mov esi,eax + shld eax,eax,5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + vpalignr xmm7,xmm4,xmm3,8 + vpxor xmm5,xmm5,xmm1 + add edx,DWORD [16+esp] + xor esi,ebx + mov ebp,edi + shld edi,edi,5 + vpxor xmm5,xmm5,xmm6 + vmovdqa [64+esp],xmm1 + add edx,esi + xor ebp,ebx + vmovdqa xmm1,xmm0 + vpaddd xmm0,xmm0,xmm4 + shrd eax,eax,7 + add edx,edi + vpxor xmm5,xmm5,xmm7 + add ecx,DWORD [20+esp] + xor ebp,eax + mov esi,edx + shld edx,edx,5 + vpsrld xmm7,xmm5,30 + vmovdqa [esp],xmm0 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + vpslld xmm5,xmm5,2 + add ebx,DWORD [24+esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + vpor xmm5,xmm5,xmm7 + add eax,DWORD [28+esp] + vmovdqa xmm7,[80+esp] + shrd ecx,ecx,7 + mov esi,ebx + xor ebp,edx + shld ebx,ebx,5 + add eax,ebp + xor esi,ecx + xor ecx,edx + add eax,ebx + vpalignr xmm0,xmm5,xmm4,8 + vpxor xmm6,xmm6,xmm2 + add edi,DWORD [32+esp] + and esi,ecx + xor ecx,edx + shrd ebx,ebx,7 + vpxor xmm6,xmm6,xmm7 + vmovdqa [80+esp],xmm2 + mov ebp,eax + xor esi,ecx + vmovdqa xmm2,xmm1 + vpaddd xmm1,xmm1,xmm5 + shld eax,eax,5 + add edi,esi + vpxor xmm6,xmm6,xmm0 + xor ebp,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD [36+esp] + vpsrld xmm0,xmm6,30 + vmovdqa [16+esp],xmm1 + and ebp,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,edi + vpslld xmm6,xmm6,2 + xor ebp,ebx + shld edi,edi,5 + add edx,ebp + xor esi,eax + xor eax,ebx + add edx,edi + add ecx,DWORD [40+esp] + and esi,eax + vpor xmm6,xmm6,xmm0 + xor eax,ebx + shrd edi,edi,7 + vmovdqa xmm0,[96+esp] + mov ebp,edx + xor esi,eax + shld edx,edx,5 + add ecx,esi + xor ebp,edi + xor edi,eax + add ecx,edx + add ebx,DWORD [44+esp] + and ebp,edi + xor edi,eax + shrd edx,edx,7 + mov esi,ecx + xor ebp,edi + shld ecx,ecx,5 + add ebx,ebp + xor esi,edx + xor edx,edi + add ebx,ecx + vpalignr xmm1,xmm6,xmm5,8 + vpxor xmm7,xmm7,xmm3 + add eax,DWORD [48+esp] + and esi,edx + xor edx,edi + shrd ecx,ecx,7 + vpxor xmm7,xmm7,xmm0 + vmovdqa [96+esp],xmm3 + mov ebp,ebx + xor esi,edx + vmovdqa xmm3,[144+esp] + vpaddd xmm2,xmm2,xmm6 + shld ebx,ebx,5 + add eax,esi + vpxor xmm7,xmm7,xmm1 + xor ebp,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD [52+esp] + vpsrld xmm1,xmm7,30 + vmovdqa [32+esp],xmm2 + and ebp,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + vpslld xmm7,xmm7,2 + xor ebp,ecx + shld eax,eax,5 + add edi,ebp + xor esi,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD [56+esp] + and esi,ebx + vpor xmm7,xmm7,xmm1 + xor ebx,ecx + shrd eax,eax,7 + vmovdqa xmm1,[64+esp] + mov ebp,edi + xor esi,ebx + shld edi,edi,5 + add edx,esi + xor ebp,eax + xor eax,ebx + add edx,edi + add ecx,DWORD [60+esp] + and ebp,eax + xor eax,ebx + shrd edi,edi,7 + mov esi,edx + xor ebp,eax + shld edx,edx,5 + add ecx,ebp + xor esi,edi + xor edi,eax + add ecx,edx + vpalignr xmm2,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + add ebx,DWORD [esp] + and esi,edi + xor edi,eax + shrd edx,edx,7 + vpxor xmm0,xmm0,xmm1 + vmovdqa [64+esp],xmm4 + mov ebp,ecx + xor esi,edi + vmovdqa xmm4,xmm3 + vpaddd xmm3,xmm3,xmm7 + shld ecx,ecx,5 + add ebx,esi + vpxor xmm0,xmm0,xmm2 + xor ebp,edx + xor edx,edi + add ebx,ecx + add eax,DWORD [4+esp] + vpsrld xmm2,xmm0,30 + vmovdqa [48+esp],xmm3 + and ebp,edx + xor edx,edi + shrd ecx,ecx,7 + mov esi,ebx + vpslld xmm0,xmm0,2 + xor ebp,edx + shld ebx,ebx,5 + add eax,ebp + xor esi,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD [8+esp] + and esi,ecx + vpor xmm0,xmm0,xmm2 + xor ecx,edx + shrd ebx,ebx,7 + vmovdqa xmm2,[80+esp] + mov ebp,eax + xor esi,ecx + shld eax,eax,5 + add edi,esi + xor ebp,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD [12+esp] + and ebp,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,edi + xor ebp,ebx + shld edi,edi,5 + add edx,ebp + xor esi,eax + xor eax,ebx + add edx,edi + vpalignr xmm3,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ecx,DWORD [16+esp] + and esi,eax + xor eax,ebx + shrd edi,edi,7 + vpxor xmm1,xmm1,xmm2 + vmovdqa [80+esp],xmm5 + mov ebp,edx + xor esi,eax + vmovdqa xmm5,xmm4 + vpaddd xmm4,xmm4,xmm0 + shld edx,edx,5 + add ecx,esi + vpxor xmm1,xmm1,xmm3 + xor ebp,edi + xor edi,eax + add ecx,edx + add ebx,DWORD [20+esp] + vpsrld xmm3,xmm1,30 + vmovdqa [esp],xmm4 + and ebp,edi + xor edi,eax + shrd edx,edx,7 + mov esi,ecx + vpslld xmm1,xmm1,2 + xor ebp,edi + shld ecx,ecx,5 + add ebx,ebp + xor esi,edx + xor edx,edi + add ebx,ecx + add eax,DWORD [24+esp] + and esi,edx + vpor xmm1,xmm1,xmm3 + xor edx,edi + shrd ecx,ecx,7 + vmovdqa xmm3,[96+esp] + mov ebp,ebx + xor esi,edx + shld ebx,ebx,5 + add eax,esi + xor ebp,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD [28+esp] + and ebp,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + xor ebp,ecx + shld eax,eax,5 + add edi,ebp + xor esi,ebx + xor ebx,ecx + add edi,eax + vpalignr xmm4,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add edx,DWORD [32+esp] + and esi,ebx + xor ebx,ecx + shrd eax,eax,7 + vpxor xmm2,xmm2,xmm3 + vmovdqa [96+esp],xmm6 + mov ebp,edi + xor esi,ebx + vmovdqa xmm6,xmm5 + vpaddd xmm5,xmm5,xmm1 + shld edi,edi,5 + add edx,esi + vpxor xmm2,xmm2,xmm4 + xor ebp,eax + xor eax,ebx + add edx,edi + add ecx,DWORD [36+esp] + vpsrld xmm4,xmm2,30 + vmovdqa [16+esp],xmm5 + and ebp,eax + xor eax,ebx + shrd edi,edi,7 + mov esi,edx + vpslld xmm2,xmm2,2 + xor ebp,eax + shld edx,edx,5 + add ecx,ebp + xor esi,edi + xor edi,eax + add ecx,edx + add ebx,DWORD [40+esp] + and esi,edi + vpor xmm2,xmm2,xmm4 + xor edi,eax + shrd edx,edx,7 + vmovdqa xmm4,[64+esp] + mov ebp,ecx + xor esi,edi + shld ecx,ecx,5 + add ebx,esi + xor ebp,edx + xor edx,edi + add ebx,ecx + add eax,DWORD [44+esp] + and ebp,edx + xor edx,edi + shrd ecx,ecx,7 + mov esi,ebx + xor ebp,edx + shld ebx,ebx,5 + add eax,ebp + xor esi,edx + add eax,ebx + vpalignr xmm5,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add edi,DWORD [48+esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + vpxor xmm3,xmm3,xmm4 + vmovdqa [64+esp],xmm7 + add edi,esi + xor ebp,ecx + vmovdqa xmm7,xmm6 + vpaddd xmm6,xmm6,xmm2 + shrd ebx,ebx,7 + add edi,eax + vpxor xmm3,xmm3,xmm5 + add edx,DWORD [52+esp] + xor ebp,ebx + mov esi,edi + shld edi,edi,5 + vpsrld xmm5,xmm3,30 + vmovdqa [32+esp],xmm6 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + vpslld xmm3,xmm3,2 + add ecx,DWORD [56+esp] + xor esi,eax + mov ebp,edx + shld edx,edx,5 + add ecx,esi + xor ebp,eax + shrd edi,edi,7 + add ecx,edx + vpor xmm3,xmm3,xmm5 + add ebx,DWORD [60+esp] + xor ebp,edi + mov esi,ecx + shld ecx,ecx,5 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD [esp] + vpaddd xmm7,xmm7,xmm3 + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + add eax,esi + vmovdqa [48+esp],xmm7 + xor ebp,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD [4+esp] + xor ebp,ecx + mov esi,eax + shld eax,eax,5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD [8+esp] + xor esi,ebx + mov ebp,edi + shld edi,edi,5 + add edx,esi + xor ebp,ebx + shrd eax,eax,7 + add edx,edi + add ecx,DWORD [12+esp] + xor ebp,eax + mov esi,edx + shld edx,edx,5 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + mov ebp,DWORD [196+esp] + cmp ebp,DWORD [200+esp] + je NEAR L$006done + vmovdqa xmm7,[160+esp] + vmovdqa xmm6,[176+esp] + vmovdqu xmm0,[ebp] + vmovdqu xmm1,[16+ebp] + vmovdqu xmm2,[32+ebp] + vmovdqu xmm3,[48+ebp] + add ebp,64 + vpshufb xmm0,xmm0,xmm6 + mov DWORD [196+esp],ebp + vmovdqa [96+esp],xmm7 + add ebx,DWORD [16+esp] + xor esi,edi + vpshufb xmm1,xmm1,xmm6 + mov ebp,ecx + shld ecx,ecx,5 + vpaddd xmm4,xmm0,xmm7 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + vmovdqa [esp],xmm4 + add eax,DWORD [20+esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD [24+esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + add edi,esi + xor ebp,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD [28+esp] + xor ebp,ebx + mov esi,edi + shld edi,edi,5 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + add ecx,DWORD [32+esp] + xor esi,eax + vpshufb xmm2,xmm2,xmm6 + mov ebp,edx + shld edx,edx,5 + vpaddd xmm5,xmm1,xmm7 + add ecx,esi + xor ebp,eax + shrd edi,edi,7 + add ecx,edx + vmovdqa [16+esp],xmm5 + add ebx,DWORD [36+esp] + xor ebp,edi + mov esi,ecx + shld ecx,ecx,5 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD [40+esp] + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + add eax,esi + xor ebp,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD [44+esp] + xor ebp,ecx + mov esi,eax + shld eax,eax,5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD [48+esp] + xor esi,ebx + vpshufb xmm3,xmm3,xmm6 + mov ebp,edi + shld edi,edi,5 + vpaddd xmm6,xmm2,xmm7 + add edx,esi + xor ebp,ebx + shrd eax,eax,7 + add edx,edi + vmovdqa [32+esp],xmm6 + add ecx,DWORD [52+esp] + xor ebp,eax + mov esi,edx + shld edx,edx,5 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + add ebx,DWORD [56+esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD [60+esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + shrd ecx,ecx,7 + add eax,ebx + mov ebp,DWORD [192+esp] + add eax,DWORD [ebp] + add esi,DWORD [4+ebp] + add ecx,DWORD [8+ebp] + mov DWORD [ebp],eax + add edx,DWORD [12+ebp] + mov DWORD [4+ebp],esi + add edi,DWORD [16+ebp] + mov ebx,ecx + mov DWORD [8+ebp],ecx + xor ebx,edx + mov DWORD [12+ebp],edx + mov DWORD [16+ebp],edi + mov ebp,esi + and esi,ebx + mov ebx,ebp + jmp NEAR L$005loop +align 16 +L$006done: + add ebx,DWORD [16+esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD [20+esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD [24+esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + add edi,esi + xor ebp,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD [28+esp] + xor ebp,ebx + mov esi,edi + shld edi,edi,5 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + add ecx,DWORD [32+esp] + xor esi,eax + mov ebp,edx + shld edx,edx,5 + add ecx,esi + xor ebp,eax + shrd edi,edi,7 + add ecx,edx + add ebx,DWORD [36+esp] + xor ebp,edi + mov esi,ecx + shld ecx,ecx,5 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD [40+esp] + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + add eax,esi + xor ebp,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD [44+esp] + xor ebp,ecx + mov esi,eax + shld eax,eax,5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD [48+esp] + xor esi,ebx + mov ebp,edi + shld edi,edi,5 + add edx,esi + xor ebp,ebx + shrd eax,eax,7 + add edx,edi + add ecx,DWORD [52+esp] + xor ebp,eax + mov esi,edx + shld edx,edx,5 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + add ebx,DWORD [56+esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD [60+esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + shrd ecx,ecx,7 + add eax,ebx + vzeroall + mov ebp,DWORD [192+esp] + add eax,DWORD [ebp] + mov esp,DWORD [204+esp] + add esi,DWORD [4+ebp] + add ecx,DWORD [8+ebp] + mov DWORD [ebp],eax + add edx,DWORD [12+ebp] + mov DWORD [4+ebp],esi + add edi,DWORD [16+ebp] + mov DWORD [8+ebp],ecx + mov DWORD [12+ebp],edx + mov DWORD [16+ebp],edi + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$K_XX_XX: +dd 1518500249,1518500249,1518500249,1518500249 +dd 1859775393,1859775393,1859775393,1859775393 +dd 2400959708,2400959708,2400959708,2400959708 +dd 3395469782,3395469782,3395469782,3395469782 +dd 66051,67438087,134810123,202182159 +db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +db 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +db 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +db 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +db 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-armv4-large-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha1-armv4-large-linux.S new file mode 100644 index 0000000000..323e6e6e12 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-armv4-large-linux.S @@ -0,0 +1,1481 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +#include + +.text +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,%function + +.align 5 +sha1_block_data_order_nohw: + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + ldmia r0,{r3,r4,r5,r6,r7} +.Lloop: + ldr r8,.LK_00_19 + mov r14,sp + sub sp,sp,#15*4 + mov r5,r5,ror#30 + mov r6,r6,ror#30 + mov r7,r7,ror#30 @ [6] +.L_00_15: +#if __ARM_ARCH<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r6,r8,r6,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 + eor r10,r4,r5 @ F_xx_xx + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r3,r10,ror#2 + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r6,r6,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r5,r8,r5,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 + eor r10,r3,r4 @ F_xx_xx + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r7,r10,ror#2 + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r5,r5,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r4,r8,r4,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 + eor r10,r7,r3 @ F_xx_xx + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r6,r10,ror#2 + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r4,r4,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r3,r8,r3,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 + eor r10,r6,r7 @ F_xx_xx + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r5,r10,ror#2 + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r3,r3,r10 @ E+=F_00_19(B,C,D) +#if defined(__thumb2__) + mov r12,sp + teq r14,r12 +#else + teq r14,sp +#endif + bne .L_00_15 @ [((11+4)*5+2)*3] + sub sp,sp,#25*4 +#if __ARM_ARCH<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + add r6,r6,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + add r5,r5,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + add r4,r4,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + add r3,r3,r10 @ E+=F_00_19(B,C,D) + + ldr r8,.LK_20_39 @ [+15+16*4] + cmn sp,#0 @ [+3], clear carry to denote 20_39 +.L_20_39_or_60_79: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r4,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_20_39(B,C,D) +#if defined(__thumb2__) + mov r12,sp + teq r14,r12 +#else + teq r14,sp @ preserve carry +#endif + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes + + ldr r8,.LK_40_59 + sub sp,sp,#20*4 @ [+2] +.L_40_59: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r4,r10,ror#2 @ F_xx_xx + and r11,r5,r6 @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_40_59(B,C,D) + add r7,r7,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + and r11,r4,r5 @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_40_59(B,C,D) + add r6,r6,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + and r11,r3,r4 @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_40_59(B,C,D) + add r5,r5,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + and r11,r7,r3 @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_40_59(B,C,D) + add r4,r4,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + and r11,r6,r7 @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_40_59(B,C,D) + add r3,r3,r11,ror#2 +#if defined(__thumb2__) + mov r12,sp + teq r14,r12 +#else + teq r14,sp +#endif + bne .L_40_59 @ [+((12+5)*5+2)*4] + + ldr r8,.LK_60_79 + sub sp,sp,#20*4 + cmp sp,#0 @ set carry to denote 60_79 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes +.L_done: + add sp,sp,#80*4 @ "deallocate" stack frame + ldmia r0,{r8,r9,r10,r11,r12} + add r3,r8,r3 + add r4,r9,r4 + add r5,r10,r5,ror#2 + add r6,r11,r6,ror#2 + add r7,r12,r7,ror#2 + stmia r0,{r3,r4,r5,r6,r7} + teq r1,r2 + bne .Lloop @ [+18], total 1307 + +#if __ARM_ARCH>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw + +.align 5 +.LK_00_19:.word 0x5a827999 +.LK_20_39:.word 0x6ed9eba1 +.LK_40_59:.word 0x8f1bbcdc +.LK_60_79:.word 0xca62c1d6 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 5 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.globl sha1_block_data_order_neon +.hidden sha1_block_data_order_neon +.type sha1_block_data_order_neon,%function +.align 4 +sha1_block_data_order_neon: + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + @ dmb @ errata #451034 on early Cortex A8 + @ vstmdb sp!,{d8-d15} @ ABI specification says so + mov r14,sp + sub r12,sp,#64 + adr r8,.LK_00_19 + bic r12,r12,#15 @ align for 128-bit stores + + ldmia r0,{r3,r4,r5,r6,r7} @ load context + mov sp,r12 @ alloca + + vld1.8 {q0,q1},[r1]! @ handles unaligned + veor q15,q15,q15 + vld1.8 {q2,q3},[r1]! + vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19 + vrev32.8 q0,q0 @ yes, even on + vrev32.8 q1,q1 @ big-endian... + vrev32.8 q2,q2 + vadd.i32 q8,q0,q14 + vrev32.8 q3,q3 + vadd.i32 q9,q1,q14 + vst1.32 {q8},[r12,:128]! + vadd.i32 q10,q2,q14 + vst1.32 {q9},[r12,:128]! + vst1.32 {q10},[r12,:128]! + ldr r9,[sp] @ big RAW stall + +.Loop_neon: + vext.8 q8,q0,q1,#8 + bic r10,r6,r4 + add r7,r7,r9 + and r11,r5,r4 + vadd.i32 q13,q3,q14 + ldr r9,[sp,#4] + add r7,r7,r3,ror#27 + vext.8 q12,q3,q15,#4 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q8,q8,q0 + bic r10,r5,r3 + add r6,r6,r9 + veor q12,q12,q2 + and r11,r4,r3 + ldr r9,[sp,#8] + veor q12,q12,q8 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q13,q15,q12,#4 + bic r10,r4,r7 + add r5,r5,r9 + vadd.i32 q8,q12,q12 + and r11,r3,r7 + ldr r9,[sp,#12] + vsri.32 q8,q12,#31 + add r5,r5,r6,ror#27 + eor r11,r11,r10 + mov r7,r7,ror#2 + vshr.u32 q12,q13,#30 + add r5,r5,r11 + bic r10,r3,r6 + vshl.u32 q13,q13,#2 + add r4,r4,r9 + and r11,r7,r6 + veor q8,q8,q12 + ldr r9,[sp,#16] + add r4,r4,r5,ror#27 + veor q8,q8,q13 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q9,q1,q2,#8 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + vadd.i32 q13,q8,q14 + ldr r9,[sp,#20] + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r4,ror#27 + vext.8 q12,q8,q15,#4 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + veor q9,q9,q1 + bic r10,r6,r4 + add r7,r7,r9 + veor q12,q12,q3 + and r11,r5,r4 + ldr r9,[sp,#24] + veor q12,q12,q9 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q13,q15,q12,#4 + bic r10,r5,r3 + add r6,r6,r9 + vadd.i32 q9,q12,q12 + and r11,r4,r3 + ldr r9,[sp,#28] + vsri.32 q9,q12,#31 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + mov r3,r3,ror#2 + vshr.u32 q12,q13,#30 + add r6,r6,r11 + bic r10,r4,r7 + vshl.u32 q13,q13,#2 + add r5,r5,r9 + and r11,r3,r7 + veor q9,q9,q12 + ldr r9,[sp,#32] + add r5,r5,r6,ror#27 + veor q9,q9,q13 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q10,q2,q3,#8 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + vadd.i32 q13,q9,q14 + ldr r9,[sp,#36] + add r4,r4,r5,ror#27 + vext.8 q12,q9,q15,#4 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q10,q10,q2 + bic r10,r7,r5 + add r3,r3,r9 + veor q12,q12,q8 + and r11,r6,r5 + ldr r9,[sp,#40] + veor q12,q12,q10 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q13,q15,q12,#4 + bic r10,r6,r4 + add r7,r7,r9 + vadd.i32 q10,q12,q12 + and r11,r5,r4 + ldr r9,[sp,#44] + vsri.32 q10,q12,#31 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + mov r4,r4,ror#2 + vshr.u32 q12,q13,#30 + add r7,r7,r11 + bic r10,r5,r3 + vshl.u32 q13,q13,#2 + add r6,r6,r9 + and r11,r4,r3 + veor q10,q10,q12 + ldr r9,[sp,#48] + add r6,r6,r7,ror#27 + veor q10,q10,q13 + eor r11,r11,r10 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q11,q3,q8,#8 + bic r10,r4,r7 + add r5,r5,r9 + and r11,r3,r7 + vadd.i32 q13,q10,q14 + ldr r9,[sp,#52] + add r5,r5,r6,ror#27 + vext.8 q12,q10,q15,#4 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q11,q11,q3 + bic r10,r3,r6 + add r4,r4,r9 + veor q12,q12,q9 + and r11,r7,r6 + ldr r9,[sp,#56] + veor q12,q12,q11 + add r4,r4,r5,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q13,q15,q12,#4 + bic r10,r7,r5 + add r3,r3,r9 + vadd.i32 q11,q12,q12 + and r11,r6,r5 + ldr r9,[sp,#60] + vsri.32 q11,q12,#31 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + vshr.u32 q12,q13,#30 + add r3,r3,r11 + bic r10,r6,r4 + vshl.u32 q13,q13,#2 + add r7,r7,r9 + and r11,r5,r4 + veor q11,q11,q12 + ldr r9,[sp,#0] + add r7,r7,r3,ror#27 + veor q11,q11,q13 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q10,q11,#8 + bic r10,r5,r3 + add r6,r6,r9 + and r11,r4,r3 + veor q0,q0,q8 + ldr r9,[sp,#4] + add r6,r6,r7,ror#27 + veor q0,q0,q1 + eor r11,r11,r10 + mov r3,r3,ror#2 + vadd.i32 q13,q11,q14 + add r6,r6,r11 + bic r10,r4,r7 + veor q12,q12,q0 + add r5,r5,r9 + and r11,r3,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r11,r10 + mov r7,r7,ror#2 + vsli.32 q0,q12,#2 + add r5,r5,r11 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + ldr r9,[sp,#12] + add r4,r4,r5,ror#27 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + ldr r9,[sp,#16] + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q11,q0,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#20] + veor q1,q1,q9 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q1,q1,q2 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q0,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q1 + ldr r9,[sp,#24] + eor r11,r10,r4 + vshr.u32 q1,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q1,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#28] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#32] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q0,q1,#8 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#36] + veor q2,q2,q10 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + veor q2,q2,q3 + mov r5,r5,ror#2 + add r3,r3,r11 + vadd.i32 q13,q1,q14 + eor r10,r4,r6 + vld1.32 {d28[],d29[]},[r8,:32]! + add r7,r7,r9 + veor q12,q12,q2 + ldr r9,[sp,#40] + eor r11,r10,r5 + vshr.u32 q2,q12,#30 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + vst1.32 {q13},[r12,:128]! + add r7,r7,r11 + eor r10,r3,r5 + vsli.32 q2,q12,#2 + add r6,r6,r9 + ldr r9,[sp,#44] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#48] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q1,q2,#8 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r7 + add r4,r4,r5,ror#27 + veor q3,q3,q8 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q2,q14 + eor r10,r5,r7 + add r3,r3,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r6 + vshr.u32 q3,q12,#30 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vst1.32 {q13},[r12,:128]! + add r3,r3,r11 + eor r10,r4,r6 + vsli.32 q3,q12,#2 + add r7,r7,r9 + ldr r9,[sp,#60] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#0] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q2,q3,#8 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#4] + veor q8,q8,q0 + eor r11,r10,r3 + add r5,r5,r6,ror#27 + veor q8,q8,q9 + mov r7,r7,ror#2 + add r5,r5,r11 + vadd.i32 q13,q3,q14 + eor r10,r6,r3 + add r4,r4,r9 + veor q12,q12,q8 + ldr r9,[sp,#8] + eor r11,r10,r7 + vshr.u32 q8,q12,#30 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + add r4,r4,r11 + eor r10,r5,r7 + vsli.32 q8,q12,#2 + add r3,r3,r9 + ldr r9,[sp,#12] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#16] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q3,q8,#8 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#20] + veor q9,q9,q1 + eor r11,r10,r4 + add r6,r6,r7,ror#27 + veor q9,q9,q10 + mov r3,r3,ror#2 + add r6,r6,r11 + vadd.i32 q13,q8,q14 + eor r10,r7,r4 + add r5,r5,r9 + veor q12,q12,q9 + ldr r9,[sp,#24] + eor r11,r10,r3 + vshr.u32 q9,q12,#30 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + vst1.32 {q13},[r12,:128]! + add r5,r5,r11 + eor r10,r6,r3 + vsli.32 q9,q12,#2 + add r4,r4,r9 + ldr r9,[sp,#28] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#32] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q8,q9,#8 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#36] + veor q10,q10,q2 + add r7,r7,r3,ror#27 + eor r11,r5,r6 + veor q10,q10,q11 + add r7,r7,r10 + and r11,r11,r4 + vadd.i32 q13,q9,q14 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q12,q12,q10 + add r6,r6,r9 + and r10,r4,r5 + vshr.u32 q10,q12,#30 + ldr r9,[sp,#40] + add r6,r6,r7,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r4,r5 + add r6,r6,r10 + vsli.32 q10,q12,#2 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#44] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#48] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q9,q10,#8 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#52] + veor q11,q11,q3 + add r3,r3,r4,ror#27 + eor r11,r6,r7 + veor q11,q11,q0 + add r3,r3,r10 + and r11,r11,r5 + vadd.i32 q13,q10,q14 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + veor q12,q12,q11 + add r7,r7,r9 + and r10,r5,r6 + vshr.u32 q11,q12,#30 + ldr r9,[sp,#56] + add r7,r7,r3,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r5,r6 + add r7,r7,r10 + vsli.32 q11,q12,#2 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#60] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#0] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q10,q11,#8 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#4] + veor q0,q0,q8 + add r4,r4,r5,ror#27 + eor r11,r7,r3 + veor q0,q0,q1 + add r4,r4,r10 + and r11,r11,r6 + vadd.i32 q13,q11,q14 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q12,q12,q0 + add r3,r3,r9 + and r10,r6,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r3,r3,r4,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r6,r7 + add r3,r3,r10 + vsli.32 q0,q12,#2 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#12] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#16] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q11,q0,#8 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#20] + veor q1,q1,q9 + add r5,r5,r6,ror#27 + eor r11,r3,r4 + veor q1,q1,q2 + add r5,r5,r10 + and r11,r11,r7 + vadd.i32 q13,q0,q14 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q12,q12,q1 + add r4,r4,r9 + and r10,r7,r3 + vshr.u32 q1,q12,#30 + ldr r9,[sp,#24] + add r4,r4,r5,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r7,r3 + add r4,r4,r10 + vsli.32 q1,q12,#2 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#28] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#32] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q0,q1,#8 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#36] + veor q2,q2,q10 + add r6,r6,r7,ror#27 + eor r11,r4,r5 + veor q2,q2,q3 + add r6,r6,r10 + and r11,r11,r3 + vadd.i32 q13,q1,q14 + mov r3,r3,ror#2 + add r6,r6,r11 + veor q12,q12,q2 + add r5,r5,r9 + and r10,r3,r4 + vshr.u32 q2,q12,#30 + ldr r9,[sp,#40] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r3,r4 + add r5,r5,r10 + vsli.32 q2,q12,#2 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#44] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#48] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q1,q2,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q3,q3,q8 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q2,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r4 + vshr.u32 q3,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q3,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#60] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#0] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q3,q14 + eor r10,r5,r7 + add r3,r3,r9 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + teq r1,r2 + sub r8,r8,#16 + it eq + subeq r1,r1,#64 + vld1.8 {q0,q1},[r1]! + ldr r9,[sp,#4] + eor r11,r10,r6 + vld1.8 {q2,q3},[r1]! + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + eor r10,r4,r6 + vrev32.8 q0,q0 + add r7,r7,r9 + ldr r9,[sp,#8] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#12] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#16] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vrev32.8 q1,q1 + eor r10,r6,r3 + add r4,r4,r9 + vadd.i32 q8,q0,q14 + ldr r9,[sp,#20] + eor r11,r10,r7 + vst1.32 {q8},[r12,:128]! + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#24] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#28] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#32] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vrev32.8 q2,q2 + eor r10,r7,r4 + add r5,r5,r9 + vadd.i32 q9,q1,q14 + ldr r9,[sp,#36] + eor r11,r10,r3 + vst1.32 {q9},[r12,:128]! + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#40] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#44] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#48] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vrev32.8 q3,q3 + eor r10,r3,r5 + add r6,r6,r9 + vadd.i32 q10,q2,q14 + ldr r9,[sp,#52] + eor r11,r10,r4 + vst1.32 {q10},[r12,:128]! + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#56] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#60] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + ldmia r0,{r9,r10,r11,r12} @ accumulate context + add r3,r3,r9 + ldr r9,[r0,#16] + add r4,r4,r10 + add r5,r5,r11 + add r6,r6,r12 + it eq + moveq sp,r14 + add r7,r7,r9 + it ne + ldrne r9,[sp] + stmia r0,{r3,r4,r5,r6,r7} + itt ne + addne r12,sp,#3*16 + bne .Loop_neon + + @ vldmia sp!,{d8-d15} + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +.size sha1_block_data_order_neon,.-sha1_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 + +# if defined(__thumb2__) +# define INST(a,b,c,d) .byte c,d|0xf,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d|0x10 +# endif + +.globl sha1_block_data_order_hw +.hidden sha1_block_data_order_hw +.type sha1_block_data_order_hw,%function +.align 5 +sha1_block_data_order_hw: + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so + + veor q1,q1,q1 + adr r3,.LK_00_19 + vld1.32 {q0},[r0]! + vld1.32 {d2[0]},[r0] + sub r0,r0,#16 + vld1.32 {d16[],d17[]},[r3,:32]! + vld1.32 {d18[],d19[]},[r3,:32]! + vld1.32 {d20[],d21[]},[r3,:32]! + vld1.32 {d22[],d23[]},[r3,:32] + +.Loop_v8: + vld1.8 {q4,q5},[r1]! + vld1.8 {q6,q7},[r1]! + vrev32.8 q4,q4 + vrev32.8 q5,q5 + + vadd.i32 q12,q8,q4 + vrev32.8 q6,q6 + vmov q14,q0 @ offload + subs r2,r2,#1 + + vadd.i32 q13,q8,q5 + vrev32.8 q7,q7 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 0 + INST(0x68,0x0c,0x02,0xe2) @ sha1c q0,q1,q12 + vadd.i32 q12,q8,q6 + INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 1 + INST(0x6a,0x0c,0x06,0xe2) @ sha1c q0,q3,q13 + vadd.i32 q13,q8,q7 + INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7 + INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 2 + INST(0x68,0x0c,0x04,0xe2) @ sha1c q0,q2,q12 + vadd.i32 q12,q8,q4 + INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4 + INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 3 + INST(0x6a,0x0c,0x06,0xe2) @ sha1c q0,q3,q13 + vadd.i32 q13,q9,q5 + INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5 + INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 4 + INST(0x68,0x0c,0x04,0xe2) @ sha1c q0,q2,q12 + vadd.i32 q12,q9,q6 + INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6 + INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 5 + INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q7 + INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7 + INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 6 + INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12 + vadd.i32 q12,q9,q4 + INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4 + INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 7 + INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q5 + INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5 + INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 8 + INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12 + vadd.i32 q12,q10,q6 + INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6 + INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 9 + INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 + vadd.i32 q13,q10,q7 + INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7 + INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 10 + INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q4 + INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4 + INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 11 + INST(0x6a,0x0c,0x26,0xe2) @ sha1m q0,q3,q13 + vadd.i32 q13,q10,q5 + INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5 + INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 12 + INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q6 + INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6 + INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 13 + INST(0x6a,0x0c,0x26,0xe2) @ sha1m q0,q3,q13 + vadd.i32 q13,q11,q7 + INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7 + INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 14 + INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12 + vadd.i32 q12,q11,q4 + INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4 + INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 15 + INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q5 + INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5 + INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5 + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 16 + INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12 + vadd.i32 q12,q11,q6 + INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6 + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 17 + INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q7 + + INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 18 + INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12 + + INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 19 + INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 + + vadd.i32 q1,q1,q2 + vadd.i32 q0,q0,q14 + bne .Loop_v8 + + vst1.32 {q0},[r0]! + vst1.32 {d2[0]},[r0] + + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} + bx lr @ bx lr +.size sha1_block_data_order_hw,.-sha1_block_data_order_hw +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-apple.S new file mode 100644 index 0000000000..8f847749e3 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-apple.S @@ -0,0 +1,1218 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +.text + +.globl _sha1_block_data_order_nohw +.private_extern _sha1_block_data_order_nohw + +.align 6 +_sha1_block_data_order_nohw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp w20,w21,[x0] + ldp w22,w23,[x0,#8] + ldr w24,[x0,#16] + +Loop: + ldr x3,[x1],#64 + movz w28,#0x7999 + sub x2,x2,#1 + movk w28,#0x5a82,lsl#16 +#ifdef __AARCH64EB__ + ror x3,x3,#32 +#else + rev32 x3,x3 +#endif + add w24,w24,w28 // warm it up + add w24,w24,w3 + lsr x4,x3,#32 + ldr x5,[x1,#-56] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w4 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x5,x5,#32 +#else + rev32 x5,x5 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w5 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x6,x5,#32 + ldr x7,[x1,#-48] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w6 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x7,x7,#32 +#else + rev32 x7,x7 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w7 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x8,x7,#32 + ldr x9,[x1,#-40] + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w8 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x9,x9,#32 +#else + rev32 x9,x9 +#endif + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w9 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + lsr x10,x9,#32 + ldr x11,[x1,#-32] + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w10 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x11,x11,#32 +#else + rev32 x11,x11 +#endif + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w11 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + lsr x12,x11,#32 + ldr x13,[x1,#-24] + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w12 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x13,x13,#32 +#else + rev32 x13,x13 +#endif + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w13 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + lsr x14,x13,#32 + ldr x15,[x1,#-16] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w14 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x15,x15,#32 +#else + rev32 x15,x15 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w15 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x16,x15,#32 + ldr x17,[x1,#-8] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w16 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x17,x17,#32 +#else + rev32 x17,x17 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w17 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x19,x17,#32 + eor w3,w3,w5 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w3,w3,w11 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w3,w3,w16 + ror w22,w22,#2 + add w24,w24,w19 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + eor w4,w4,w12 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + eor w4,w4,w17 + ror w21,w21,#2 + add w23,w23,w3 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + eor w5,w5,w13 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + eor w5,w5,w19 + ror w20,w20,#2 + add w22,w22,w4 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + eor w6,w6,w14 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + eor w6,w6,w3 + ror w24,w24,#2 + add w21,w21,w5 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + eor w7,w7,w15 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + eor w7,w7,w4 + ror w23,w23,#2 + add w20,w20,w6 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w7,w7,#31 + movz w28,#0xeba1 + movk w28,#0x6ed9,lsl#16 + eor w8,w8,w10 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w8,w8,w16 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w8,w8,w5 + ror w22,w22,#2 + add w24,w24,w7 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w9,w9,w6 + add w23,w23,w8 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w10,w10,w7 + add w22,w22,w9 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w11,w11,w8 + add w21,w21,w10 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w12,w12,w9 + add w20,w20,w11 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w13,w13,w10 + add w24,w24,w12 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w14,w14,w11 + add w23,w23,w13 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w15,w15,w12 + add w22,w22,w14 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w16,w16,w13 + add w21,w21,w15 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w17,w17,w14 + add w20,w20,w16 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w19,w19,w15 + add w24,w24,w17 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w3,w3,w16 + add w23,w23,w19 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w4,w4,w17 + add w22,w22,w3 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w5,w5,w19 + add w21,w21,w4 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w6,w6,w3 + add w20,w20,w5 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w7,w7,w4 + add w24,w24,w6 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w8,w8,w5 + add w23,w23,w7 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w9,w9,w6 + add w22,w22,w8 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w10,w10,w7 + add w21,w21,w9 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w11,w11,w8 + add w20,w20,w10 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w11,w11,#31 + movz w28,#0xbcdc + movk w28,#0x8f1b,lsl#16 + eor w12,w12,w14 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w12,w12,w9 + add w24,w24,w11 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w13,w13,w15 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w13,w13,w5 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w13,w13,w10 + add w23,w23,w12 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w14,w14,w16 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w14,w14,w6 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w14,w14,w11 + add w22,w22,w13 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w15,w15,w17 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w15,w15,w7 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w15,w15,w12 + add w21,w21,w14 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w15,w15,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w16,w16,w19 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w16,w16,w8 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w16,w16,w13 + add w20,w20,w15 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w16,w16,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w17,w17,w3 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w17,w17,w9 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w17,w17,w14 + add w24,w24,w16 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w17,w17,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w19,w19,w4 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w19,w19,w10 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w19,w19,w15 + add w23,w23,w17 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w19,w19,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w3,w3,w5 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w3,w3,w11 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w3,w3,w16 + add w22,w22,w19 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w3,w3,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w4,w4,w6 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w4,w4,w12 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w4,w4,w17 + add w21,w21,w3 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w4,w4,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w5,w5,w7 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w5,w5,w13 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w5,w5,w19 + add w20,w20,w4 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w5,w5,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w6,w6,w8 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w6,w6,w14 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w6,w6,w3 + add w24,w24,w5 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w6,w6,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w7,w7,w9 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w7,w7,w15 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w7,w7,w4 + add w23,w23,w6 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w7,w7,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w8,w8,w10 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w8,w8,w16 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w8,w8,w5 + add w22,w22,w7 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w8,w8,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w9,w9,w11 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w9,w9,w17 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w9,w9,w6 + add w21,w21,w8 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w9,w9,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w10,w10,w12 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w10,w10,w19 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w10,w10,w7 + add w20,w20,w9 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w10,w10,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w11,w11,w13 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w11,w11,w3 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w11,w11,w8 + add w24,w24,w10 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w11,w11,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w12,w12,w14 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w12,w12,w4 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w12,w12,w9 + add w23,w23,w11 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w13,w13,w15 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w13,w13,w5 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w13,w13,w10 + add w22,w22,w12 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w14,w14,w16 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w14,w14,w6 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w14,w14,w11 + add w21,w21,w13 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w15,w15,w17 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w15,w15,w7 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w15,w15,w12 + add w20,w20,w14 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w15,w15,#31 + movz w28,#0xc1d6 + movk w28,#0xca62,lsl#16 + orr w25,w22,w23 + and w26,w22,w23 + eor w16,w16,w19 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w16,w16,w8 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w16,w16,w13 + add w24,w24,w15 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w17,w17,w14 + add w23,w23,w16 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w19,w19,w15 + add w22,w22,w17 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w3,w3,w16 + add w21,w21,w19 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w4,w4,w17 + add w20,w20,w3 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w5,w5,w19 + add w24,w24,w4 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w6,w6,w3 + add w23,w23,w5 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w7,w7,w4 + add w22,w22,w6 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w8,w8,w5 + add w21,w21,w7 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w9,w9,w6 + add w20,w20,w8 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w10,w10,w7 + add w24,w24,w9 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w11,w11,w8 + add w23,w23,w10 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w12,w12,w9 + add w22,w22,w11 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w13,w13,w10 + add w21,w21,w12 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w14,w14,w11 + add w20,w20,w13 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w15,w15,w12 + add w24,w24,w14 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w16,w16,w13 + add w23,w23,w15 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w17,w17,w14 + add w22,w22,w16 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w19,w19,w15 + add w21,w21,w17 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w19,w19,#31 + ldp w4,w5,[x0] + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w19 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ldp w6,w7,[x0,#8] + eor w25,w24,w22 + ror w27,w21,#27 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + ldr w8,[x0,#16] + add w20,w20,w25 // e+=F(b,c,d) + add w21,w21,w5 + add w22,w22,w6 + add w20,w20,w4 + add w23,w23,w7 + add w24,w24,w8 + stp w20,w21,[x0] + stp w22,w23,[x0,#8] + str w24,[x0,#16] + cbnz x2,Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret + +.globl _sha1_block_data_order_hw +.private_extern _sha1_block_data_order_hw + +.align 6 +_sha1_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + adrp x4,Lconst@PAGE + add x4,x4,Lconst@PAGEOFF + eor v1.16b,v1.16b,v1.16b + ld1 {v0.4s},[x0],#16 + ld1 {v1.s}[0],[x0] + sub x0,x0,#16 + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x4] + +Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + + add v20.4s,v16.4s,v4.4s + rev32 v6.16b,v6.16b + orr v22.16b,v0.16b,v0.16b // offload + + add v21.4s,v16.4s,v5.4s + rev32 v7.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b +.long 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 + add v20.4s,v16.4s,v6.4s +.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 1 +.long 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v16.4s,v7.4s +.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 2 +.long 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v16.4s,v4.4s +.long 0x5e281885 //sha1su1 v5.16b,v4.16b +.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 3 +.long 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 4 +.long 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v6.4s +.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 5 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v7.4s +.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 6 +.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v4.4s +.long 0x5e281885 //sha1su1 v5.16b,v4.16b +.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 7 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 8 +.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 9 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v7.4s +.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 10 +.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v4.4s +.long 0x5e281885 //sha1su1 v5.16b,v4.16b +.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 11 +.long 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v5.4s +.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 12 +.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 13 +.long 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s +.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 14 +.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v4.4s +.long 0x5e281885 //sha1su1 v5.16b,v4.16b +.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 15 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v5.4s +.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 16 +.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v6.4s +.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 17 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + +.long 0x5e280803 //sha1h v3.16b,v0.16b // 18 +.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + +.long 0x5e280802 //sha1h v2.16b,v0.16b // 19 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + + add v1.4s,v1.4s,v2.4s + add v0.4s,v0.4s,v22.4s + + cbnz x2,Loop_hw + + st1 {v0.4s},[x0],#16 + st1 {v1.s}[0],[x0] + + ldr x29,[sp],#16 + ret + +.section __TEXT,__const +.align 6 +Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-linux.S new file mode 100644 index 0000000000..f2df2dd30c --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-linux.S @@ -0,0 +1,1218 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +.text + +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,%function +.align 6 +sha1_block_data_order_nohw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp w20,w21,[x0] + ldp w22,w23,[x0,#8] + ldr w24,[x0,#16] + +.Loop: + ldr x3,[x1],#64 + movz w28,#0x7999 + sub x2,x2,#1 + movk w28,#0x5a82,lsl#16 +#ifdef __AARCH64EB__ + ror x3,x3,#32 +#else + rev32 x3,x3 +#endif + add w24,w24,w28 // warm it up + add w24,w24,w3 + lsr x4,x3,#32 + ldr x5,[x1,#-56] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w4 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x5,x5,#32 +#else + rev32 x5,x5 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w5 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x6,x5,#32 + ldr x7,[x1,#-48] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w6 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x7,x7,#32 +#else + rev32 x7,x7 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w7 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x8,x7,#32 + ldr x9,[x1,#-40] + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w8 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x9,x9,#32 +#else + rev32 x9,x9 +#endif + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w9 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + lsr x10,x9,#32 + ldr x11,[x1,#-32] + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w10 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x11,x11,#32 +#else + rev32 x11,x11 +#endif + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w11 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + lsr x12,x11,#32 + ldr x13,[x1,#-24] + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w12 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x13,x13,#32 +#else + rev32 x13,x13 +#endif + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w13 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + lsr x14,x13,#32 + ldr x15,[x1,#-16] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w14 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x15,x15,#32 +#else + rev32 x15,x15 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w15 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x16,x15,#32 + ldr x17,[x1,#-8] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w16 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x17,x17,#32 +#else + rev32 x17,x17 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w17 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x19,x17,#32 + eor w3,w3,w5 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w3,w3,w11 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w3,w3,w16 + ror w22,w22,#2 + add w24,w24,w19 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + eor w4,w4,w12 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + eor w4,w4,w17 + ror w21,w21,#2 + add w23,w23,w3 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + eor w5,w5,w13 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + eor w5,w5,w19 + ror w20,w20,#2 + add w22,w22,w4 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + eor w6,w6,w14 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + eor w6,w6,w3 + ror w24,w24,#2 + add w21,w21,w5 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + eor w7,w7,w15 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + eor w7,w7,w4 + ror w23,w23,#2 + add w20,w20,w6 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w7,w7,#31 + movz w28,#0xeba1 + movk w28,#0x6ed9,lsl#16 + eor w8,w8,w10 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w8,w8,w16 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w8,w8,w5 + ror w22,w22,#2 + add w24,w24,w7 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w9,w9,w6 + add w23,w23,w8 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w10,w10,w7 + add w22,w22,w9 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w11,w11,w8 + add w21,w21,w10 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w12,w12,w9 + add w20,w20,w11 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w13,w13,w10 + add w24,w24,w12 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w14,w14,w11 + add w23,w23,w13 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w15,w15,w12 + add w22,w22,w14 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w16,w16,w13 + add w21,w21,w15 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w17,w17,w14 + add w20,w20,w16 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w19,w19,w15 + add w24,w24,w17 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w3,w3,w16 + add w23,w23,w19 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w4,w4,w17 + add w22,w22,w3 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w5,w5,w19 + add w21,w21,w4 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w6,w6,w3 + add w20,w20,w5 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w7,w7,w4 + add w24,w24,w6 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w8,w8,w5 + add w23,w23,w7 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w9,w9,w6 + add w22,w22,w8 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w10,w10,w7 + add w21,w21,w9 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w11,w11,w8 + add w20,w20,w10 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w11,w11,#31 + movz w28,#0xbcdc + movk w28,#0x8f1b,lsl#16 + eor w12,w12,w14 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w12,w12,w9 + add w24,w24,w11 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w13,w13,w15 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w13,w13,w5 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w13,w13,w10 + add w23,w23,w12 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w14,w14,w16 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w14,w14,w6 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w14,w14,w11 + add w22,w22,w13 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w15,w15,w17 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w15,w15,w7 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w15,w15,w12 + add w21,w21,w14 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w15,w15,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w16,w16,w19 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w16,w16,w8 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w16,w16,w13 + add w20,w20,w15 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w16,w16,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w17,w17,w3 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w17,w17,w9 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w17,w17,w14 + add w24,w24,w16 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w17,w17,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w19,w19,w4 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w19,w19,w10 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w19,w19,w15 + add w23,w23,w17 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w19,w19,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w3,w3,w5 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w3,w3,w11 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w3,w3,w16 + add w22,w22,w19 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w3,w3,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w4,w4,w6 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w4,w4,w12 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w4,w4,w17 + add w21,w21,w3 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w4,w4,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w5,w5,w7 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w5,w5,w13 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w5,w5,w19 + add w20,w20,w4 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w5,w5,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w6,w6,w8 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w6,w6,w14 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w6,w6,w3 + add w24,w24,w5 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w6,w6,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w7,w7,w9 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w7,w7,w15 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w7,w7,w4 + add w23,w23,w6 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w7,w7,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w8,w8,w10 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w8,w8,w16 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w8,w8,w5 + add w22,w22,w7 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w8,w8,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w9,w9,w11 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w9,w9,w17 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w9,w9,w6 + add w21,w21,w8 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w9,w9,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w10,w10,w12 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w10,w10,w19 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w10,w10,w7 + add w20,w20,w9 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w10,w10,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w11,w11,w13 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w11,w11,w3 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w11,w11,w8 + add w24,w24,w10 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w11,w11,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w12,w12,w14 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w12,w12,w4 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w12,w12,w9 + add w23,w23,w11 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w13,w13,w15 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w13,w13,w5 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w13,w13,w10 + add w22,w22,w12 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w14,w14,w16 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w14,w14,w6 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w14,w14,w11 + add w21,w21,w13 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w15,w15,w17 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w15,w15,w7 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w15,w15,w12 + add w20,w20,w14 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w15,w15,#31 + movz w28,#0xc1d6 + movk w28,#0xca62,lsl#16 + orr w25,w22,w23 + and w26,w22,w23 + eor w16,w16,w19 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w16,w16,w8 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w16,w16,w13 + add w24,w24,w15 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w17,w17,w14 + add w23,w23,w16 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w19,w19,w15 + add w22,w22,w17 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w3,w3,w16 + add w21,w21,w19 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w4,w4,w17 + add w20,w20,w3 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w5,w5,w19 + add w24,w24,w4 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w6,w6,w3 + add w23,w23,w5 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w7,w7,w4 + add w22,w22,w6 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w8,w8,w5 + add w21,w21,w7 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w9,w9,w6 + add w20,w20,w8 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w10,w10,w7 + add w24,w24,w9 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w11,w11,w8 + add w23,w23,w10 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w12,w12,w9 + add w22,w22,w11 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w13,w13,w10 + add w21,w21,w12 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w14,w14,w11 + add w20,w20,w13 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w15,w15,w12 + add w24,w24,w14 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w16,w16,w13 + add w23,w23,w15 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w17,w17,w14 + add w22,w22,w16 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w19,w19,w15 + add w21,w21,w17 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w19,w19,#31 + ldp w4,w5,[x0] + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w19 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ldp w6,w7,[x0,#8] + eor w25,w24,w22 + ror w27,w21,#27 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + ldr w8,[x0,#16] + add w20,w20,w25 // e+=F(b,c,d) + add w21,w21,w5 + add w22,w22,w6 + add w20,w20,w4 + add w23,w23,w7 + add w24,w24,w8 + stp w20,w21,[x0] + stp w22,w23,[x0,#8] + str w24,[x0,#16] + cbnz x2,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw +.globl sha1_block_data_order_hw +.hidden sha1_block_data_order_hw +.type sha1_block_data_order_hw,%function +.align 6 +sha1_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + adrp x4,.Lconst + add x4,x4,:lo12:.Lconst + eor v1.16b,v1.16b,v1.16b + ld1 {v0.4s},[x0],#16 + ld1 {v1.s}[0],[x0] + sub x0,x0,#16 + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x4] + +.Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + + add v20.4s,v16.4s,v4.4s + rev32 v6.16b,v6.16b + orr v22.16b,v0.16b,v0.16b // offload + + add v21.4s,v16.4s,v5.4s + rev32 v7.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b +.inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 + add v20.4s,v16.4s,v6.4s +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 1 +.inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v16.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 2 +.inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v16.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 3 +.inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 4 +.inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 5 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 6 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 7 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 8 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 9 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 10 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 11 +.inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 12 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 13 +.inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 14 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 15 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 16 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 17 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 18 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 19 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + + add v1.4s,v1.4s,v2.4s + add v0.4s,v0.4s,v22.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s},[x0],#16 + st1 {v1.s}[0],[x0] + + ldr x29,[sp],#16 + ret +.size sha1_block_data_order_hw,.-sha1_block_data_order_hw +.section .rodata +.align 6 +.Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-win.S new file mode 100644 index 0000000000..f8c8b86121 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-armv8-win.S @@ -0,0 +1,1222 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +.text + +.globl sha1_block_data_order_nohw + +.def sha1_block_data_order_nohw + .type 32 +.endef +.align 6 +sha1_block_data_order_nohw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp w20,w21,[x0] + ldp w22,w23,[x0,#8] + ldr w24,[x0,#16] + +Loop: + ldr x3,[x1],#64 + movz w28,#0x7999 + sub x2,x2,#1 + movk w28,#0x5a82,lsl#16 +#ifdef __AARCH64EB__ + ror x3,x3,#32 +#else + rev32 x3,x3 +#endif + add w24,w24,w28 // warm it up + add w24,w24,w3 + lsr x4,x3,#32 + ldr x5,[x1,#-56] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w4 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x5,x5,#32 +#else + rev32 x5,x5 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w5 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x6,x5,#32 + ldr x7,[x1,#-48] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w6 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x7,x7,#32 +#else + rev32 x7,x7 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w7 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x8,x7,#32 + ldr x9,[x1,#-40] + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w8 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x9,x9,#32 +#else + rev32 x9,x9 +#endif + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w9 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + lsr x10,x9,#32 + ldr x11,[x1,#-32] + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w10 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x11,x11,#32 +#else + rev32 x11,x11 +#endif + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w11 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + lsr x12,x11,#32 + ldr x13,[x1,#-24] + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w12 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x13,x13,#32 +#else + rev32 x13,x13 +#endif + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w13 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + lsr x14,x13,#32 + ldr x15,[x1,#-16] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w14 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x15,x15,#32 +#else + rev32 x15,x15 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w15 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x16,x15,#32 + ldr x17,[x1,#-8] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w16 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x17,x17,#32 +#else + rev32 x17,x17 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w17 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x19,x17,#32 + eor w3,w3,w5 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w3,w3,w11 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w3,w3,w16 + ror w22,w22,#2 + add w24,w24,w19 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + eor w4,w4,w12 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + eor w4,w4,w17 + ror w21,w21,#2 + add w23,w23,w3 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + eor w5,w5,w13 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + eor w5,w5,w19 + ror w20,w20,#2 + add w22,w22,w4 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + eor w6,w6,w14 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + eor w6,w6,w3 + ror w24,w24,#2 + add w21,w21,w5 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + eor w7,w7,w15 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + eor w7,w7,w4 + ror w23,w23,#2 + add w20,w20,w6 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w7,w7,#31 + movz w28,#0xeba1 + movk w28,#0x6ed9,lsl#16 + eor w8,w8,w10 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w8,w8,w16 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w8,w8,w5 + ror w22,w22,#2 + add w24,w24,w7 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w9,w9,w6 + add w23,w23,w8 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w10,w10,w7 + add w22,w22,w9 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w11,w11,w8 + add w21,w21,w10 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w12,w12,w9 + add w20,w20,w11 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w13,w13,w10 + add w24,w24,w12 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w14,w14,w11 + add w23,w23,w13 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w15,w15,w12 + add w22,w22,w14 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w16,w16,w13 + add w21,w21,w15 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w17,w17,w14 + add w20,w20,w16 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w19,w19,w15 + add w24,w24,w17 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w3,w3,w16 + add w23,w23,w19 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w4,w4,w17 + add w22,w22,w3 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w5,w5,w19 + add w21,w21,w4 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w6,w6,w3 + add w20,w20,w5 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w7,w7,w4 + add w24,w24,w6 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w8,w8,w5 + add w23,w23,w7 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w9,w9,w6 + add w22,w22,w8 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w10,w10,w7 + add w21,w21,w9 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w11,w11,w8 + add w20,w20,w10 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w11,w11,#31 + movz w28,#0xbcdc + movk w28,#0x8f1b,lsl#16 + eor w12,w12,w14 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w12,w12,w9 + add w24,w24,w11 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w13,w13,w15 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w13,w13,w5 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w13,w13,w10 + add w23,w23,w12 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w14,w14,w16 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w14,w14,w6 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w14,w14,w11 + add w22,w22,w13 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w15,w15,w17 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w15,w15,w7 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w15,w15,w12 + add w21,w21,w14 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w15,w15,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w16,w16,w19 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w16,w16,w8 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w16,w16,w13 + add w20,w20,w15 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w16,w16,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w17,w17,w3 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w17,w17,w9 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w17,w17,w14 + add w24,w24,w16 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w17,w17,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w19,w19,w4 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w19,w19,w10 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w19,w19,w15 + add w23,w23,w17 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w19,w19,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w3,w3,w5 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w3,w3,w11 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w3,w3,w16 + add w22,w22,w19 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w3,w3,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w4,w4,w6 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w4,w4,w12 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w4,w4,w17 + add w21,w21,w3 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w4,w4,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w5,w5,w7 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w5,w5,w13 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w5,w5,w19 + add w20,w20,w4 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w5,w5,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w6,w6,w8 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w6,w6,w14 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w6,w6,w3 + add w24,w24,w5 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w6,w6,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w7,w7,w9 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w7,w7,w15 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w7,w7,w4 + add w23,w23,w6 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w7,w7,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w8,w8,w10 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w8,w8,w16 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w8,w8,w5 + add w22,w22,w7 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w8,w8,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w9,w9,w11 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w9,w9,w17 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w9,w9,w6 + add w21,w21,w8 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w9,w9,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w10,w10,w12 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w10,w10,w19 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w10,w10,w7 + add w20,w20,w9 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w10,w10,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w11,w11,w13 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w11,w11,w3 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w11,w11,w8 + add w24,w24,w10 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w11,w11,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w12,w12,w14 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w12,w12,w4 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w12,w12,w9 + add w23,w23,w11 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w13,w13,w15 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w13,w13,w5 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w13,w13,w10 + add w22,w22,w12 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w14,w14,w16 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w14,w14,w6 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w14,w14,w11 + add w21,w21,w13 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w15,w15,w17 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w15,w15,w7 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w15,w15,w12 + add w20,w20,w14 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w15,w15,#31 + movz w28,#0xc1d6 + movk w28,#0xca62,lsl#16 + orr w25,w22,w23 + and w26,w22,w23 + eor w16,w16,w19 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w16,w16,w8 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w16,w16,w13 + add w24,w24,w15 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w17,w17,w14 + add w23,w23,w16 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w19,w19,w15 + add w22,w22,w17 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w3,w3,w16 + add w21,w21,w19 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w4,w4,w17 + add w20,w20,w3 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w5,w5,w19 + add w24,w24,w4 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w6,w6,w3 + add w23,w23,w5 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w7,w7,w4 + add w22,w22,w6 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w8,w8,w5 + add w21,w21,w7 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w9,w9,w6 + add w20,w20,w8 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w10,w10,w7 + add w24,w24,w9 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w11,w11,w8 + add w23,w23,w10 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w12,w12,w9 + add w22,w22,w11 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w13,w13,w10 + add w21,w21,w12 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w14,w14,w11 + add w20,w20,w13 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w15,w15,w12 + add w24,w24,w14 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w16,w16,w13 + add w23,w23,w15 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w17,w17,w14 + add w22,w22,w16 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w19,w19,w15 + add w21,w21,w17 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w19,w19,#31 + ldp w4,w5,[x0] + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w19 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ldp w6,w7,[x0,#8] + eor w25,w24,w22 + ror w27,w21,#27 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + ldr w8,[x0,#16] + add w20,w20,w25 // e+=F(b,c,d) + add w21,w21,w5 + add w22,w22,w6 + add w20,w20,w4 + add w23,w23,w7 + add w24,w24,w8 + stp w20,w21,[x0] + stp w22,w23,[x0,#8] + str w24,[x0,#16] + cbnz x2,Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret + +.globl sha1_block_data_order_hw + +.def sha1_block_data_order_hw + .type 32 +.endef +.align 6 +sha1_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + adrp x4,Lconst + add x4,x4,:lo12:Lconst + eor v1.16b,v1.16b,v1.16b + ld1 {v0.4s},[x0],#16 + ld1 {v1.s}[0],[x0] + sub x0,x0,#16 + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x4] + +Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + + add v20.4s,v16.4s,v4.4s + rev32 v6.16b,v6.16b + orr v22.16b,v0.16b,v0.16b // offload + + add v21.4s,v16.4s,v5.4s + rev32 v7.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b +.long 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 + add v20.4s,v16.4s,v6.4s +.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 1 +.long 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v16.4s,v7.4s +.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 2 +.long 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v16.4s,v4.4s +.long 0x5e281885 //sha1su1 v5.16b,v4.16b +.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 3 +.long 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 4 +.long 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v6.4s +.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 5 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v7.4s +.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 6 +.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v4.4s +.long 0x5e281885 //sha1su1 v5.16b,v4.16b +.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 7 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 8 +.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 9 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v7.4s +.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 10 +.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v4.4s +.long 0x5e281885 //sha1su1 v5.16b,v4.16b +.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 11 +.long 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v5.4s +.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 12 +.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 13 +.long 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s +.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 14 +.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v4.4s +.long 0x5e281885 //sha1su1 v5.16b,v4.16b +.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 15 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v5.4s +.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.long 0x5e280803 //sha1h v3.16b,v0.16b // 16 +.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v6.4s +.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.long 0x5e280802 //sha1h v2.16b,v0.16b // 17 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + +.long 0x5e280803 //sha1h v3.16b,v0.16b // 18 +.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + +.long 0x5e280802 //sha1h v2.16b,v0.16b // 19 +.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + + add v1.4s,v1.4s,v2.4s + add v0.4s,v0.4s,v22.4s + + cbnz x2,Loop_hw + + st1 {v0.4s},[x0],#16 + st1 {v1.s}[0],[x0] + + ldr x29,[sp],#16 + ret + +.section .rodata +.align 6 +Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-apple.S new file mode 100644 index 0000000000..a1ea1e6bf9 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-apple.S @@ -0,0 +1,5450 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + +.globl _sha1_block_data_order_nohw +.private_extern _sha1_block_data_order_nohw + +.p2align 4 +_sha1_block_data_order_nohw: + +_CET_ENDBR + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %rax,64(%rsp) + +L$prologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp L$loop + +.p2align 4 +L$loop: + movl 0(%r9),%edx + bswapl %edx + movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) + movl %esi,%ecx + bswapl %ebp + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ecx + bswapl %r14d + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) + movl %r12d,%ecx + bswapl %edx + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) + movl %r11d,%ecx + bswapl %ebp + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) + movl %edi,%ecx + bswapl %r14d + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) + movl %esi,%ecx + bswapl %edx + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%r14,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) + movl %r13d,%ecx + bswapl %ebp + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) + movl %r12d,%ecx + bswapl %r14d + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) + movl %r11d,%ecx + bswapl %edx + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%r14,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) + movl %edi,%ecx + bswapl %ebp + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rdx,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) + movl %esi,%ecx + bswapl %r14d + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ecx + bswapl %edx + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%r14,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %r12d,%ecx + bswapl %ebp + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rdx,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r11d,%ecx + bswapl %r14d + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rbp,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %edi,%ecx + bswapl %edx + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%r14,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %esi,%ecx + xorl 8(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi + xorl %r12d,%eax + addl %ecx,%r13d + roll $1,%ebp + addl %eax,%r13d + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %r13d,%ecx + xorl 12(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi + xorl %r11d,%eax + addl %ecx,%r12d + roll $1,%r14d + addl %eax,%r12d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d + xorl %edi,%eax + addl %ecx,%r11d + roll $1,%edx + addl %eax,%r11d + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + roll $30,%r12d + xorl %esi,%eax + addl %ecx,%edi + roll $1,%ebp + addl %eax,%edi + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %edi,%ecx + xorl 24(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + roll $30,%r11d + xorl %r13d,%eax + addl %ecx,%esi + roll $1,%r14d + addl %eax,%esi + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) + movl %esi,%ecx + xorl 28(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) + movl %r13d,%ecx + xorl 32(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) + movl %r12d,%ecx + xorl 36(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax + movl %edx,32(%rsp) + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax + movl %ebp,36(%rsp) + movl %esi,%ecx + xorl 48(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal 1859775393(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) + movl %r13d,%ecx + xorl 52(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) + movl %r12d,%ecx + xorl 56(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) + movl %r11d,%ecx + xorl 60(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax + movl %edx,56(%rsp) + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax + movl %ebp,60(%rsp) + movl %r13d,%ecx + xorl 8(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%r14d + leal 1859775393(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) + movl %r12d,%ecx + xorl 12(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) + movl %r11d,%ecx + xorl 16(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) + movl %edi,%ecx + xorl 20(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax + movl %edx,16(%rsp) + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 52(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax + movl %ebp,20(%rsp) + movl %r12d,%ecx + xorl 32(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 56(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) + movl %r11d,%ecx + xorl 36(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) + movl %edi,%ecx + xorl 40(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax + movl %edx,40(%rsp) + movl %edi,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax + movl %ebp,44(%rsp) + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%ebp + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%r14d + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx + xorl 8(%rsp),%edx + andl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%edx + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax + movl %edx,0(%rsp) + movl %esi,%ebx + xorl 12(%rsp),%ebp + andl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%ebp + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%r14d + leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%r14d + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax + movl %esi,%ecx + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%edx + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax + movl %r13d,%ecx + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%ebp + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%r14d + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx + xorl 32(%rsp),%edx + andl %r13d,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%edx + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax + movl %edx,24(%rsp) + movl %r13d,%ebx + xorl 36(%rsp),%ebp + andl %r12d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%ebp + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax + movl %ebp,28(%rsp) + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 0(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 20(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r13d,%ecx + xorl 0(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 24(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %r12d,%ecx + xorl 4(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %r11d,%ecx + xorl 8(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %edi,%ecx + xorl 12(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %r12d,%ecx + xorl 24(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + leal -899497514(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) + movl %r11d,%ecx + xorl 28(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) + movl %edi,%ecx + xorl 32(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) + movl %esi,%ecx + xorl 36(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r13d,%eax + + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %r12d,%eax + + movl %r11d,%ecx + xorl 48(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal -899497514(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + + movl %edi,%ecx + xorl 52(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + + movl %r13d,%ecx + xorl 60(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 60(%rsp),%ebp + movl %r12d,%eax + + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r11d,%eax + movl %edi,%ecx + xorl %r13d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz L$loop + + movq 64(%rsp),%rsi + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue: + ret + + +.globl _sha1_block_data_order_hw +.private_extern _sha1_block_data_order_hw + +.p2align 5 +_sha1_block_data_order_hw: + +_CET_ENDBR + movdqu (%rdi),%xmm0 + movd 16(%rdi),%xmm1 + movdqa K_XX_XX+160(%rip),%xmm3 + + movdqu (%rsi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%rsi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + movdqa %xmm1,%xmm9 +.byte 102,15,56,0,251 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + decq %rdx + leaq 64(%rsi),%r8 + paddd %xmm4,%xmm1 + cmovneq %r8,%rsi + prefetcht0 512(%rsi) + movdqa %xmm0,%xmm8 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%rsi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%rsi),%xmm5 +.byte 102,15,56,0,227 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,235 + + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,243 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 65,15,56,200,201 +.byte 102,15,56,0,251 + + paddd %xmm8,%xmm0 + movdqa %xmm1,%xmm9 + + jnz L$oop_shaext + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%rdi) + movd %xmm1,16(%rdi) + ret + + +.globl _sha1_block_data_order_ssse3 +.private_extern _sha1_block_data_order_ssse3 + +.p2align 4 +_sha1_block_data_order_ssse3: + +_CET_ENDBR + movq %rsp,%r11 + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + leaq -64(%rsp),%rsp + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r14),%xmm6 + movdqa -64(%r14),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + addq $64,%r9 + paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi + movdqa %xmm3,%xmm8 + paddd %xmm3,%xmm9 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm2,%xmm8 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm8 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx + por %xmm8,%xmm4 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + movdqa -64(%r14),%xmm10 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi + movdqa %xmm4,%xmm9 + paddd %xmm4,%xmm10 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm9 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp + por %xmm9,%xmm5 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + movdqa -32(%r14),%xmm8 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi + movdqa %xmm5,%xmm10 + paddd %xmm5,%xmm8 + movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm10 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm10 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm10 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax + por %xmm10,%xmm6 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + movdqa -32(%r14),%xmm9 + roll $5,%ebp + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi + movdqa %xmm6,%xmm8 + paddd %xmm6,%xmm9 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm8 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm8 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx + por %xmm8,%xmm7 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + movdqa -32(%r14),%xmm10 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm10,%xmm8 + xorl %ebx,%eax + paddd %xmm7,%xmm10 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm0,%xmm9 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm0 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm9 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebx + paddd %xmm0,%xmm8 + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm8,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm1 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm10 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm10,%xmm1 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r14),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm9,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm2 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm8 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm8,%xmm2 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + rorl $7,%edx + paddd %xmm2,%xmm10 + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm10,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + pslld $2,%xmm3 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm9 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm9,%xmm3 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebp + paddd %xmm3,%xmm8 + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm8,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm4 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm10 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm10,%xmm4 + addl %ebx,%eax + addl 12(%rsp),%ebp + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%eax + paddd %xmm4,%xmm9 + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi + movdqa %xmm9,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm5 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm8 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm8,%xmm5 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm5,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm7,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 + xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm6,%xmm9 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + pslld $2,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm9 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm5,%xmm10 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm3,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm6,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm0,%xmm7 + roll $5,%ebx + addl %esi,%eax + movdqa 32(%r14),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 + xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax + addl 52(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm7,%xmm10 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm10 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm6,%xmm8 + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm4,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + punpcklqdq %xmm7,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm1,%xmm0 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 + xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm0,%xmm8 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm8 + addl 8(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm7,%xmm9 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm5,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm0,%xmm9 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm2,%xmm1 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 + xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm1,%xmm9 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm1 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm9 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm0,%xmm10 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm6,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm1,%xmm10 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm3,%xmm2 + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 + xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm2,%xmm10 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm2 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 + rorl $7,%edx + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm1,%xmm8 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%ebx + paddd %xmm2,%xmm9 + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm9,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm3 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm8 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm8,%xmm3 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm10,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je L$done_ssse3 + movdqa 64(%r14),%xmm6 + movdqa -64(%r14),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,206 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm9,%xmm0 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm0,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm1,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,222 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm9,%xmm2 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm2,32(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp L$oop_ssse3 + +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movq -40(%r11),%r14 + + movq -32(%r11),%r13 + + movq -24(%r11),%r12 + + movq -16(%r11),%rbp + + movq -8(%r11),%rbx + + leaq (%r11),%rsp + +L$epilogue_ssse3: + ret + + +.globl _sha1_block_data_order_avx +.private_extern _sha1_block_data_order_avx + +.p2align 4 +_sha1_block_data_order_avx: + +_CET_ENDBR + movq %rsp,%r11 + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + leaq -64(%rsp),%rsp + vzeroupper + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r14),%xmm6 + vmovdqa -64(%r14),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm11,%xmm0,%xmm4 + vpaddd %xmm11,%xmm1,%xmm5 + vpaddd %xmm11,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + jmp L$oop_avx +.p2align 4 +L$oop_avx: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm11,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm10 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm4,%xmm4 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vpxor %xmm10,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm11,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm10 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm10,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa -32(%r14),%xmm11 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vpaddd %xmm5,%xmm11,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm10 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm6,%xmm6 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm10,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm11,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm10 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm10,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm11,%xmm9 + addl %esi,%edx + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm11,%xmm9 + vmovdqa 0(%r14),%xmm11 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + vpaddd %xmm3,%xmm11,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm11,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm11,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm11,%xmm9 + vmovdqa 32(%r14),%xmm11 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm11,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm11,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm11,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm11,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je L$done_avx + vmovdqa 64(%r14),%xmm6 + vmovdqa -64(%r14),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm11,%xmm0,%xmm4 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm11,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm5,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm11,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm6,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp L$oop_avx + +.p2align 4 +L$done_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroupper + + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movq -40(%r11),%r14 + + movq -32(%r11),%r13 + + movq -24(%r11),%r12 + + movq -16(%r11),%rbp + + movq -8(%r11),%rbx + + leaq (%r11),%rsp + +L$epilogue_avx: + ret + + +.globl _sha1_block_data_order_avx2 +.private_extern _sha1_block_data_order_avx2 + +.p2align 4 +_sha1_block_data_order_avx2: + +_CET_ENDBR + movq %rsp,%r11 + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + vzeroupper + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + leaq -640(%rsp),%rsp + shlq $6,%r10 + leaq 64(%r9),%r13 + andq $-128,%rsp + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + cmpq %r10,%r13 + cmovaeq %r9,%r13 + movl 4(%r8),%ebp + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl 16(%r8),%esi + vmovdqu 64(%r14),%ymm6 + + vmovdqu (%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + leaq 64(%r9),%r9 + vinserti128 $1,(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vpshufb %ymm6,%ymm0,%ymm0 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vpshufb %ymm6,%ymm1,%ymm1 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + vpshufb %ymm6,%ymm2,%ymm2 + vmovdqu -64(%r14),%ymm11 + vpshufb %ymm6,%ymm3,%ymm3 + + vpaddd %ymm11,%ymm0,%ymm4 + vpaddd %ymm11,%ymm1,%ymm5 + vmovdqu %ymm4,0(%rsp) + vpaddd %ymm11,%ymm2,%ymm6 + vmovdqu %ymm5,32(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vpalignr $8,%ymm0,%ymm1,%ymm4 + vpsrldq $4,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $31,%ymm4,%ymm8 + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + vpxor %ymm10,%ymm4,%ymm4 + vpaddd %ymm11,%ymm4,%ymm9 + vmovdqu %ymm9,128(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm5 + vpsrldq $4,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r14),%ymm11 + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm5,%ymm5 + vpaddd %ymm11,%ymm5,%ymm9 + vmovdqu %ymm9,160(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm6 + vpsrldq $4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $31,%ymm6,%ymm8 + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + vpxor %ymm10,%ymm6,%ymm6 + vpaddd %ymm11,%ymm6,%ymm9 + vmovdqu %ymm9,192(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm7 + vpsrldq $4,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm7,%ymm8 + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + vpxor %ymm10,%ymm7,%ymm7 + vpaddd %ymm11,%ymm7,%ymm9 + vmovdqu %ymm9,224(%rsp) + leaq 128(%rsp),%r13 + jmp L$oop_avx2 +.p2align 5 +L$oop_avx2: + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + jmp L$align32_1 +.p2align 5 +L$align32_1: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + vpxor %ymm1,%ymm0,%ymm0 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpxor %ymm8,%ymm0,%ymm0 + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vpor %ymm8,%ymm0,%ymm0 + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + vpaddd %ymm11,%ymm0,%ymm9 + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + vmovdqu %ymm9,256(%rsp) + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + vpxor %ymm2,%ymm1,%ymm1 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpxor %ymm8,%ymm1,%ymm1 + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vpor %ymm8,%ymm1,%ymm1 + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + vpaddd %ymm11,%ymm1,%ymm9 + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vmovdqu %ymm9,288(%rsp) + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + vpxor %ymm3,%ymm2,%ymm2 + vmovdqu 0(%r14),%ymm11 + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpxor %ymm8,%ymm2,%ymm2 + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vpor %ymm8,%ymm2,%ymm2 + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + vpaddd %ymm11,%ymm2,%ymm9 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vmovdqu %ymm9,320(%rsp) + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + vpxor %ymm4,%ymm3,%ymm3 + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpxor %ymm8,%ymm3,%ymm3 + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + vpor %ymm8,%ymm3,%ymm3 + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + vpaddd %ymm11,%ymm3,%ymm9 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vmovdqu %ymm9,352(%rsp) + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpalignr $8,%ymm2,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpxor %ymm5,%ymm4,%ymm4 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpxor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + vpsrld $30,%ymm4,%ymm8 + vpslld $2,%ymm4,%ymm4 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpor %ymm8,%ymm4,%ymm4 + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpaddd %ymm11,%ymm4,%ymm9 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + vmovdqu %ymm9,384(%rsp) + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpalignr $8,%ymm3,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm6,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpxor %ymm8,%ymm5,%ymm5 + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + vpsrld $30,%ymm5,%ymm8 + vpslld $2,%ymm5,%ymm5 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vpor %ymm8,%ymm5,%ymm5 + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + vmovdqu %ymm9,416(%rsp) + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm7,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + vpxor %ymm8,%ymm6,%ymm6 + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + vpsrld $30,%ymm6,%ymm8 + vpslld $2,%ymm6,%ymm6 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpor %ymm8,%ymm6,%ymm6 + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + vmovdqu %ymm9,448(%rsp) + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm5,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm0,%ymm7,%ymm7 + vmovdqu 32(%r14),%ymm11 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpxor %ymm8,%ymm7,%ymm7 + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + vpsrld $30,%ymm7,%ymm8 + vpslld $2,%ymm7,%ymm7 + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpor %ymm8,%ymm7,%ymm7 + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + vmovdqu %ymm9,480(%rsp) + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + jmp L$align32_2 +.p2align 5 +L$align32_2: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -64(%r13),%ebp + xorl %esi,%ecx + vpxor %ymm1,%ymm0,%ymm0 + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + vpxor %ymm8,%ymm0,%ymm0 + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + vpor %ymm8,%ymm0,%ymm0 + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpaddd %ymm11,%ymm0,%ymm9 + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + vmovdqu %ymm9,512(%rsp) + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -28(%r13),%ebx + xorl %eax,%edx + vpxor %ymm2,%ymm1,%ymm1 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpxor %ymm8,%ymm1,%ymm1 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + vpor %ymm8,%ymm1,%ymm1 + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpaddd %ymm11,%ymm1,%ymm9 + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + vmovdqu %ymm9,544(%rsp) + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl 8(%r13),%ecx + xorl %ebp,%esi + vpxor %ymm3,%ymm2,%ymm2 + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + vpxor %ymm8,%ymm2,%ymm2 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + vpor %ymm8,%ymm2,%ymm2 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpaddd %ymm11,%ymm2,%ymm9 + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + vmovdqu %ymm9,576(%rsp) + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl 44(%r13),%edx + xorl %ebx,%eax + vpxor %ymm4,%ymm3,%ymm3 + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm3,%ymm3 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl %r12d,%edx + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + vpor %ymm8,%ymm3,%ymm3 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpaddd %ymm11,%ymm3,%ymm9 + addl %r12d,%ecx + andl %edi,%edx + addl 68(%r13),%ebx + xorl %eax,%edx + vmovdqu %ymm9,608(%rsp) + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -96(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -60(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%r9),%r13 + leaq 128(%r9),%rdi + cmpq %r10,%r13 + cmovaeq %r9,%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + je L$done_avx2 + vmovdqu 64(%r14),%ymm6 + cmpq %r10,%rdi + ja L$ast_avx2 + + vmovdqu -64(%rdi),%xmm0 + vmovdqu -48(%rdi),%xmm1 + vmovdqu -32(%rdi),%xmm2 + vmovdqu -16(%rdi),%xmm3 + vinserti128 $1,0(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + jmp L$ast_avx2 + +.p2align 5 +L$ast_avx2: + leaq 128+16(%rsp),%r13 + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + subq $-128,%r9 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vmovdqu -64(%r14),%ymm11 + vpshufb %ymm6,%ymm0,%ymm0 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpshufb %ymm6,%ymm1,%ymm1 + vpaddd %ymm11,%ymm0,%ymm8 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vmovdqu %ymm8,0(%rsp) + vpshufb %ymm6,%ymm2,%ymm2 + vpaddd %ymm11,%ymm1,%ymm9 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + vmovdqu %ymm9,32(%rsp) + vpshufb %ymm6,%ymm3,%ymm3 + vpaddd %ymm11,%ymm2,%ymm6 + addl -64(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + jmp L$align32_3 +.p2align 5 +L$align32_3: + vmovdqu %ymm6,64(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + addl -28(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vmovdqu %ymm7,96(%rsp) + addl 8(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm0,%ymm1,%ymm4 + addl 44(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + vpsrldq $4,%ymm3,%ymm8 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + xorl %ebp,%esi + addl %r12d,%edx + vpxor %ymm8,%ymm4,%ymm4 + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + vpsrld $31,%ymm4,%ymm8 + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + andl %edi,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + addl 68(%r13),%ebx + xorl %eax,%edx + vpxor %ymm10,%ymm4,%ymm4 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpaddd %ymm11,%ymm4,%ymm9 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vmovdqu %ymm9,128(%rsp) + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm1,%ymm2,%ymm5 + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrldq $4,%ymm4,%ymm8 + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r14),%ymm11 + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + xorl %eax,%edx + addl %r12d,%ecx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + vpxor %ymm10,%ymm5,%ymm5 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vmovdqu %ymm9,160(%rsp) + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm2,%ymm3,%ymm6 + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpsrldq $4,%ymm5,%ymm8 + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm8,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + vpsrld $31,%ymm6,%ymm8 + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + xorl %ebp,%esi + addl %r12d,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + xorl %ebx,%esi + addl -96(%r13),%ecx + vpxor %ymm10,%ymm6,%ymm6 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vmovdqu %ymm9,192(%rsp) + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpalignr $8,%ymm3,%ymm4,%ymm7 + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpsrldq $4,%ymm6,%ymm8 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm8,%ymm7,%ymm7 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + vpsrld $31,%ymm7,%ymm8 + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + xorl %ebx,%eax + addl %r12d,%esi + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + xorl %ecx,%eax + addl -60(%r13),%edx + vpxor %ymm10,%ymm7,%ymm7 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vmovdqu %ymm9,224(%rsp) + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%rsp),%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + jbe L$oop_avx2 + +L$done_avx2: + vzeroupper + movq -40(%r11),%r14 + + movq -32(%r11),%r13 + + movq -24(%r11),%r12 + + movq -16(%r11),%rbp + + movq -8(%r11),%rbx + + leaq (%r11),%rsp + +L$epilogue_avx2: + ret + + +.section __DATA,__const +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-linux.S new file mode 100644 index 0000000000..39d9ad3841 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-linux.S @@ -0,0 +1,5450 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,@function +.align 16 +sha1_block_data_order_nohw: +.cfi_startproc +_CET_ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %rax,64(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08 +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) + movl %esi,%ecx + bswapl %ebp + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ecx + bswapl %r14d + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) + movl %r12d,%ecx + bswapl %edx + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) + movl %r11d,%ecx + bswapl %ebp + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) + movl %edi,%ecx + bswapl %r14d + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) + movl %esi,%ecx + bswapl %edx + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%r14,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) + movl %r13d,%ecx + bswapl %ebp + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) + movl %r12d,%ecx + bswapl %r14d + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) + movl %r11d,%ecx + bswapl %edx + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%r14,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) + movl %edi,%ecx + bswapl %ebp + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rdx,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) + movl %esi,%ecx + bswapl %r14d + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ecx + bswapl %edx + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%r14,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %r12d,%ecx + bswapl %ebp + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rdx,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r11d,%ecx + bswapl %r14d + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rbp,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %edi,%ecx + bswapl %edx + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%r14,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %esi,%ecx + xorl 8(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi + xorl %r12d,%eax + addl %ecx,%r13d + roll $1,%ebp + addl %eax,%r13d + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %r13d,%ecx + xorl 12(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi + xorl %r11d,%eax + addl %ecx,%r12d + roll $1,%r14d + addl %eax,%r12d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d + xorl %edi,%eax + addl %ecx,%r11d + roll $1,%edx + addl %eax,%r11d + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + roll $30,%r12d + xorl %esi,%eax + addl %ecx,%edi + roll $1,%ebp + addl %eax,%edi + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %edi,%ecx + xorl 24(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + roll $30,%r11d + xorl %r13d,%eax + addl %ecx,%esi + roll $1,%r14d + addl %eax,%esi + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) + movl %esi,%ecx + xorl 28(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) + movl %r13d,%ecx + xorl 32(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) + movl %r12d,%ecx + xorl 36(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax + movl %edx,32(%rsp) + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax + movl %ebp,36(%rsp) + movl %esi,%ecx + xorl 48(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal 1859775393(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) + movl %r13d,%ecx + xorl 52(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) + movl %r12d,%ecx + xorl 56(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) + movl %r11d,%ecx + xorl 60(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax + movl %edx,56(%rsp) + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax + movl %ebp,60(%rsp) + movl %r13d,%ecx + xorl 8(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%r14d + leal 1859775393(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) + movl %r12d,%ecx + xorl 12(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) + movl %r11d,%ecx + xorl 16(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) + movl %edi,%ecx + xorl 20(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax + movl %edx,16(%rsp) + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 52(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax + movl %ebp,20(%rsp) + movl %r12d,%ecx + xorl 32(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 56(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) + movl %r11d,%ecx + xorl 36(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) + movl %edi,%ecx + xorl 40(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax + movl %edx,40(%rsp) + movl %edi,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax + movl %ebp,44(%rsp) + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%ebp + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%r14d + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx + xorl 8(%rsp),%edx + andl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%edx + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax + movl %edx,0(%rsp) + movl %esi,%ebx + xorl 12(%rsp),%ebp + andl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%ebp + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%r14d + leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%r14d + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax + movl %esi,%ecx + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%edx + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax + movl %r13d,%ecx + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%ebp + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%r14d + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx + xorl 32(%rsp),%edx + andl %r13d,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%edx + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax + movl %edx,24(%rsp) + movl %r13d,%ebx + xorl 36(%rsp),%ebp + andl %r12d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%ebp + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax + movl %ebp,28(%rsp) + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 0(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 20(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r13d,%ecx + xorl 0(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 24(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %r12d,%ecx + xorl 4(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %r11d,%ecx + xorl 8(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %edi,%ecx + xorl 12(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %r12d,%ecx + xorl 24(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + leal -899497514(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) + movl %r11d,%ecx + xorl 28(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) + movl %edi,%ecx + xorl 32(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) + movl %esi,%ecx + xorl 36(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r13d,%eax + + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %r12d,%eax + + movl %r11d,%ecx + xorl 48(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal -899497514(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + + movl %edi,%ecx + xorl 52(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + + movl %r13d,%ecx + xorl 60(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 60(%rsp),%ebp + movl %r12d,%eax + + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r11d,%eax + movl %edi,%ecx + xorl %r13d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + ret +.cfi_endproc +.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw +.globl sha1_block_data_order_hw +.hidden sha1_block_data_order_hw +.type sha1_block_data_order_hw,@function +.align 32 +sha1_block_data_order_hw: +.cfi_startproc +_CET_ENDBR + movdqu (%rdi),%xmm0 + movd 16(%rdi),%xmm1 + movdqa K_XX_XX+160(%rip),%xmm3 + + movdqu (%rsi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%rsi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + movdqa %xmm1,%xmm9 +.byte 102,15,56,0,251 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + decq %rdx + leaq 64(%rsi),%r8 + paddd %xmm4,%xmm1 + cmovneq %r8,%rsi + prefetcht0 512(%rsi) + movdqa %xmm0,%xmm8 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%rsi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%rsi),%xmm5 +.byte 102,15,56,0,227 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,235 + + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,243 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 65,15,56,200,201 +.byte 102,15,56,0,251 + + paddd %xmm8,%xmm0 + movdqa %xmm1,%xmm9 + + jnz .Loop_shaext + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%rdi) + movd %xmm1,16(%rdi) + ret +.cfi_endproc +.size sha1_block_data_order_hw,.-sha1_block_data_order_hw +.globl sha1_block_data_order_ssse3 +.hidden sha1_block_data_order_ssse3 +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +.cfi_startproc +_CET_ENDBR + movq %rsp,%r11 +.cfi_def_cfa_register %r11 + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + leaq -64(%rsp),%rsp + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r14),%xmm6 + movdqa -64(%r14),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + addq $64,%r9 + paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi + movdqa %xmm3,%xmm8 + paddd %xmm3,%xmm9 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm2,%xmm8 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm8 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx + por %xmm8,%xmm4 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + movdqa -64(%r14),%xmm10 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi + movdqa %xmm4,%xmm9 + paddd %xmm4,%xmm10 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm9 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp + por %xmm9,%xmm5 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + movdqa -32(%r14),%xmm8 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi + movdqa %xmm5,%xmm10 + paddd %xmm5,%xmm8 + movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm10 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm10 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm10 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax + por %xmm10,%xmm6 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + movdqa -32(%r14),%xmm9 + roll $5,%ebp + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi + movdqa %xmm6,%xmm8 + paddd %xmm6,%xmm9 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm8 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm8 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx + por %xmm8,%xmm7 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + movdqa -32(%r14),%xmm10 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm10,%xmm8 + xorl %ebx,%eax + paddd %xmm7,%xmm10 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm0,%xmm9 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm0 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm9 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebx + paddd %xmm0,%xmm8 + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm8,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm1 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm10 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm10,%xmm1 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r14),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm9,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm2 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm8 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm8,%xmm2 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + rorl $7,%edx + paddd %xmm2,%xmm10 + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm10,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + pslld $2,%xmm3 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm9 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm9,%xmm3 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebp + paddd %xmm3,%xmm8 + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm8,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm4 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm10 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm10,%xmm4 + addl %ebx,%eax + addl 12(%rsp),%ebp + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%eax + paddd %xmm4,%xmm9 + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi + movdqa %xmm9,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm5 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm8 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm8,%xmm5 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm5,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm7,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 + xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm6,%xmm9 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + pslld $2,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm9 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm5,%xmm10 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm3,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm6,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm0,%xmm7 + roll $5,%ebx + addl %esi,%eax + movdqa 32(%r14),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 + xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax + addl 52(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm7,%xmm10 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm10 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm6,%xmm8 + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm4,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + punpcklqdq %xmm7,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm1,%xmm0 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 + xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm0,%xmm8 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm8 + addl 8(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm7,%xmm9 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm5,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm0,%xmm9 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm2,%xmm1 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 + xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm1,%xmm9 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm1 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm9 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm0,%xmm10 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm6,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm1,%xmm10 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm3,%xmm2 + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 + xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm2,%xmm10 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm2 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 + rorl $7,%edx + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm1,%xmm8 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%ebx + paddd %xmm2,%xmm9 + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm9,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm3 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm8 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm8,%xmm3 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm10,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r14),%xmm6 + movdqa -64(%r14),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,206 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm9,%xmm0 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm0,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm1,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,222 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm9,%xmm2 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm2,32(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movq -40(%r11),%r14 +.cfi_restore %r14 + movq -32(%r11),%r13 +.cfi_restore %r13 + movq -24(%r11),%r12 +.cfi_restore %r12 + movq -16(%r11),%rbp +.cfi_restore %rbp + movq -8(%r11),%rbx +.cfi_restore %rbx + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_ssse3: + ret +.cfi_endproc +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.globl sha1_block_data_order_avx +.hidden sha1_block_data_order_avx +.type sha1_block_data_order_avx,@function +.align 16 +sha1_block_data_order_avx: +.cfi_startproc +_CET_ENDBR + movq %rsp,%r11 +.cfi_def_cfa_register %r11 + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + leaq -64(%rsp),%rsp + vzeroupper + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r14),%xmm6 + vmovdqa -64(%r14),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm11,%xmm0,%xmm4 + vpaddd %xmm11,%xmm1,%xmm5 + vpaddd %xmm11,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + jmp .Loop_avx +.align 16 +.Loop_avx: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm11,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm10 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm4,%xmm4 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vpxor %xmm10,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm11,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm10 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm10,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa -32(%r14),%xmm11 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vpaddd %xmm5,%xmm11,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm10 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm6,%xmm6 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm10,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm11,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm10 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm10,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm11,%xmm9 + addl %esi,%edx + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm11,%xmm9 + vmovdqa 0(%r14),%xmm11 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + vpaddd %xmm3,%xmm11,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm11,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm11,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm11,%xmm9 + vmovdqa 32(%r14),%xmm11 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm11,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm11,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm11,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm11,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_avx + vmovdqa 64(%r14),%xmm6 + vmovdqa -64(%r14),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm11,%xmm0,%xmm4 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm11,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm5,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm11,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm6,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_avx + +.align 16 +.Ldone_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroupper + + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movq -40(%r11),%r14 +.cfi_restore %r14 + movq -32(%r11),%r13 +.cfi_restore %r13 + movq -24(%r11),%r12 +.cfi_restore %r12 + movq -16(%r11),%rbp +.cfi_restore %rbp + movq -8(%r11),%rbx +.cfi_restore %rbx + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + ret +.cfi_endproc +.size sha1_block_data_order_avx,.-sha1_block_data_order_avx +.globl sha1_block_data_order_avx2 +.hidden sha1_block_data_order_avx2 +.type sha1_block_data_order_avx2,@function +.align 16 +sha1_block_data_order_avx2: +.cfi_startproc +_CET_ENDBR + movq %rsp,%r11 +.cfi_def_cfa_register %r11 + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + vzeroupper + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + leaq -640(%rsp),%rsp + shlq $6,%r10 + leaq 64(%r9),%r13 + andq $-128,%rsp + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + cmpq %r10,%r13 + cmovaeq %r9,%r13 + movl 4(%r8),%ebp + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl 16(%r8),%esi + vmovdqu 64(%r14),%ymm6 + + vmovdqu (%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + leaq 64(%r9),%r9 + vinserti128 $1,(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vpshufb %ymm6,%ymm0,%ymm0 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vpshufb %ymm6,%ymm1,%ymm1 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + vpshufb %ymm6,%ymm2,%ymm2 + vmovdqu -64(%r14),%ymm11 + vpshufb %ymm6,%ymm3,%ymm3 + + vpaddd %ymm11,%ymm0,%ymm4 + vpaddd %ymm11,%ymm1,%ymm5 + vmovdqu %ymm4,0(%rsp) + vpaddd %ymm11,%ymm2,%ymm6 + vmovdqu %ymm5,32(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vpalignr $8,%ymm0,%ymm1,%ymm4 + vpsrldq $4,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $31,%ymm4,%ymm8 + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + vpxor %ymm10,%ymm4,%ymm4 + vpaddd %ymm11,%ymm4,%ymm9 + vmovdqu %ymm9,128(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm5 + vpsrldq $4,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r14),%ymm11 + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm5,%ymm5 + vpaddd %ymm11,%ymm5,%ymm9 + vmovdqu %ymm9,160(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm6 + vpsrldq $4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $31,%ymm6,%ymm8 + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + vpxor %ymm10,%ymm6,%ymm6 + vpaddd %ymm11,%ymm6,%ymm9 + vmovdqu %ymm9,192(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm7 + vpsrldq $4,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm7,%ymm8 + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + vpxor %ymm10,%ymm7,%ymm7 + vpaddd %ymm11,%ymm7,%ymm9 + vmovdqu %ymm9,224(%rsp) + leaq 128(%rsp),%r13 + jmp .Loop_avx2 +.align 32 +.Loop_avx2: + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + jmp .Lalign32_1 +.align 32 +.Lalign32_1: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + vpxor %ymm1,%ymm0,%ymm0 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpxor %ymm8,%ymm0,%ymm0 + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vpor %ymm8,%ymm0,%ymm0 + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + vpaddd %ymm11,%ymm0,%ymm9 + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + vmovdqu %ymm9,256(%rsp) + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + vpxor %ymm2,%ymm1,%ymm1 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpxor %ymm8,%ymm1,%ymm1 + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vpor %ymm8,%ymm1,%ymm1 + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + vpaddd %ymm11,%ymm1,%ymm9 + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vmovdqu %ymm9,288(%rsp) + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + vpxor %ymm3,%ymm2,%ymm2 + vmovdqu 0(%r14),%ymm11 + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpxor %ymm8,%ymm2,%ymm2 + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vpor %ymm8,%ymm2,%ymm2 + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + vpaddd %ymm11,%ymm2,%ymm9 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vmovdqu %ymm9,320(%rsp) + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + vpxor %ymm4,%ymm3,%ymm3 + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpxor %ymm8,%ymm3,%ymm3 + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + vpor %ymm8,%ymm3,%ymm3 + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + vpaddd %ymm11,%ymm3,%ymm9 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vmovdqu %ymm9,352(%rsp) + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpalignr $8,%ymm2,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpxor %ymm5,%ymm4,%ymm4 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpxor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + vpsrld $30,%ymm4,%ymm8 + vpslld $2,%ymm4,%ymm4 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpor %ymm8,%ymm4,%ymm4 + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpaddd %ymm11,%ymm4,%ymm9 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + vmovdqu %ymm9,384(%rsp) + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpalignr $8,%ymm3,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm6,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpxor %ymm8,%ymm5,%ymm5 + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + vpsrld $30,%ymm5,%ymm8 + vpslld $2,%ymm5,%ymm5 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vpor %ymm8,%ymm5,%ymm5 + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + vmovdqu %ymm9,416(%rsp) + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm7,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + vpxor %ymm8,%ymm6,%ymm6 + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + vpsrld $30,%ymm6,%ymm8 + vpslld $2,%ymm6,%ymm6 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpor %ymm8,%ymm6,%ymm6 + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + vmovdqu %ymm9,448(%rsp) + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm5,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm0,%ymm7,%ymm7 + vmovdqu 32(%r14),%ymm11 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpxor %ymm8,%ymm7,%ymm7 + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + vpsrld $30,%ymm7,%ymm8 + vpslld $2,%ymm7,%ymm7 + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpor %ymm8,%ymm7,%ymm7 + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + vmovdqu %ymm9,480(%rsp) + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + jmp .Lalign32_2 +.align 32 +.Lalign32_2: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -64(%r13),%ebp + xorl %esi,%ecx + vpxor %ymm1,%ymm0,%ymm0 + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + vpxor %ymm8,%ymm0,%ymm0 + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + vpor %ymm8,%ymm0,%ymm0 + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpaddd %ymm11,%ymm0,%ymm9 + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + vmovdqu %ymm9,512(%rsp) + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -28(%r13),%ebx + xorl %eax,%edx + vpxor %ymm2,%ymm1,%ymm1 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpxor %ymm8,%ymm1,%ymm1 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + vpor %ymm8,%ymm1,%ymm1 + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpaddd %ymm11,%ymm1,%ymm9 + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + vmovdqu %ymm9,544(%rsp) + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl 8(%r13),%ecx + xorl %ebp,%esi + vpxor %ymm3,%ymm2,%ymm2 + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + vpxor %ymm8,%ymm2,%ymm2 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + vpor %ymm8,%ymm2,%ymm2 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpaddd %ymm11,%ymm2,%ymm9 + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + vmovdqu %ymm9,576(%rsp) + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl 44(%r13),%edx + xorl %ebx,%eax + vpxor %ymm4,%ymm3,%ymm3 + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm3,%ymm3 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl %r12d,%edx + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + vpor %ymm8,%ymm3,%ymm3 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpaddd %ymm11,%ymm3,%ymm9 + addl %r12d,%ecx + andl %edi,%edx + addl 68(%r13),%ebx + xorl %eax,%edx + vmovdqu %ymm9,608(%rsp) + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -96(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -60(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%r9),%r13 + leaq 128(%r9),%rdi + cmpq %r10,%r13 + cmovaeq %r9,%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + je .Ldone_avx2 + vmovdqu 64(%r14),%ymm6 + cmpq %r10,%rdi + ja .Last_avx2 + + vmovdqu -64(%rdi),%xmm0 + vmovdqu -48(%rdi),%xmm1 + vmovdqu -32(%rdi),%xmm2 + vmovdqu -16(%rdi),%xmm3 + vinserti128 $1,0(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + jmp .Last_avx2 + +.align 32 +.Last_avx2: + leaq 128+16(%rsp),%r13 + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + subq $-128,%r9 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vmovdqu -64(%r14),%ymm11 + vpshufb %ymm6,%ymm0,%ymm0 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpshufb %ymm6,%ymm1,%ymm1 + vpaddd %ymm11,%ymm0,%ymm8 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vmovdqu %ymm8,0(%rsp) + vpshufb %ymm6,%ymm2,%ymm2 + vpaddd %ymm11,%ymm1,%ymm9 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + vmovdqu %ymm9,32(%rsp) + vpshufb %ymm6,%ymm3,%ymm3 + vpaddd %ymm11,%ymm2,%ymm6 + addl -64(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + jmp .Lalign32_3 +.align 32 +.Lalign32_3: + vmovdqu %ymm6,64(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + addl -28(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vmovdqu %ymm7,96(%rsp) + addl 8(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm0,%ymm1,%ymm4 + addl 44(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + vpsrldq $4,%ymm3,%ymm8 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + xorl %ebp,%esi + addl %r12d,%edx + vpxor %ymm8,%ymm4,%ymm4 + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + vpsrld $31,%ymm4,%ymm8 + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + andl %edi,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + addl 68(%r13),%ebx + xorl %eax,%edx + vpxor %ymm10,%ymm4,%ymm4 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpaddd %ymm11,%ymm4,%ymm9 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vmovdqu %ymm9,128(%rsp) + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm1,%ymm2,%ymm5 + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrldq $4,%ymm4,%ymm8 + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r14),%ymm11 + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + xorl %eax,%edx + addl %r12d,%ecx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + vpxor %ymm10,%ymm5,%ymm5 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vmovdqu %ymm9,160(%rsp) + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm2,%ymm3,%ymm6 + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpsrldq $4,%ymm5,%ymm8 + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm8,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + vpsrld $31,%ymm6,%ymm8 + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + xorl %ebp,%esi + addl %r12d,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + xorl %ebx,%esi + addl -96(%r13),%ecx + vpxor %ymm10,%ymm6,%ymm6 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vmovdqu %ymm9,192(%rsp) + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpalignr $8,%ymm3,%ymm4,%ymm7 + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpsrldq $4,%ymm6,%ymm8 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm8,%ymm7,%ymm7 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + vpsrld $31,%ymm7,%ymm8 + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + xorl %ebx,%eax + addl %r12d,%esi + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + xorl %ecx,%eax + addl -60(%r13),%edx + vpxor %ymm10,%ymm7,%ymm7 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vmovdqu %ymm9,224(%rsp) + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%rsp),%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + jbe .Loop_avx2 + +.Ldone_avx2: + vzeroupper + movq -40(%r11),%r14 +.cfi_restore %r14 + movq -32(%r11),%r13 +.cfi_restore %r13 + movq -24(%r11),%r12 +.cfi_restore %r12 + movq -16(%r11),%rbp +.cfi_restore %rbp + movq -8(%r11),%rbx +.cfi_restore %rbx + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + ret +.cfi_endproc +.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2 +.section .rodata +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-win.asm new file mode 100644 index 0000000000..92e9b9c455 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha1-x86_64-win.asm @@ -0,0 +1,5768 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + +global sha1_block_data_order_nohw + +ALIGN 16 +sha1_block_data_order_nohw: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_nohw: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + mov r8,rdi + sub rsp,72 + mov r9,rsi + and rsp,-64 + mov r10,rdx + mov QWORD[64+rsp],rax + +$L$prologue: + + mov esi,DWORD[r8] + mov edi,DWORD[4+r8] + mov r11d,DWORD[8+r8] + mov r12d,DWORD[12+r8] + mov r13d,DWORD[16+r8] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov edx,DWORD[r9] + bswap edx + mov ebp,DWORD[4+r9] + mov eax,r12d + mov DWORD[rsp],edx + mov ecx,esi + bswap ebp + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+rdx] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov r14d,DWORD[8+r9] + mov eax,r11d + mov DWORD[4+rsp],ebp + mov ecx,r13d + bswap r14d + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+rbp] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov edx,DWORD[12+r9] + mov eax,edi + mov DWORD[8+rsp],r14d + mov ecx,r12d + bswap edx + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+r14] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov ebp,DWORD[16+r9] + mov eax,esi + mov DWORD[12+rsp],edx + mov ecx,r11d + bswap ebp + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+rdx] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov r14d,DWORD[20+r9] + mov eax,r13d + mov DWORD[16+rsp],ebp + mov ecx,edi + bswap r14d + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+rbp] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov edx,DWORD[24+r9] + mov eax,r12d + mov DWORD[20+rsp],r14d + mov ecx,esi + bswap edx + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+r14] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov ebp,DWORD[28+r9] + mov eax,r11d + mov DWORD[24+rsp],edx + mov ecx,r13d + bswap ebp + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+rdx] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov r14d,DWORD[32+r9] + mov eax,edi + mov DWORD[28+rsp],ebp + mov ecx,r12d + bswap r14d + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+rbp] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov edx,DWORD[36+r9] + mov eax,esi + mov DWORD[32+rsp],r14d + mov ecx,r11d + bswap edx + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+r14] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov ebp,DWORD[40+r9] + mov eax,r13d + mov DWORD[36+rsp],edx + mov ecx,edi + bswap ebp + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+rdx] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov r14d,DWORD[44+r9] + mov eax,r12d + mov DWORD[40+rsp],ebp + mov ecx,esi + bswap r14d + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+rbp] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov edx,DWORD[48+r9] + mov eax,r11d + mov DWORD[44+rsp],r14d + mov ecx,r13d + bswap edx + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+r14] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov ebp,DWORD[52+r9] + mov eax,edi + mov DWORD[48+rsp],edx + mov ecx,r12d + bswap ebp + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+rdx] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov r14d,DWORD[56+r9] + mov eax,esi + mov DWORD[52+rsp],ebp + mov ecx,r11d + bswap r14d + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+rbp] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov edx,DWORD[60+r9] + mov eax,r13d + mov DWORD[56+rsp],r14d + mov ecx,edi + bswap edx + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+r14] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + xor ebp,DWORD[rsp] + mov eax,r12d + mov DWORD[60+rsp],edx + mov ecx,esi + xor ebp,DWORD[8+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[32+rsp] + and eax,edi + lea r13d,[1518500249+r13*1+rdx] + rol edi,30 + xor eax,r12d + add r13d,ecx + rol ebp,1 + add r13d,eax + xor r14d,DWORD[4+rsp] + mov eax,r11d + mov DWORD[rsp],ebp + mov ecx,r13d + xor r14d,DWORD[12+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[36+rsp] + and eax,esi + lea r12d,[1518500249+r12*1+rbp] + rol esi,30 + xor eax,r11d + add r12d,ecx + rol r14d,1 + add r12d,eax + xor edx,DWORD[8+rsp] + mov eax,edi + mov DWORD[4+rsp],r14d + mov ecx,r12d + xor edx,DWORD[16+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[40+rsp] + and eax,r13d + lea r11d,[1518500249+r11*1+r14] + rol r13d,30 + xor eax,edi + add r11d,ecx + rol edx,1 + add r11d,eax + xor ebp,DWORD[12+rsp] + mov eax,esi + mov DWORD[8+rsp],edx + mov ecx,r11d + xor ebp,DWORD[20+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[44+rsp] + and eax,r12d + lea edi,[1518500249+rdi*1+rdx] + rol r12d,30 + xor eax,esi + add edi,ecx + rol ebp,1 + add edi,eax + xor r14d,DWORD[16+rsp] + mov eax,r13d + mov DWORD[12+rsp],ebp + mov ecx,edi + xor r14d,DWORD[24+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[48+rsp] + and eax,r11d + lea esi,[1518500249+rsi*1+rbp] + rol r11d,30 + xor eax,r13d + add esi,ecx + rol r14d,1 + add esi,eax + xor edx,DWORD[20+rsp] + mov eax,edi + mov DWORD[16+rsp],r14d + mov ecx,esi + xor edx,DWORD[28+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[52+rsp] + lea r13d,[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[24+rsp] + mov eax,esi + mov DWORD[20+rsp],edx + mov ecx,r13d + xor ebp,DWORD[32+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[56+rsp] + lea r12d,[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[28+rsp] + mov eax,r13d + mov DWORD[24+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[36+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[60+rsp] + lea r11d,[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[32+rsp] + mov eax,r12d + mov DWORD[28+rsp],r14d + mov ecx,r11d + xor edx,DWORD[40+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[rsp] + lea edi,[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[36+rsp] + mov eax,r11d + mov DWORD[32+rsp],edx + mov ecx,edi + xor ebp,DWORD[44+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[4+rsp] + lea esi,[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[40+rsp] + mov eax,edi + mov DWORD[36+rsp],ebp + mov ecx,esi + xor r14d,DWORD[48+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[8+rsp] + lea r13d,[1859775393+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD[44+rsp] + mov eax,esi + mov DWORD[40+rsp],r14d + mov ecx,r13d + xor edx,DWORD[52+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD[12+rsp] + lea r12d,[1859775393+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD[48+rsp] + mov eax,r13d + mov DWORD[44+rsp],edx + mov ecx,r12d + xor ebp,DWORD[56+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD[16+rsp] + lea r11d,[1859775393+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD[52+rsp] + mov eax,r12d + mov DWORD[48+rsp],ebp + mov ecx,r11d + xor r14d,DWORD[60+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD[20+rsp] + lea edi,[1859775393+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD[56+rsp] + mov eax,r11d + mov DWORD[52+rsp],r14d + mov ecx,edi + xor edx,DWORD[rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD[24+rsp] + lea esi,[1859775393+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD[60+rsp] + mov eax,edi + mov DWORD[56+rsp],edx + mov ecx,esi + xor ebp,DWORD[4+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[28+rsp] + lea r13d,[1859775393+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[rsp] + mov eax,esi + mov DWORD[60+rsp],ebp + mov ecx,r13d + xor r14d,DWORD[8+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[32+rsp] + lea r12d,[1859775393+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[4+rsp] + mov eax,r13d + mov DWORD[rsp],r14d + mov ecx,r12d + xor edx,DWORD[12+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[36+rsp] + lea r11d,[1859775393+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[8+rsp] + mov eax,r12d + mov DWORD[4+rsp],edx + mov ecx,r11d + xor ebp,DWORD[16+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[40+rsp] + lea edi,[1859775393+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD[12+rsp] + mov eax,r11d + mov DWORD[8+rsp],ebp + mov ecx,edi + xor r14d,DWORD[20+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD[44+rsp] + lea esi,[1859775393+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD[16+rsp] + mov eax,edi + mov DWORD[12+rsp],r14d + mov ecx,esi + xor edx,DWORD[24+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[48+rsp] + lea r13d,[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[20+rsp] + mov eax,esi + mov DWORD[16+rsp],edx + mov ecx,r13d + xor ebp,DWORD[28+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[52+rsp] + lea r12d,[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[24+rsp] + mov eax,r13d + mov DWORD[20+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[32+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[56+rsp] + lea r11d,[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[28+rsp] + mov eax,r12d + mov DWORD[24+rsp],r14d + mov ecx,r11d + xor edx,DWORD[36+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[60+rsp] + lea edi,[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[32+rsp] + mov eax,r11d + mov DWORD[28+rsp],edx + mov ecx,edi + xor ebp,DWORD[40+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[rsp] + lea esi,[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[36+rsp] + mov eax,r12d + mov DWORD[32+rsp],ebp + mov ebx,r12d + xor r14d,DWORD[44+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD[4+rsp] + lea r13d,[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD[40+rsp] + mov eax,r11d + mov DWORD[36+rsp],r14d + mov ebx,r11d + xor edx,DWORD[48+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD[8+rsp] + lea r12d,[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD[44+rsp] + mov eax,edi + mov DWORD[40+rsp],edx + mov ebx,edi + xor ebp,DWORD[52+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD[12+rsp] + lea r11d,[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD[48+rsp] + mov eax,esi + mov DWORD[44+rsp],ebp + mov ebx,esi + xor r14d,DWORD[56+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD[16+rsp] + lea edi,[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD[52+rsp] + mov eax,r13d + mov DWORD[48+rsp],r14d + mov ebx,r13d + xor edx,DWORD[60+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD[20+rsp] + lea esi,[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD[56+rsp] + mov eax,r12d + mov DWORD[52+rsp],edx + mov ebx,r12d + xor ebp,DWORD[rsp] + and eax,r11d + mov ecx,esi + xor ebp,DWORD[24+rsp] + lea r13d,[((-1894007588))+r13*1+rdx] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol ebp,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor r14d,DWORD[60+rsp] + mov eax,r11d + mov DWORD[56+rsp],ebp + mov ebx,r11d + xor r14d,DWORD[4+rsp] + and eax,edi + mov ecx,r13d + xor r14d,DWORD[28+rsp] + lea r12d,[((-1894007588))+r12*1+rbp] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol r14d,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor edx,DWORD[rsp] + mov eax,edi + mov DWORD[60+rsp],r14d + mov ebx,edi + xor edx,DWORD[8+rsp] + and eax,esi + mov ecx,r12d + xor edx,DWORD[32+rsp] + lea r11d,[((-1894007588))+r11*1+r14] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol edx,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor ebp,DWORD[4+rsp] + mov eax,esi + mov DWORD[rsp],edx + mov ebx,esi + xor ebp,DWORD[12+rsp] + and eax,r13d + mov ecx,r11d + xor ebp,DWORD[36+rsp] + lea edi,[((-1894007588))+rdi*1+rdx] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol ebp,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor r14d,DWORD[8+rsp] + mov eax,r13d + mov DWORD[4+rsp],ebp + mov ebx,r13d + xor r14d,DWORD[16+rsp] + and eax,r12d + mov ecx,edi + xor r14d,DWORD[40+rsp] + lea esi,[((-1894007588))+rsi*1+rbp] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol r14d,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor edx,DWORD[12+rsp] + mov eax,r12d + mov DWORD[8+rsp],r14d + mov ebx,r12d + xor edx,DWORD[20+rsp] + and eax,r11d + mov ecx,esi + xor edx,DWORD[44+rsp] + lea r13d,[((-1894007588))+r13*1+r14] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol edx,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor ebp,DWORD[16+rsp] + mov eax,r11d + mov DWORD[12+rsp],edx + mov ebx,r11d + xor ebp,DWORD[24+rsp] + and eax,edi + mov ecx,r13d + xor ebp,DWORD[48+rsp] + lea r12d,[((-1894007588))+r12*1+rdx] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol ebp,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor r14d,DWORD[20+rsp] + mov eax,edi + mov DWORD[16+rsp],ebp + mov ebx,edi + xor r14d,DWORD[28+rsp] + and eax,esi + mov ecx,r12d + xor r14d,DWORD[52+rsp] + lea r11d,[((-1894007588))+r11*1+rbp] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol r14d,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor edx,DWORD[24+rsp] + mov eax,esi + mov DWORD[20+rsp],r14d + mov ebx,esi + xor edx,DWORD[32+rsp] + and eax,r13d + mov ecx,r11d + xor edx,DWORD[56+rsp] + lea edi,[((-1894007588))+rdi*1+r14] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol edx,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor ebp,DWORD[28+rsp] + mov eax,r13d + mov DWORD[24+rsp],edx + mov ebx,r13d + xor ebp,DWORD[36+rsp] + and eax,r12d + mov ecx,edi + xor ebp,DWORD[60+rsp] + lea esi,[((-1894007588))+rsi*1+rdx] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol ebp,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor r14d,DWORD[32+rsp] + mov eax,r12d + mov DWORD[28+rsp],ebp + mov ebx,r12d + xor r14d,DWORD[40+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD[rsp] + lea r13d,[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD[36+rsp] + mov eax,r11d + mov DWORD[32+rsp],r14d + mov ebx,r11d + xor edx,DWORD[44+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD[4+rsp] + lea r12d,[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD[40+rsp] + mov eax,edi + mov DWORD[36+rsp],edx + mov ebx,edi + xor ebp,DWORD[48+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD[8+rsp] + lea r11d,[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD[44+rsp] + mov eax,esi + mov DWORD[40+rsp],ebp + mov ebx,esi + xor r14d,DWORD[52+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD[12+rsp] + lea edi,[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD[48+rsp] + mov eax,r13d + mov DWORD[44+rsp],r14d + mov ebx,r13d + xor edx,DWORD[56+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD[16+rsp] + lea esi,[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD[52+rsp] + mov eax,edi + mov DWORD[48+rsp],edx + mov ecx,esi + xor ebp,DWORD[60+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[20+rsp] + lea r13d,[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[56+rsp] + mov eax,esi + mov DWORD[52+rsp],ebp + mov ecx,r13d + xor r14d,DWORD[rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[24+rsp] + lea r12d,[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[60+rsp] + mov eax,r13d + mov DWORD[56+rsp],r14d + mov ecx,r12d + xor edx,DWORD[4+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[28+rsp] + lea r11d,[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[rsp] + mov eax,r12d + mov DWORD[60+rsp],edx + mov ecx,r11d + xor ebp,DWORD[8+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[32+rsp] + lea edi,[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD[4+rsp] + mov eax,r11d + mov DWORD[rsp],ebp + mov ecx,edi + xor r14d,DWORD[12+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD[36+rsp] + lea esi,[((-899497514))+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD[8+rsp] + mov eax,edi + mov DWORD[4+rsp],r14d + mov ecx,esi + xor edx,DWORD[16+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[40+rsp] + lea r13d,[((-899497514))+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[12+rsp] + mov eax,esi + mov DWORD[8+rsp],edx + mov ecx,r13d + xor ebp,DWORD[20+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[44+rsp] + lea r12d,[((-899497514))+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[16+rsp] + mov eax,r13d + mov DWORD[12+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[24+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[48+rsp] + lea r11d,[((-899497514))+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[20+rsp] + mov eax,r12d + mov DWORD[16+rsp],r14d + mov ecx,r11d + xor edx,DWORD[28+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[52+rsp] + lea edi,[((-899497514))+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[24+rsp] + mov eax,r11d + mov DWORD[20+rsp],edx + mov ecx,edi + xor ebp,DWORD[32+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[56+rsp] + lea esi,[((-899497514))+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[28+rsp] + mov eax,edi + mov DWORD[24+rsp],ebp + mov ecx,esi + xor r14d,DWORD[36+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[60+rsp] + lea r13d,[((-899497514))+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD[32+rsp] + mov eax,esi + mov DWORD[28+rsp],r14d + mov ecx,r13d + xor edx,DWORD[40+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD[rsp] + lea r12d,[((-899497514))+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD[36+rsp] + mov eax,r13d + + mov ecx,r12d + xor ebp,DWORD[44+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD[4+rsp] + lea r11d,[((-899497514))+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD[40+rsp] + mov eax,r12d + + mov ecx,r11d + xor r14d,DWORD[48+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD[8+rsp] + lea edi,[((-899497514))+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD[44+rsp] + mov eax,r11d + + mov ecx,edi + xor edx,DWORD[52+rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD[12+rsp] + lea esi,[((-899497514))+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD[48+rsp] + mov eax,edi + + mov ecx,esi + xor ebp,DWORD[56+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[16+rsp] + lea r13d,[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[52+rsp] + mov eax,esi + + mov ecx,r13d + xor r14d,DWORD[60+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[20+rsp] + lea r12d,[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[56+rsp] + mov eax,r13d + + mov ecx,r12d + xor edx,DWORD[rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[24+rsp] + lea r11d,[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[60+rsp] + mov eax,r12d + + mov ecx,r11d + xor ebp,DWORD[4+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[28+rsp] + lea edi,[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + mov eax,r11d + mov ecx,edi + xor eax,r13d + lea esi,[((-899497514))+rsi*1+rbp] + rol ecx,5 + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + add esi,DWORD[r8] + add edi,DWORD[4+r8] + add r11d,DWORD[8+r8] + add r12d,DWORD[12+r8] + add r13d,DWORD[16+r8] + mov DWORD[r8],esi + mov DWORD[4+r8],edi + mov DWORD[8+r8],r11d + mov DWORD[12+r8],r12d + mov DWORD[16+r8],r13d + + sub r10,1 + lea r9,[64+r9] + jnz NEAR $L$loop + + mov rsi,QWORD[64+rsp] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha1_block_data_order_nohw: +global sha1_block_data_order_hw + +ALIGN 32 +sha1_block_data_order_hw: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_hw: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rsp,[((-72))+rsp] + movaps XMMWORD[(-8-64)+rax],xmm6 + movaps XMMWORD[(-8-48)+rax],xmm7 + movaps XMMWORD[(-8-32)+rax],xmm8 + movaps XMMWORD[(-8-16)+rax],xmm9 +$L$prologue_shaext: + movdqu xmm0,XMMWORD[rdi] + movd xmm1,DWORD[16+rdi] + movdqa xmm3,XMMWORD[((K_XX_XX+160))] + + movdqu xmm4,XMMWORD[rsi] + pshufd xmm0,xmm0,27 + movdqu xmm5,XMMWORD[16+rsi] + pshufd xmm1,xmm1,27 + movdqu xmm6,XMMWORD[32+rsi] +DB 102,15,56,0,227 + movdqu xmm7,XMMWORD[48+rsi] +DB 102,15,56,0,235 +DB 102,15,56,0,243 + movdqa xmm9,xmm1 +DB 102,15,56,0,251 + jmp NEAR $L$oop_shaext + +ALIGN 16 +$L$oop_shaext: + dec rdx + lea r8,[64+rsi] + paddd xmm1,xmm4 + cmovne rsi,r8 + prefetcht0 [512+rsi] + movdqa xmm8,xmm0 + DB 15,56,201,229 + movdqa xmm2,xmm0 + DB 15,58,204,193,0 + DB 15,56,200,213 + pxor xmm4,xmm6 + DB 15,56,201,238 + DB 15,56,202,231 + + movdqa xmm1,xmm0 + DB 15,58,204,194,0 + DB 15,56,200,206 + pxor xmm5,xmm7 + DB 15,56,202,236 + DB 15,56,201,247 + movdqa xmm2,xmm0 + DB 15,58,204,193,0 + DB 15,56,200,215 + pxor xmm6,xmm4 + DB 15,56,201,252 + DB 15,56,202,245 + + movdqa xmm1,xmm0 + DB 15,58,204,194,0 + DB 15,56,200,204 + pxor xmm7,xmm5 + DB 15,56,202,254 + DB 15,56,201,229 + movdqa xmm2,xmm0 + DB 15,58,204,193,0 + DB 15,56,200,213 + pxor xmm4,xmm6 + DB 15,56,201,238 + DB 15,56,202,231 + + movdqa xmm1,xmm0 + DB 15,58,204,194,1 + DB 15,56,200,206 + pxor xmm5,xmm7 + DB 15,56,202,236 + DB 15,56,201,247 + movdqa xmm2,xmm0 + DB 15,58,204,193,1 + DB 15,56,200,215 + pxor xmm6,xmm4 + DB 15,56,201,252 + DB 15,56,202,245 + + movdqa xmm1,xmm0 + DB 15,58,204,194,1 + DB 15,56,200,204 + pxor xmm7,xmm5 + DB 15,56,202,254 + DB 15,56,201,229 + movdqa xmm2,xmm0 + DB 15,58,204,193,1 + DB 15,56,200,213 + pxor xmm4,xmm6 + DB 15,56,201,238 + DB 15,56,202,231 + + movdqa xmm1,xmm0 + DB 15,58,204,194,1 + DB 15,56,200,206 + pxor xmm5,xmm7 + DB 15,56,202,236 + DB 15,56,201,247 + movdqa xmm2,xmm0 + DB 15,58,204,193,2 + DB 15,56,200,215 + pxor xmm6,xmm4 + DB 15,56,201,252 + DB 15,56,202,245 + + movdqa xmm1,xmm0 + DB 15,58,204,194,2 + DB 15,56,200,204 + pxor xmm7,xmm5 + DB 15,56,202,254 + DB 15,56,201,229 + movdqa xmm2,xmm0 + DB 15,58,204,193,2 + DB 15,56,200,213 + pxor xmm4,xmm6 + DB 15,56,201,238 + DB 15,56,202,231 + + movdqa xmm1,xmm0 + DB 15,58,204,194,2 + DB 15,56,200,206 + pxor xmm5,xmm7 + DB 15,56,202,236 + DB 15,56,201,247 + movdqa xmm2,xmm0 + DB 15,58,204,193,2 + DB 15,56,200,215 + pxor xmm6,xmm4 + DB 15,56,201,252 + DB 15,56,202,245 + + movdqa xmm1,xmm0 + DB 15,58,204,194,3 + DB 15,56,200,204 + pxor xmm7,xmm5 + DB 15,56,202,254 + movdqu xmm4,XMMWORD[rsi] + movdqa xmm2,xmm0 + DB 15,58,204,193,3 + DB 15,56,200,213 + movdqu xmm5,XMMWORD[16+rsi] +DB 102,15,56,0,227 + + movdqa xmm1,xmm0 + DB 15,58,204,194,3 + DB 15,56,200,206 + movdqu xmm6,XMMWORD[32+rsi] +DB 102,15,56,0,235 + + movdqa xmm2,xmm0 + DB 15,58,204,193,3 + DB 15,56,200,215 + movdqu xmm7,XMMWORD[48+rsi] +DB 102,15,56,0,243 + + movdqa xmm1,xmm0 + DB 15,58,204,194,3 + DB 65,15,56,200,201 +DB 102,15,56,0,251 + + paddd xmm0,xmm8 + movdqa xmm9,xmm1 + + jnz NEAR $L$oop_shaext + + pshufd xmm0,xmm0,27 + pshufd xmm1,xmm1,27 + movdqu XMMWORD[rdi],xmm0 + movd DWORD[16+rdi],xmm1 + movaps xmm6,XMMWORD[((-8-64))+rax] + movaps xmm7,XMMWORD[((-8-48))+rax] + movaps xmm8,XMMWORD[((-8-32))+rax] + movaps xmm9,XMMWORD[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha1_block_data_order_hw: +global sha1_block_data_order_ssse3 + +ALIGN 16 +sha1_block_data_order_ssse3: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_ssse3: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov r11,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + lea rsp,[((-160))+rsp] + movaps XMMWORD[(-40-96)+r11],xmm6 + movaps XMMWORD[(-40-80)+r11],xmm7 + movaps XMMWORD[(-40-64)+r11],xmm8 + movaps XMMWORD[(-40-48)+r11],xmm9 + movaps XMMWORD[(-40-32)+r11],xmm10 + movaps XMMWORD[(-40-16)+r11],xmm11 +$L$prologue_ssse3: + and rsp,-64 + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r14,[((K_XX_XX+64))] + + mov eax,DWORD[r8] + mov ebx,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov esi,ebx + mov ebp,DWORD[16+r8] + mov edi,ecx + xor edi,edx + and esi,edi + + movdqa xmm6,XMMWORD[64+r14] + movdqa xmm9,XMMWORD[((-64))+r14] + movdqu xmm0,XMMWORD[r9] + movdqu xmm1,XMMWORD[16+r9] + movdqu xmm2,XMMWORD[32+r9] + movdqu xmm3,XMMWORD[48+r9] +DB 102,15,56,0,198 +DB 102,15,56,0,206 +DB 102,15,56,0,214 + add r9,64 + paddd xmm0,xmm9 +DB 102,15,56,0,222 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD[32+rsp],xmm2 + psubd xmm2,xmm9 + jmp NEAR $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3: + ror ebx,2 + pshufd xmm4,xmm0,238 + xor esi,edx + movdqa xmm8,xmm3 + paddd xmm9,xmm3 + mov edi,eax + add ebp,DWORD[rsp] + punpcklqdq xmm4,xmm1 + xor ebx,ecx + rol eax,5 + add ebp,esi + psrldq xmm8,4 + and edi,ebx + xor ebx,ecx + pxor xmm4,xmm0 + add ebp,eax + ror eax,7 + pxor xmm8,xmm2 + xor edi,ecx + mov esi,ebp + add edx,DWORD[4+rsp] + pxor xmm4,xmm8 + xor eax,ebx + rol ebp,5 + movdqa XMMWORD[48+rsp],xmm9 + add edx,edi + and esi,eax + movdqa xmm10,xmm4 + xor eax,ebx + add edx,ebp + ror ebp,7 + movdqa xmm8,xmm4 + xor esi,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx + add ecx,DWORD[8+rsp] + psrld xmm8,31 + xor ebp,eax + rol edx,5 + add ecx,esi + movdqa xmm9,xmm10 + and edi,ebp + xor ebp,eax + psrld xmm10,30 + add ecx,edx + ror edx,7 + por xmm4,xmm8 + xor edi,eax + mov esi,ecx + add ebx,DWORD[12+rsp] + pslld xmm9,2 + pxor xmm4,xmm10 + xor edx,ebp + movdqa xmm10,XMMWORD[((-64))+r14] + rol ecx,5 + add ebx,edi + and esi,edx + pxor xmm4,xmm9 + xor edx,ebp + add ebx,ecx + ror ecx,7 + pshufd xmm5,xmm1,238 + xor esi,ebp + movdqa xmm9,xmm4 + paddd xmm10,xmm4 + mov edi,ebx + add eax,DWORD[16+rsp] + punpcklqdq xmm5,xmm2 + xor ecx,edx + rol ebx,5 + add eax,esi + psrldq xmm9,4 + and edi,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm9,xmm3 + xor edi,edx + mov esi,eax + add ebp,DWORD[20+rsp] + pxor xmm5,xmm9 + xor ebx,ecx + rol eax,5 + movdqa XMMWORD[rsp],xmm10 + add ebp,edi + and esi,ebx + movdqa xmm8,xmm5 + xor ebx,ecx + add ebp,eax + ror eax,7 + movdqa xmm9,xmm5 + xor esi,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp + add edx,DWORD[24+rsp] + psrld xmm9,31 + xor eax,ebx + rol ebp,5 + add edx,esi + movdqa xmm10,xmm8 + and edi,eax + xor eax,ebx + psrld xmm8,30 + add edx,ebp + ror ebp,7 + por xmm5,xmm9 + xor edi,ebx + mov esi,edx + add ecx,DWORD[28+rsp] + pslld xmm10,2 + pxor xmm5,xmm8 + xor ebp,eax + movdqa xmm8,XMMWORD[((-32))+r14] + rol edx,5 + add ecx,edi + and esi,ebp + pxor xmm5,xmm10 + xor ebp,eax + add ecx,edx + ror edx,7 + pshufd xmm6,xmm2,238 + xor esi,eax + movdqa xmm10,xmm5 + paddd xmm8,xmm5 + mov edi,ecx + add ebx,DWORD[32+rsp] + punpcklqdq xmm6,xmm3 + xor edx,ebp + rol ecx,5 + add ebx,esi + psrldq xmm10,4 + and edi,edx + xor edx,ebp + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm10,xmm4 + xor edi,ebp + mov esi,ebx + add eax,DWORD[36+rsp] + pxor xmm6,xmm10 + xor ecx,edx + rol ebx,5 + movdqa XMMWORD[16+rsp],xmm8 + add eax,edi + and esi,ecx + movdqa xmm9,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm10,xmm6 + xor esi,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + add ebp,DWORD[40+rsp] + psrld xmm10,31 + xor ebx,ecx + rol eax,5 + add ebp,esi + movdqa xmm8,xmm9 + and edi,ebx + xor ebx,ecx + psrld xmm9,30 + add ebp,eax + ror eax,7 + por xmm6,xmm10 + xor edi,ecx + mov esi,ebp + add edx,DWORD[44+rsp] + pslld xmm8,2 + pxor xmm6,xmm9 + xor eax,ebx + movdqa xmm9,XMMWORD[((-32))+r14] + rol ebp,5 + add edx,edi + and esi,eax + pxor xmm6,xmm8 + xor eax,ebx + add edx,ebp + ror ebp,7 + pshufd xmm7,xmm3,238 + xor esi,ebx + movdqa xmm8,xmm6 + paddd xmm9,xmm6 + mov edi,edx + add ecx,DWORD[48+rsp] + punpcklqdq xmm7,xmm4 + xor ebp,eax + rol edx,5 + add ecx,esi + psrldq xmm8,4 + and edi,ebp + xor ebp,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm8,xmm5 + xor edi,eax + mov esi,ecx + add ebx,DWORD[52+rsp] + pxor xmm7,xmm8 + xor edx,ebp + rol ecx,5 + movdqa XMMWORD[32+rsp],xmm9 + add ebx,edi + and esi,edx + movdqa xmm10,xmm7 + xor edx,ebp + add ebx,ecx + ror ecx,7 + movdqa xmm8,xmm7 + xor esi,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + add eax,DWORD[56+rsp] + psrld xmm8,31 + xor ecx,edx + rol ebx,5 + add eax,esi + movdqa xmm9,xmm10 + and edi,ecx + xor ecx,edx + psrld xmm10,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm8 + xor edi,edx + mov esi,eax + add ebp,DWORD[60+rsp] + pslld xmm9,2 + pxor xmm7,xmm10 + xor ebx,ecx + movdqa xmm10,XMMWORD[((-32))+r14] + rol eax,5 + add ebp,edi + and esi,ebx + pxor xmm7,xmm9 + pshufd xmm9,xmm6,238 + xor ebx,ecx + add ebp,eax + ror eax,7 + pxor xmm0,xmm4 + xor esi,ecx + mov edi,ebp + add edx,DWORD[rsp] + punpcklqdq xmm9,xmm7 + xor eax,ebx + rol ebp,5 + pxor xmm0,xmm1 + add edx,esi + and edi,eax + movdqa xmm8,xmm10 + xor eax,ebx + paddd xmm10,xmm7 + add edx,ebp + pxor xmm0,xmm9 + ror ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD[4+rsp] + movdqa xmm9,xmm0 + xor ebp,eax + rol edx,5 + movdqa XMMWORD[48+rsp],xmm10 + add ecx,edi + and esi,ebp + xor ebp,eax + pslld xmm0,2 + add ecx,edx + ror edx,7 + psrld xmm9,30 + xor esi,eax + mov edi,ecx + add ebx,DWORD[8+rsp] + por xmm0,xmm9 + xor edx,ebp + rol ecx,5 + pshufd xmm10,xmm7,238 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[12+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + pxor xmm1,xmm5 + add ebp,DWORD[16+rsp] + xor esi,ecx + punpcklqdq xmm10,xmm0 + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + add ebp,esi + xor edi,ecx + movdqa xmm9,xmm8 + ror ebx,7 + paddd xmm8,xmm0 + add ebp,eax + pxor xmm1,xmm10 + add edx,DWORD[20+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm10,xmm1 + add edx,edi + xor esi,ebx + movdqa XMMWORD[rsp],xmm8 + ror eax,7 + add edx,ebp + add ecx,DWORD[24+rsp] + pslld xmm1,2 + xor esi,eax + mov edi,edx + psrld xmm10,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm1,xmm10 + add ecx,edx + add ebx,DWORD[28+rsp] + pshufd xmm8,xmm0,238 + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + pxor xmm2,xmm6 + add eax,DWORD[32+rsp] + xor esi,edx + punpcklqdq xmm8,xmm1 + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + add eax,esi + xor edi,edx + movdqa xmm10,XMMWORD[r14] + ror ecx,7 + paddd xmm9,xmm1 + add eax,ebx + pxor xmm2,xmm8 + add ebp,DWORD[36+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + add ebp,edi + xor esi,ecx + movdqa XMMWORD[16+rsp],xmm9 + ror ebx,7 + add ebp,eax + add edx,DWORD[40+rsp] + pslld xmm2,2 + xor esi,ebx + mov edi,ebp + psrld xmm8,30 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + por xmm2,xmm8 + add edx,ebp + add ecx,DWORD[44+rsp] + pshufd xmm9,xmm1,238 + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + pxor xmm3,xmm7 + add ebx,DWORD[48+rsp] + xor esi,ebp + punpcklqdq xmm9,xmm2 + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + add ebx,esi + xor edi,ebp + movdqa xmm8,xmm10 + ror edx,7 + paddd xmm10,xmm2 + add ebx,ecx + pxor xmm3,xmm9 + add eax,DWORD[52+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + add eax,edi + xor esi,edx + movdqa XMMWORD[32+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD[56+rsp] + pslld xmm3,2 + xor esi,ecx + mov edi,eax + psrld xmm9,30 + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + por xmm3,xmm9 + add ebp,eax + add edx,DWORD[60+rsp] + pshufd xmm10,xmm2,238 + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + pxor xmm4,xmm0 + add ecx,DWORD[rsp] + xor esi,eax + punpcklqdq xmm10,xmm3 + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + add ecx,esi + xor edi,eax + movdqa xmm9,xmm8 + ror ebp,7 + paddd xmm8,xmm3 + add ecx,edx + pxor xmm4,xmm10 + add ebx,DWORD[4+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + add ebx,edi + xor esi,ebp + movdqa XMMWORD[48+rsp],xmm8 + ror edx,7 + add ebx,ecx + add eax,DWORD[8+rsp] + pslld xmm4,2 + xor esi,edx + mov edi,ebx + psrld xmm10,30 + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + por xmm4,xmm10 + add eax,ebx + add ebp,DWORD[12+rsp] + pshufd xmm8,xmm3,238 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + pxor xmm5,xmm1 + add edx,DWORD[16+rsp] + xor esi,ebx + punpcklqdq xmm8,xmm4 + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + add edx,esi + xor edi,ebx + movdqa xmm10,xmm9 + ror eax,7 + paddd xmm9,xmm4 + add edx,ebp + pxor xmm5,xmm8 + add ecx,DWORD[20+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + add ecx,edi + xor esi,eax + movdqa XMMWORD[rsp],xmm9 + ror ebp,7 + add ecx,edx + add ebx,DWORD[24+rsp] + pslld xmm5,2 + xor esi,ebp + mov edi,ecx + psrld xmm8,30 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + por xmm5,xmm8 + add ebx,ecx + add eax,DWORD[28+rsp] + pshufd xmm9,xmm4,238 + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + pxor xmm6,xmm2 + add ebp,DWORD[32+rsp] + and esi,ecx + xor ecx,edx + ror ebx,7 + punpcklqdq xmm9,xmm5 + mov edi,eax + xor esi,ecx + pxor xmm6,xmm7 + rol eax,5 + add ebp,esi + movdqa xmm8,xmm10 + xor edi,ebx + paddd xmm10,xmm5 + xor ebx,ecx + pxor xmm6,xmm9 + add ebp,eax + add edx,DWORD[36+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + movdqa xmm9,xmm6 + mov esi,ebp + xor edi,ebx + movdqa XMMWORD[16+rsp],xmm10 + rol ebp,5 + add edx,edi + xor esi,eax + pslld xmm6,2 + xor eax,ebx + add edx,ebp + psrld xmm9,30 + add ecx,DWORD[40+rsp] + and esi,eax + xor eax,ebx + por xmm6,xmm9 + ror ebp,7 + mov edi,edx + xor esi,eax + rol edx,5 + pshufd xmm10,xmm5,238 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[44+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + mov esi,ecx + xor edi,ebp + rol ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + pxor xmm7,xmm3 + add eax,DWORD[48+rsp] + and esi,edx + xor edx,ebp + ror ecx,7 + punpcklqdq xmm10,xmm6 + mov edi,ebx + xor esi,edx + pxor xmm7,xmm0 + rol ebx,5 + add eax,esi + movdqa xmm9,XMMWORD[32+r14] + xor edi,ecx + paddd xmm8,xmm6 + xor ecx,edx + pxor xmm7,xmm10 + add eax,ebx + add ebp,DWORD[52+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + movdqa xmm10,xmm7 + mov esi,eax + xor edi,ecx + movdqa XMMWORD[32+rsp],xmm8 + rol eax,5 + add ebp,edi + xor esi,ebx + pslld xmm7,2 + xor ebx,ecx + add ebp,eax + psrld xmm10,30 + add edx,DWORD[56+rsp] + and esi,ebx + xor ebx,ecx + por xmm7,xmm10 + ror eax,7 + mov edi,ebp + xor esi,ebx + rol ebp,5 + pshufd xmm8,xmm6,238 + add edx,esi + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[60+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + mov esi,edx + xor edi,eax + rol edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + pxor xmm0,xmm4 + add ebx,DWORD[rsp] + and esi,ebp + xor ebp,eax + ror edx,7 + punpcklqdq xmm8,xmm7 + mov edi,ecx + xor esi,ebp + pxor xmm0,xmm1 + rol ecx,5 + add ebx,esi + movdqa xmm10,xmm9 + xor edi,edx + paddd xmm9,xmm7 + xor edx,ebp + pxor xmm0,xmm8 + add ebx,ecx + add eax,DWORD[4+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + movdqa xmm8,xmm0 + mov esi,ebx + xor edi,edx + movdqa XMMWORD[48+rsp],xmm9 + rol ebx,5 + add eax,edi + xor esi,ecx + pslld xmm0,2 + xor ecx,edx + add eax,ebx + psrld xmm8,30 + add ebp,DWORD[8+rsp] + and esi,ecx + xor ecx,edx + por xmm0,xmm8 + ror ebx,7 + mov edi,eax + xor esi,ecx + rol eax,5 + pshufd xmm9,xmm7,238 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[12+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + mov esi,ebp + xor edi,ebx + rol ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + pxor xmm1,xmm5 + add ecx,DWORD[16+rsp] + and esi,eax + xor eax,ebx + ror ebp,7 + punpcklqdq xmm9,xmm0 + mov edi,edx + xor esi,eax + pxor xmm1,xmm2 + rol edx,5 + add ecx,esi + movdqa xmm8,xmm10 + xor edi,ebp + paddd xmm10,xmm0 + xor ebp,eax + pxor xmm1,xmm9 + add ecx,edx + add ebx,DWORD[20+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + movdqa xmm9,xmm1 + mov esi,ecx + xor edi,ebp + movdqa XMMWORD[rsp],xmm10 + rol ecx,5 + add ebx,edi + xor esi,edx + pslld xmm1,2 + xor edx,ebp + add ebx,ecx + psrld xmm9,30 + add eax,DWORD[24+rsp] + and esi,edx + xor edx,ebp + por xmm1,xmm9 + ror ecx,7 + mov edi,ebx + xor esi,edx + rol ebx,5 + pshufd xmm10,xmm0,238 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[28+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor edi,ecx + rol eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + pxor xmm2,xmm6 + add edx,DWORD[32+rsp] + and esi,ebx + xor ebx,ecx + ror eax,7 + punpcklqdq xmm10,xmm1 + mov edi,ebp + xor esi,ebx + pxor xmm2,xmm3 + rol ebp,5 + add edx,esi + movdqa xmm9,xmm8 + xor edi,eax + paddd xmm8,xmm1 + xor eax,ebx + pxor xmm2,xmm10 + add edx,ebp + add ecx,DWORD[36+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + movdqa xmm10,xmm2 + mov esi,edx + xor edi,eax + movdqa XMMWORD[16+rsp],xmm8 + rol edx,5 + add ecx,edi + xor esi,ebp + pslld xmm2,2 + xor ebp,eax + add ecx,edx + psrld xmm10,30 + add ebx,DWORD[40+rsp] + and esi,ebp + xor ebp,eax + por xmm2,xmm10 + ror edx,7 + mov edi,ecx + xor esi,ebp + rol ecx,5 + pshufd xmm8,xmm1,238 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[44+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + pxor xmm3,xmm7 + add ebp,DWORD[48+rsp] + xor esi,ecx + punpcklqdq xmm8,xmm2 + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + add ebp,esi + xor edi,ecx + movdqa xmm10,xmm9 + ror ebx,7 + paddd xmm9,xmm2 + add ebp,eax + pxor xmm3,xmm8 + add edx,DWORD[52+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + add edx,edi + xor esi,ebx + movdqa XMMWORD[32+rsp],xmm9 + ror eax,7 + add edx,ebp + add ecx,DWORD[56+rsp] + pslld xmm3,2 + xor esi,eax + mov edi,edx + psrld xmm8,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm3,xmm8 + add ecx,edx + add ebx,DWORD[60+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm3 + add eax,esi + xor edi,edx + movdqa XMMWORD[48+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD[4+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[8+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[12+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + cmp r9,r10 + je NEAR $L$done_ssse3 + movdqa xmm6,XMMWORD[64+r14] + movdqa xmm9,XMMWORD[((-64))+r14] + movdqu xmm0,XMMWORD[r9] + movdqu xmm1,XMMWORD[16+r9] + movdqu xmm2,XMMWORD[32+r9] + movdqu xmm3,XMMWORD[48+r9] +DB 102,15,56,0,198 + add r9,64 + add ebx,DWORD[16+rsp] + xor esi,ebp + mov edi,ecx +DB 102,15,56,0,206 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + paddd xmm0,xmm9 + add ebx,ecx + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + movdqa XMMWORD[rsp],xmm0 + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + psubd xmm0,xmm9 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + mov edi,edx +DB 102,15,56,0,214 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + paddd xmm1,xmm9 + add ecx,edx + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + movdqa XMMWORD[16+rsp],xmm1 + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + psubd xmm1,xmm9 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + mov edi,ebp +DB 102,15,56,0,222 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + paddd xmm2,xmm9 + add edx,ebp + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + movdqa XMMWORD[32+rsp],xmm2 + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + psubd xmm2,xmm9 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + add edx,DWORD[12+r8] + mov DWORD[r8],eax + add ebp,DWORD[16+r8] + mov DWORD[4+r8],esi + mov ebx,esi + mov DWORD[8+r8],ecx + mov edi,ecx + mov DWORD[12+r8],edx + xor edi,edx + mov DWORD[16+r8],ebp + and esi,edi + jmp NEAR $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3: + add ebx,DWORD[16+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + mov edi,edx + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + mov DWORD[r8],eax + add edx,DWORD[12+r8] + mov DWORD[4+r8],esi + add ebp,DWORD[16+r8] + mov DWORD[8+r8],ecx + mov DWORD[12+r8],edx + mov DWORD[16+r8],ebp + movaps xmm6,XMMWORD[((-40-96))+r11] + movaps xmm7,XMMWORD[((-40-80))+r11] + movaps xmm8,XMMWORD[((-40-64))+r11] + movaps xmm9,XMMWORD[((-40-48))+r11] + movaps xmm10,XMMWORD[((-40-32))+r11] + movaps xmm11,XMMWORD[((-40-16))+r11] + mov r14,QWORD[((-40))+r11] + + mov r13,QWORD[((-32))+r11] + + mov r12,QWORD[((-24))+r11] + + mov rbp,QWORD[((-16))+r11] + + mov rbx,QWORD[((-8))+r11] + + lea rsp,[r11] + +$L$epilogue_ssse3: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha1_block_data_order_ssse3: +global sha1_block_data_order_avx + +ALIGN 16 +sha1_block_data_order_avx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_avx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov r11,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + lea rsp,[((-160))+rsp] + vzeroupper + vmovaps XMMWORD[(-40-96)+r11],xmm6 + vmovaps XMMWORD[(-40-80)+r11],xmm7 + vmovaps XMMWORD[(-40-64)+r11],xmm8 + vmovaps XMMWORD[(-40-48)+r11],xmm9 + vmovaps XMMWORD[(-40-32)+r11],xmm10 + vmovaps XMMWORD[(-40-16)+r11],xmm11 +$L$prologue_avx: + and rsp,-64 + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r14,[((K_XX_XX+64))] + + mov eax,DWORD[r8] + mov ebx,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov esi,ebx + mov ebp,DWORD[16+r8] + mov edi,ecx + xor edi,edx + and esi,edi + + vmovdqa xmm6,XMMWORD[64+r14] + vmovdqa xmm11,XMMWORD[((-64))+r14] + vmovdqu xmm0,XMMWORD[r9] + vmovdqu xmm1,XMMWORD[16+r9] + vmovdqu xmm2,XMMWORD[32+r9] + vmovdqu xmm3,XMMWORD[48+r9] + vpshufb xmm0,xmm0,xmm6 + add r9,64 + vpshufb xmm1,xmm1,xmm6 + vpshufb xmm2,xmm2,xmm6 + vpshufb xmm3,xmm3,xmm6 + vpaddd xmm4,xmm0,xmm11 + vpaddd xmm5,xmm1,xmm11 + vpaddd xmm6,xmm2,xmm11 + vmovdqa XMMWORD[rsp],xmm4 + vmovdqa XMMWORD[16+rsp],xmm5 + vmovdqa XMMWORD[32+rsp],xmm6 + jmp NEAR $L$oop_avx +ALIGN 16 +$L$oop_avx: + shrd ebx,ebx,2 + xor esi,edx + vpalignr xmm4,xmm1,xmm0,8 + mov edi,eax + add ebp,DWORD[rsp] + vpaddd xmm9,xmm11,xmm3 + xor ebx,ecx + shld eax,eax,5 + vpsrldq xmm8,xmm3,4 + add ebp,esi + and edi,ebx + vpxor xmm4,xmm4,xmm0 + xor ebx,ecx + add ebp,eax + vpxor xmm8,xmm8,xmm2 + shrd eax,eax,7 + xor edi,ecx + mov esi,ebp + add edx,DWORD[4+rsp] + vpxor xmm4,xmm4,xmm8 + xor eax,ebx + shld ebp,ebp,5 + vmovdqa XMMWORD[48+rsp],xmm9 + add edx,edi + and esi,eax + vpsrld xmm8,xmm4,31 + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor esi,ebx + vpslldq xmm10,xmm4,12 + vpaddd xmm4,xmm4,xmm4 + mov edi,edx + add ecx,DWORD[8+rsp] + xor ebp,eax + shld edx,edx,5 + vpsrld xmm9,xmm10,30 + vpor xmm4,xmm4,xmm8 + add ecx,esi + and edi,ebp + xor ebp,eax + add ecx,edx + vpslld xmm10,xmm10,2 + vpxor xmm4,xmm4,xmm9 + shrd edx,edx,7 + xor edi,eax + mov esi,ecx + add ebx,DWORD[12+rsp] + vpxor xmm4,xmm4,xmm10 + xor edx,ebp + shld ecx,ecx,5 + add ebx,edi + and esi,edx + xor edx,ebp + add ebx,ecx + shrd ecx,ecx,7 + xor esi,ebp + vpalignr xmm5,xmm2,xmm1,8 + mov edi,ebx + add eax,DWORD[16+rsp] + vpaddd xmm9,xmm11,xmm4 + xor ecx,edx + shld ebx,ebx,5 + vpsrldq xmm8,xmm4,4 + add eax,esi + and edi,ecx + vpxor xmm5,xmm5,xmm1 + xor ecx,edx + add eax,ebx + vpxor xmm8,xmm8,xmm3 + shrd ebx,ebx,7 + xor edi,edx + mov esi,eax + add ebp,DWORD[20+rsp] + vpxor xmm5,xmm5,xmm8 + xor ebx,ecx + shld eax,eax,5 + vmovdqa XMMWORD[rsp],xmm9 + add ebp,edi + and esi,ebx + vpsrld xmm8,xmm5,31 + xor ebx,ecx + add ebp,eax + shrd eax,eax,7 + xor esi,ecx + vpslldq xmm10,xmm5,12 + vpaddd xmm5,xmm5,xmm5 + mov edi,ebp + add edx,DWORD[24+rsp] + xor eax,ebx + shld ebp,ebp,5 + vpsrld xmm9,xmm10,30 + vpor xmm5,xmm5,xmm8 + add edx,esi + and edi,eax + xor eax,ebx + add edx,ebp + vpslld xmm10,xmm10,2 + vpxor xmm5,xmm5,xmm9 + shrd ebp,ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD[28+rsp] + vpxor xmm5,xmm5,xmm10 + xor ebp,eax + shld edx,edx,5 + vmovdqa xmm11,XMMWORD[((-32))+r14] + add ecx,edi + and esi,ebp + xor ebp,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + vpalignr xmm6,xmm3,xmm2,8 + mov edi,ecx + add ebx,DWORD[32+rsp] + vpaddd xmm9,xmm11,xmm5 + xor edx,ebp + shld ecx,ecx,5 + vpsrldq xmm8,xmm5,4 + add ebx,esi + and edi,edx + vpxor xmm6,xmm6,xmm2 + xor edx,ebp + add ebx,ecx + vpxor xmm8,xmm8,xmm4 + shrd ecx,ecx,7 + xor edi,ebp + mov esi,ebx + add eax,DWORD[36+rsp] + vpxor xmm6,xmm6,xmm8 + xor ecx,edx + shld ebx,ebx,5 + vmovdqa XMMWORD[16+rsp],xmm9 + add eax,edi + and esi,ecx + vpsrld xmm8,xmm6,31 + xor ecx,edx + add eax,ebx + shrd ebx,ebx,7 + xor esi,edx + vpslldq xmm10,xmm6,12 + vpaddd xmm6,xmm6,xmm6 + mov edi,eax + add ebp,DWORD[40+rsp] + xor ebx,ecx + shld eax,eax,5 + vpsrld xmm9,xmm10,30 + vpor xmm6,xmm6,xmm8 + add ebp,esi + and edi,ebx + xor ebx,ecx + add ebp,eax + vpslld xmm10,xmm10,2 + vpxor xmm6,xmm6,xmm9 + shrd eax,eax,7 + xor edi,ecx + mov esi,ebp + add edx,DWORD[44+rsp] + vpxor xmm6,xmm6,xmm10 + xor eax,ebx + shld ebp,ebp,5 + add edx,edi + and esi,eax + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor esi,ebx + vpalignr xmm7,xmm4,xmm3,8 + mov edi,edx + add ecx,DWORD[48+rsp] + vpaddd xmm9,xmm11,xmm6 + xor ebp,eax + shld edx,edx,5 + vpsrldq xmm8,xmm6,4 + add ecx,esi + and edi,ebp + vpxor xmm7,xmm7,xmm3 + xor ebp,eax + add ecx,edx + vpxor xmm8,xmm8,xmm5 + shrd edx,edx,7 + xor edi,eax + mov esi,ecx + add ebx,DWORD[52+rsp] + vpxor xmm7,xmm7,xmm8 + xor edx,ebp + shld ecx,ecx,5 + vmovdqa XMMWORD[32+rsp],xmm9 + add ebx,edi + and esi,edx + vpsrld xmm8,xmm7,31 + xor edx,ebp + add ebx,ecx + shrd ecx,ecx,7 + xor esi,ebp + vpslldq xmm10,xmm7,12 + vpaddd xmm7,xmm7,xmm7 + mov edi,ebx + add eax,DWORD[56+rsp] + xor ecx,edx + shld ebx,ebx,5 + vpsrld xmm9,xmm10,30 + vpor xmm7,xmm7,xmm8 + add eax,esi + and edi,ecx + xor ecx,edx + add eax,ebx + vpslld xmm10,xmm10,2 + vpxor xmm7,xmm7,xmm9 + shrd ebx,ebx,7 + xor edi,edx + mov esi,eax + add ebp,DWORD[60+rsp] + vpxor xmm7,xmm7,xmm10 + xor ebx,ecx + shld eax,eax,5 + add ebp,edi + and esi,ebx + xor ebx,ecx + add ebp,eax + vpalignr xmm8,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + shrd eax,eax,7 + xor esi,ecx + mov edi,ebp + add edx,DWORD[rsp] + vpxor xmm0,xmm0,xmm1 + xor eax,ebx + shld ebp,ebp,5 + vpaddd xmm9,xmm11,xmm7 + add edx,esi + and edi,eax + vpxor xmm0,xmm0,xmm8 + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor edi,ebx + vpsrld xmm8,xmm0,30 + vmovdqa XMMWORD[48+rsp],xmm9 + mov esi,edx + add ecx,DWORD[4+rsp] + xor ebp,eax + shld edx,edx,5 + vpslld xmm0,xmm0,2 + add ecx,edi + and esi,ebp + xor ebp,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + mov edi,ecx + add ebx,DWORD[8+rsp] + vpor xmm0,xmm0,xmm8 + xor edx,ebp + shld ecx,ecx,5 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[12+rsp] + xor edi,ebp + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpalignr xmm8,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ebp,DWORD[16+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + vpxor xmm1,xmm1,xmm2 + add ebp,esi + xor edi,ecx + vpaddd xmm9,xmm11,xmm0 + shrd ebx,ebx,7 + add ebp,eax + vpxor xmm1,xmm1,xmm8 + add edx,DWORD[20+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + vpsrld xmm8,xmm1,30 + vmovdqa XMMWORD[rsp],xmm9 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpslld xmm1,xmm1,2 + add ecx,DWORD[24+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vpor xmm1,xmm1,xmm8 + add ebx,DWORD[28+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + vpalignr xmm8,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add eax,DWORD[32+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + vpxor xmm2,xmm2,xmm3 + add eax,esi + xor edi,edx + vpaddd xmm9,xmm11,xmm1 + vmovdqa xmm11,XMMWORD[r14] + shrd ecx,ecx,7 + add eax,ebx + vpxor xmm2,xmm2,xmm8 + add ebp,DWORD[36+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + vpsrld xmm8,xmm2,30 + vmovdqa XMMWORD[16+rsp],xmm9 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpslld xmm2,xmm2,2 + add edx,DWORD[40+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + vpor xmm2,xmm2,xmm8 + add ecx,DWORD[44+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + vpalignr xmm8,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebx,DWORD[48+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + vpxor xmm3,xmm3,xmm4 + add ebx,esi + xor edi,ebp + vpaddd xmm9,xmm11,xmm2 + shrd edx,edx,7 + add ebx,ecx + vpxor xmm3,xmm3,xmm8 + add eax,DWORD[52+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + vpsrld xmm8,xmm3,30 + vmovdqa XMMWORD[32+rsp],xmm9 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpslld xmm3,xmm3,2 + add ebp,DWORD[56+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpor xmm3,xmm3,xmm8 + add edx,DWORD[60+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpalignr xmm8,xmm3,xmm2,8 + vpxor xmm4,xmm4,xmm0 + add ecx,DWORD[rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + vpxor xmm4,xmm4,xmm5 + add ecx,esi + xor edi,eax + vpaddd xmm9,xmm11,xmm3 + shrd ebp,ebp,7 + add ecx,edx + vpxor xmm4,xmm4,xmm8 + add ebx,DWORD[4+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + vpsrld xmm8,xmm4,30 + vmovdqa XMMWORD[48+rsp],xmm9 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + vpslld xmm4,xmm4,2 + add eax,DWORD[8+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + vpor xmm4,xmm4,xmm8 + add ebp,DWORD[12+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpalignr xmm8,xmm4,xmm3,8 + vpxor xmm5,xmm5,xmm1 + add edx,DWORD[16+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + vpxor xmm5,xmm5,xmm6 + add edx,esi + xor edi,ebx + vpaddd xmm9,xmm11,xmm4 + shrd eax,eax,7 + add edx,ebp + vpxor xmm5,xmm5,xmm8 + add ecx,DWORD[20+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + vpsrld xmm8,xmm5,30 + vmovdqa XMMWORD[rsp],xmm9 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + vpslld xmm5,xmm5,2 + add ebx,DWORD[24+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + vpor xmm5,xmm5,xmm8 + add eax,DWORD[28+rsp] + shrd ecx,ecx,7 + mov esi,ebx + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + vpalignr xmm8,xmm5,xmm4,8 + vpxor xmm6,xmm6,xmm2 + add ebp,DWORD[32+rsp] + and esi,ecx + xor ecx,edx + shrd ebx,ebx,7 + vpxor xmm6,xmm6,xmm7 + mov edi,eax + xor esi,ecx + vpaddd xmm9,xmm11,xmm5 + shld eax,eax,5 + add ebp,esi + vpxor xmm6,xmm6,xmm8 + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[36+rsp] + vpsrld xmm8,xmm6,30 + vmovdqa XMMWORD[16+rsp],xmm9 + and edi,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,ebp + vpslld xmm6,xmm6,2 + xor edi,ebx + shld ebp,ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[40+rsp] + and esi,eax + vpor xmm6,xmm6,xmm8 + xor eax,ebx + shrd ebp,ebp,7 + mov edi,edx + xor esi,eax + shld edx,edx,5 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[44+rsp] + and edi,ebp + xor ebp,eax + shrd edx,edx,7 + mov esi,ecx + xor edi,ebp + shld ecx,ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + vpalignr xmm8,xmm6,xmm5,8 + vpxor xmm7,xmm7,xmm3 + add eax,DWORD[48+rsp] + and esi,edx + xor edx,ebp + shrd ecx,ecx,7 + vpxor xmm7,xmm7,xmm0 + mov edi,ebx + xor esi,edx + vpaddd xmm9,xmm11,xmm6 + vmovdqa xmm11,XMMWORD[32+r14] + shld ebx,ebx,5 + add eax,esi + vpxor xmm7,xmm7,xmm8 + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[52+rsp] + vpsrld xmm8,xmm7,30 + vmovdqa XMMWORD[32+rsp],xmm9 + and edi,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + vpslld xmm7,xmm7,2 + xor edi,ecx + shld eax,eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[56+rsp] + and esi,ebx + vpor xmm7,xmm7,xmm8 + xor ebx,ecx + shrd eax,eax,7 + mov edi,ebp + xor esi,ebx + shld ebp,ebp,5 + add edx,esi + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[60+rsp] + and edi,eax + xor eax,ebx + shrd ebp,ebp,7 + mov esi,edx + xor edi,eax + shld edx,edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + vpalignr xmm8,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + add ebx,DWORD[rsp] + and esi,ebp + xor ebp,eax + shrd edx,edx,7 + vpxor xmm0,xmm0,xmm1 + mov edi,ecx + xor esi,ebp + vpaddd xmm9,xmm11,xmm7 + shld ecx,ecx,5 + add ebx,esi + vpxor xmm0,xmm0,xmm8 + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[4+rsp] + vpsrld xmm8,xmm0,30 + vmovdqa XMMWORD[48+rsp],xmm9 + and edi,edx + xor edx,ebp + shrd ecx,ecx,7 + mov esi,ebx + vpslld xmm0,xmm0,2 + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[8+rsp] + and esi,ecx + vpor xmm0,xmm0,xmm8 + xor ecx,edx + shrd ebx,ebx,7 + mov edi,eax + xor esi,ecx + shld eax,eax,5 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[12+rsp] + and edi,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,ebp + xor edi,ebx + shld ebp,ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + vpalignr xmm8,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ecx,DWORD[16+rsp] + and esi,eax + xor eax,ebx + shrd ebp,ebp,7 + vpxor xmm1,xmm1,xmm2 + mov edi,edx + xor esi,eax + vpaddd xmm9,xmm11,xmm0 + shld edx,edx,5 + add ecx,esi + vpxor xmm1,xmm1,xmm8 + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[20+rsp] + vpsrld xmm8,xmm1,30 + vmovdqa XMMWORD[rsp],xmm9 + and edi,ebp + xor ebp,eax + shrd edx,edx,7 + mov esi,ecx + vpslld xmm1,xmm1,2 + xor edi,ebp + shld ecx,ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[24+rsp] + and esi,edx + vpor xmm1,xmm1,xmm8 + xor edx,ebp + shrd ecx,ecx,7 + mov edi,ebx + xor esi,edx + shld ebx,ebx,5 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[28+rsp] + and edi,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + xor edi,ecx + shld eax,eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + vpalignr xmm8,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add edx,DWORD[32+rsp] + and esi,ebx + xor ebx,ecx + shrd eax,eax,7 + vpxor xmm2,xmm2,xmm3 + mov edi,ebp + xor esi,ebx + vpaddd xmm9,xmm11,xmm1 + shld ebp,ebp,5 + add edx,esi + vpxor xmm2,xmm2,xmm8 + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[36+rsp] + vpsrld xmm8,xmm2,30 + vmovdqa XMMWORD[16+rsp],xmm9 + and edi,eax + xor eax,ebx + shrd ebp,ebp,7 + mov esi,edx + vpslld xmm2,xmm2,2 + xor edi,eax + shld edx,edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[40+rsp] + and esi,ebp + vpor xmm2,xmm2,xmm8 + xor ebp,eax + shrd edx,edx,7 + mov edi,ecx + xor esi,ebp + shld ecx,ecx,5 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[44+rsp] + and edi,edx + xor edx,ebp + shrd ecx,ecx,7 + mov esi,ebx + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + vpalignr xmm8,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebp,DWORD[48+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + vpxor xmm3,xmm3,xmm4 + add ebp,esi + xor edi,ecx + vpaddd xmm9,xmm11,xmm2 + shrd ebx,ebx,7 + add ebp,eax + vpxor xmm3,xmm3,xmm8 + add edx,DWORD[52+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + vpsrld xmm8,xmm3,30 + vmovdqa XMMWORD[32+rsp],xmm9 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpslld xmm3,xmm3,2 + add ecx,DWORD[56+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vpor xmm3,xmm3,xmm8 + add ebx,DWORD[60+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[rsp] + vpaddd xmm9,xmm11,xmm3 + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + vmovdqa XMMWORD[48+rsp],xmm9 + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[4+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[8+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD[12+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + cmp r9,r10 + je NEAR $L$done_avx + vmovdqa xmm6,XMMWORD[64+r14] + vmovdqa xmm11,XMMWORD[((-64))+r14] + vmovdqu xmm0,XMMWORD[r9] + vmovdqu xmm1,XMMWORD[16+r9] + vmovdqu xmm2,XMMWORD[32+r9] + vmovdqu xmm3,XMMWORD[48+r9] + vpshufb xmm0,xmm0,xmm6 + add r9,64 + add ebx,DWORD[16+rsp] + xor esi,ebp + vpshufb xmm1,xmm1,xmm6 + mov edi,ecx + shld ecx,ecx,5 + vpaddd xmm4,xmm0,xmm11 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + vmovdqa XMMWORD[rsp],xmm4 + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + vpshufb xmm2,xmm2,xmm6 + mov edi,edx + shld edx,edx,5 + vpaddd xmm5,xmm1,xmm11 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vmovdqa XMMWORD[16+rsp],xmm5 + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + vpshufb xmm3,xmm3,xmm6 + mov edi,ebp + shld ebp,ebp,5 + vpaddd xmm6,xmm2,xmm11 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + vmovdqa XMMWORD[32+rsp],xmm6 + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + shrd ecx,ecx,7 + add eax,ebx + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + add edx,DWORD[12+r8] + mov DWORD[r8],eax + add ebp,DWORD[16+r8] + mov DWORD[4+r8],esi + mov ebx,esi + mov DWORD[8+r8],ecx + mov edi,ecx + mov DWORD[12+r8],edx + xor edi,edx + mov DWORD[16+r8],ebp + and esi,edi + jmp NEAR $L$oop_avx + +ALIGN 16 +$L$done_avx: + add ebx,DWORD[16+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + shrd ecx,ecx,7 + add eax,ebx + vzeroupper + + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + mov DWORD[r8],eax + add edx,DWORD[12+r8] + mov DWORD[4+r8],esi + add ebp,DWORD[16+r8] + mov DWORD[8+r8],ecx + mov DWORD[12+r8],edx + mov DWORD[16+r8],ebp + movaps xmm6,XMMWORD[((-40-96))+r11] + movaps xmm7,XMMWORD[((-40-80))+r11] + movaps xmm8,XMMWORD[((-40-64))+r11] + movaps xmm9,XMMWORD[((-40-48))+r11] + movaps xmm10,XMMWORD[((-40-32))+r11] + movaps xmm11,XMMWORD[((-40-16))+r11] + mov r14,QWORD[((-40))+r11] + + mov r13,QWORD[((-32))+r11] + + mov r12,QWORD[((-24))+r11] + + mov rbp,QWORD[((-16))+r11] + + mov rbx,QWORD[((-8))+r11] + + lea rsp,[r11] + +$L$epilogue_avx: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha1_block_data_order_avx: +global sha1_block_data_order_avx2 + +ALIGN 16 +sha1_block_data_order_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov r11,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + vzeroupper + lea rsp,[((-96))+rsp] + vmovaps XMMWORD[(-40-96)+r11],xmm6 + vmovaps XMMWORD[(-40-80)+r11],xmm7 + vmovaps XMMWORD[(-40-64)+r11],xmm8 + vmovaps XMMWORD[(-40-48)+r11],xmm9 + vmovaps XMMWORD[(-40-32)+r11],xmm10 + vmovaps XMMWORD[(-40-16)+r11],xmm11 +$L$prologue_avx2: + mov r8,rdi + mov r9,rsi + mov r10,rdx + + lea rsp,[((-640))+rsp] + shl r10,6 + lea r13,[64+r9] + and rsp,-128 + add r10,r9 + lea r14,[((K_XX_XX+64))] + + mov eax,DWORD[r8] + cmp r13,r10 + cmovae r13,r9 + mov ebp,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov esi,DWORD[16+r8] + vmovdqu ymm6,YMMWORD[64+r14] + + vmovdqu xmm0,XMMWORD[r9] + vmovdqu xmm1,XMMWORD[16+r9] + vmovdqu xmm2,XMMWORD[32+r9] + vmovdqu xmm3,XMMWORD[48+r9] + lea r9,[64+r9] + vinserti128 ymm0,ymm0,XMMWORD[r13],1 + vinserti128 ymm1,ymm1,XMMWORD[16+r13],1 + vpshufb ymm0,ymm0,ymm6 + vinserti128 ymm2,ymm2,XMMWORD[32+r13],1 + vpshufb ymm1,ymm1,ymm6 + vinserti128 ymm3,ymm3,XMMWORD[48+r13],1 + vpshufb ymm2,ymm2,ymm6 + vmovdqu ymm11,YMMWORD[((-64))+r14] + vpshufb ymm3,ymm3,ymm6 + + vpaddd ymm4,ymm0,ymm11 + vpaddd ymm5,ymm1,ymm11 + vmovdqu YMMWORD[rsp],ymm4 + vpaddd ymm6,ymm2,ymm11 + vmovdqu YMMWORD[32+rsp],ymm5 + vpaddd ymm7,ymm3,ymm11 + vmovdqu YMMWORD[64+rsp],ymm6 + vmovdqu YMMWORD[96+rsp],ymm7 + vpalignr ymm4,ymm1,ymm0,8 + vpsrldq ymm8,ymm3,4 + vpxor ymm4,ymm4,ymm0 + vpxor ymm8,ymm8,ymm2 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm8,ymm4,31 + vpslldq ymm10,ymm4,12 + vpaddd ymm4,ymm4,ymm4 + vpsrld ymm9,ymm10,30 + vpor ymm4,ymm4,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm4,ymm4,ymm9 + vpxor ymm4,ymm4,ymm10 + vpaddd ymm9,ymm4,ymm11 + vmovdqu YMMWORD[128+rsp],ymm9 + vpalignr ymm5,ymm2,ymm1,8 + vpsrldq ymm8,ymm4,4 + vpxor ymm5,ymm5,ymm1 + vpxor ymm8,ymm8,ymm3 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm5,31 + vmovdqu ymm11,YMMWORD[((-32))+r14] + vpslldq ymm10,ymm5,12 + vpaddd ymm5,ymm5,ymm5 + vpsrld ymm9,ymm10,30 + vpor ymm5,ymm5,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm5,ymm5,ymm9 + vpxor ymm5,ymm5,ymm10 + vpaddd ymm9,ymm5,ymm11 + vmovdqu YMMWORD[160+rsp],ymm9 + vpalignr ymm6,ymm3,ymm2,8 + vpsrldq ymm8,ymm5,4 + vpxor ymm6,ymm6,ymm2 + vpxor ymm8,ymm8,ymm4 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm6,31 + vpslldq ymm10,ymm6,12 + vpaddd ymm6,ymm6,ymm6 + vpsrld ymm9,ymm10,30 + vpor ymm6,ymm6,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm6,ymm6,ymm9 + vpxor ymm6,ymm6,ymm10 + vpaddd ymm9,ymm6,ymm11 + vmovdqu YMMWORD[192+rsp],ymm9 + vpalignr ymm7,ymm4,ymm3,8 + vpsrldq ymm8,ymm6,4 + vpxor ymm7,ymm7,ymm3 + vpxor ymm8,ymm8,ymm5 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm7,31 + vpslldq ymm10,ymm7,12 + vpaddd ymm7,ymm7,ymm7 + vpsrld ymm9,ymm10,30 + vpor ymm7,ymm7,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm7,ymm7,ymm9 + vpxor ymm7,ymm7,ymm10 + vpaddd ymm9,ymm7,ymm11 + vmovdqu YMMWORD[224+rsp],ymm9 + lea r13,[128+rsp] + jmp NEAR $L$oop_avx2 +ALIGN 32 +$L$oop_avx2: + rorx ebx,ebp,2 + andn edi,ebp,edx + and ebp,ecx + xor ebp,edi + jmp NEAR $L$align32_1 +ALIGN 32 +$L$align32_1: + vpalignr ymm8,ymm7,ymm6,8 + vpxor ymm0,ymm0,ymm4 + add esi,DWORD[((-128))+r13] + andn edi,eax,ecx + vpxor ymm0,ymm0,ymm1 + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + vpxor ymm0,ymm0,ymm8 + and eax,ebx + add esi,r12d + xor eax,edi + vpsrld ymm8,ymm0,30 + vpslld ymm0,ymm0,2 + add edx,DWORD[((-124))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + vpor ymm0,ymm0,ymm8 + add edx,r12d + xor esi,edi + add ecx,DWORD[((-120))+r13] + andn edi,edx,ebp + vpaddd ymm9,ymm0,ymm11 + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + vmovdqu YMMWORD[256+rsp],ymm9 + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-116))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD[((-96))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + vpalignr ymm8,ymm0,ymm7,8 + vpxor ymm1,ymm1,ymm5 + add eax,DWORD[((-92))+r13] + andn edi,ebp,edx + vpxor ymm1,ymm1,ymm2 + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + vpxor ymm1,ymm1,ymm8 + and ebp,ecx + add eax,r12d + xor ebp,edi + vpsrld ymm8,ymm1,30 + vpslld ymm1,ymm1,2 + add esi,DWORD[((-88))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + vpor ymm1,ymm1,ymm8 + add esi,r12d + xor eax,edi + add edx,DWORD[((-84))+r13] + andn edi,esi,ebx + vpaddd ymm9,ymm1,ymm11 + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + vmovdqu YMMWORD[288+rsp],ymm9 + add edx,r12d + xor esi,edi + add ecx,DWORD[((-64))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-60))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + vpalignr ymm8,ymm1,ymm0,8 + vpxor ymm2,ymm2,ymm6 + add ebp,DWORD[((-56))+r13] + andn edi,ebx,esi + vpxor ymm2,ymm2,ymm3 + vmovdqu ymm11,YMMWORD[r14] + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpxor ymm2,ymm2,ymm8 + and ebx,edx + add ebp,r12d + xor ebx,edi + vpsrld ymm8,ymm2,30 + vpslld ymm2,ymm2,2 + add eax,DWORD[((-52))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + vpor ymm2,ymm2,ymm8 + add eax,r12d + xor ebp,edi + add esi,DWORD[((-32))+r13] + andn edi,eax,ecx + vpaddd ymm9,ymm2,ymm11 + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + vmovdqu YMMWORD[320+rsp],ymm9 + add esi,r12d + xor eax,edi + add edx,DWORD[((-28))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD[((-24))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + vpalignr ymm8,ymm2,ymm1,8 + vpxor ymm3,ymm3,ymm7 + add ebx,DWORD[((-20))+r13] + andn edi,ecx,eax + vpxor ymm3,ymm3,ymm4 + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpxor ymm3,ymm3,ymm8 + and ecx,esi + add ebx,r12d + xor ecx,edi + vpsrld ymm8,ymm3,30 + vpslld ymm3,ymm3,2 + add ebp,DWORD[r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + vpor ymm3,ymm3,ymm8 + add ebp,r12d + xor ebx,edi + add eax,DWORD[4+r13] + andn edi,ebp,edx + vpaddd ymm9,ymm3,ymm11 + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + vmovdqu YMMWORD[352+rsp],ymm9 + add eax,r12d + xor ebp,edi + add esi,DWORD[8+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[12+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + vpalignr ymm8,ymm3,ymm2,8 + vpxor ymm4,ymm4,ymm0 + add ecx,DWORD[32+r13] + lea ecx,[rsi*1+rcx] + vpxor ymm4,ymm4,ymm5 + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpxor ymm4,ymm4,ymm8 + add ecx,r12d + xor edx,ebp + add ebx,DWORD[36+r13] + vpsrld ymm8,ymm4,30 + vpslld ymm4,ymm4,2 + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vpor ymm4,ymm4,ymm8 + add ebp,DWORD[40+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpaddd ymm9,ymm4,ymm11 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[44+r13] + vmovdqu YMMWORD[384+rsp],ymm9 + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[64+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpalignr ymm8,ymm4,ymm3,8 + vpxor ymm5,ymm5,ymm1 + add edx,DWORD[68+r13] + lea edx,[rax*1+rdx] + vpxor ymm5,ymm5,ymm6 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + vpxor ymm5,ymm5,ymm8 + add edx,r12d + xor esi,ebx + add ecx,DWORD[72+r13] + vpsrld ymm8,ymm5,30 + vpslld ymm5,ymm5,2 + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + vpor ymm5,ymm5,ymm8 + add ebx,DWORD[76+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpaddd ymm9,ymm5,ymm11 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[96+r13] + vmovdqu YMMWORD[416+rsp],ymm9 + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[100+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpalignr ymm8,ymm5,ymm4,8 + vpxor ymm6,ymm6,ymm2 + add esi,DWORD[104+r13] + lea esi,[rbp*1+rsi] + vpxor ymm6,ymm6,ymm7 + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + vpxor ymm6,ymm6,ymm8 + add esi,r12d + xor eax,ecx + add edx,DWORD[108+r13] + lea r13,[256+r13] + vpsrld ymm8,ymm6,30 + vpslld ymm6,ymm6,2 + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + vpor ymm6,ymm6,ymm8 + add ecx,DWORD[((-128))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + vpaddd ymm9,ymm6,ymm11 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-124))+r13] + vmovdqu YMMWORD[448+rsp],ymm9 + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-120))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpalignr ymm8,ymm6,ymm5,8 + vpxor ymm7,ymm7,ymm3 + add eax,DWORD[((-116))+r13] + lea eax,[rbx*1+rax] + vpxor ymm7,ymm7,ymm0 + vmovdqu ymm11,YMMWORD[32+r14] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + vpxor ymm7,ymm7,ymm8 + add eax,r12d + xor ebp,edx + add esi,DWORD[((-96))+r13] + vpsrld ymm8,ymm7,30 + vpslld ymm7,ymm7,2 + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpor ymm7,ymm7,ymm8 + add edx,DWORD[((-92))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpaddd ymm9,ymm7,ymm11 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-88))+r13] + vmovdqu YMMWORD[480+rsp],ymm9 + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-84))+r13] + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + jmp NEAR $L$align32_2 +ALIGN 32 +$L$align32_2: + vpalignr ymm8,ymm7,ymm6,8 + vpxor ymm0,ymm0,ymm4 + add ebp,DWORD[((-64))+r13] + xor ecx,esi + vpxor ymm0,ymm0,ymm1 + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + vpxor ymm0,ymm0,ymm8 + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + vpsrld ymm8,ymm0,30 + vpslld ymm0,ymm0,2 + add ebp,r12d + and ebx,edi + add eax,DWORD[((-60))+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + vpor ymm0,ymm0,ymm8 + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + vpaddd ymm9,ymm0,ymm11 + add eax,r12d + and ebp,edi + add esi,DWORD[((-56))+r13] + xor ebp,ecx + vmovdqu YMMWORD[512+rsp],ymm9 + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD[((-52))+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + add ecx,DWORD[((-32))+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + vpalignr ymm8,ymm0,ymm7,8 + vpxor ymm1,ymm1,ymm5 + add ebx,DWORD[((-28))+r13] + xor edx,eax + vpxor ymm1,ymm1,ymm2 + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + vpxor ymm1,ymm1,ymm8 + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vpsrld ymm8,ymm1,30 + vpslld ymm1,ymm1,2 + add ebx,r12d + and ecx,edi + add ebp,DWORD[((-24))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + vpor ymm1,ymm1,ymm8 + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + vpaddd ymm9,ymm1,ymm11 + add ebp,r12d + and ebx,edi + add eax,DWORD[((-20))+r13] + xor ebx,edx + vmovdqu YMMWORD[544+rsp],ymm9 + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD[4+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + vpalignr ymm8,ymm1,ymm0,8 + vpxor ymm2,ymm2,ymm6 + add ecx,DWORD[8+r13] + xor esi,ebp + vpxor ymm2,ymm2,ymm3 + mov edi,eax + xor edi,ebp + lea ecx,[rsi*1+rcx] + vpxor ymm2,ymm2,ymm8 + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpsrld ymm8,ymm2,30 + vpslld ymm2,ymm2,2 + add ecx,r12d + and edx,edi + add ebx,DWORD[12+r13] + xor edx,eax + mov edi,esi + xor edi,eax + vpor ymm2,ymm2,ymm8 + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vpaddd ymm9,ymm2,ymm11 + add ebx,r12d + and ecx,edi + add ebp,DWORD[32+r13] + xor ecx,esi + vmovdqu YMMWORD[576+rsp],ymm9 + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[36+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[40+r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + vpalignr ymm8,ymm2,ymm1,8 + vpxor ymm3,ymm3,ymm7 + add edx,DWORD[44+r13] + xor eax,ebx + vpxor ymm3,ymm3,ymm4 + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + vpxor ymm3,ymm3,ymm8 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + vpsrld ymm8,ymm3,30 + vpslld ymm3,ymm3,2 + add edx,r12d + and esi,edi + add ecx,DWORD[64+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + vpor ymm3,ymm3,ymm8 + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpaddd ymm9,ymm3,ymm11 + add ecx,r12d + and edx,edi + add ebx,DWORD[68+r13] + xor edx,eax + vmovdqu YMMWORD[608+rsp],ymm9 + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD[72+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[76+r13] + xor ebx,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[96+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[100+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[104+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[108+r13] + lea r13,[256+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-128))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[((-124))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-120))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-116))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-96))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-92))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-88))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[((-84))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-64))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-60))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-56))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-52))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-32))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[((-28))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-24))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-20))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + add edx,r12d + lea r13,[128+r9] + lea rdi,[128+r9] + cmp r13,r10 + cmovae r13,r9 + + + add edx,DWORD[r8] + add esi,DWORD[4+r8] + add ebp,DWORD[8+r8] + mov DWORD[r8],edx + add ebx,DWORD[12+r8] + mov DWORD[4+r8],esi + mov eax,edx + add ecx,DWORD[16+r8] + mov r12d,ebp + mov DWORD[8+r8],ebp + mov edx,ebx + + mov DWORD[12+r8],ebx + mov ebp,esi + mov DWORD[16+r8],ecx + + mov esi,ecx + mov ecx,r12d + + + cmp r9,r10 + je NEAR $L$done_avx2 + vmovdqu ymm6,YMMWORD[64+r14] + cmp rdi,r10 + ja NEAR $L$ast_avx2 + + vmovdqu xmm0,XMMWORD[((-64))+rdi] + vmovdqu xmm1,XMMWORD[((-48))+rdi] + vmovdqu xmm2,XMMWORD[((-32))+rdi] + vmovdqu xmm3,XMMWORD[((-16))+rdi] + vinserti128 ymm0,ymm0,XMMWORD[r13],1 + vinserti128 ymm1,ymm1,XMMWORD[16+r13],1 + vinserti128 ymm2,ymm2,XMMWORD[32+r13],1 + vinserti128 ymm3,ymm3,XMMWORD[48+r13],1 + jmp NEAR $L$ast_avx2 + +ALIGN 32 +$L$ast_avx2: + lea r13,[((128+16))+rsp] + rorx ebx,ebp,2 + andn edi,ebp,edx + and ebp,ecx + xor ebp,edi + sub r9,-128 + add esi,DWORD[((-128))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[((-124))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD[((-120))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-116))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD[((-96))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD[((-92))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD[((-88))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[((-84))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD[((-64))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-60))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD[((-56))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD[((-52))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD[((-32))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[((-28))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD[((-24))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-20))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD[r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD[4+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD[8+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[12+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[32+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[36+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[40+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[44+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[64+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vmovdqu ymm11,YMMWORD[((-64))+r14] + vpshufb ymm0,ymm0,ymm6 + add edx,DWORD[68+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[72+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[76+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[96+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[100+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpshufb ymm1,ymm1,ymm6 + vpaddd ymm8,ymm0,ymm11 + add esi,DWORD[104+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[108+r13] + lea r13,[256+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-128))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-124))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-120))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vmovdqu YMMWORD[rsp],ymm8 + vpshufb ymm2,ymm2,ymm6 + vpaddd ymm9,ymm1,ymm11 + add eax,DWORD[((-116))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-96))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-92))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-88))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-84))+r13] + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + vmovdqu YMMWORD[32+rsp],ymm9 + vpshufb ymm3,ymm3,ymm6 + vpaddd ymm6,ymm2,ymm11 + add ebp,DWORD[((-64))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[((-60))+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[((-56))+r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD[((-52))+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + add ecx,DWORD[((-32))+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + jmp NEAR $L$align32_3 +ALIGN 32 +$L$align32_3: + vmovdqu YMMWORD[64+rsp],ymm6 + vpaddd ymm7,ymm3,ymm11 + add ebx,DWORD[((-28))+r13] + xor edx,eax + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD[((-24))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[((-20))+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD[4+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + vmovdqu YMMWORD[96+rsp],ymm7 + add ecx,DWORD[8+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + add ebx,DWORD[12+r13] + xor edx,eax + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD[32+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[36+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[40+r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + vpalignr ymm4,ymm1,ymm0,8 + add edx,DWORD[44+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + vpsrldq ymm8,ymm3,4 + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpxor ymm4,ymm4,ymm0 + vpxor ymm8,ymm8,ymm2 + xor esi,ebp + add edx,r12d + vpxor ymm4,ymm4,ymm8 + and esi,edi + add ecx,DWORD[64+r13] + xor esi,ebp + mov edi,eax + vpsrld ymm8,ymm4,31 + xor edi,ebp + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + vpslldq ymm10,ymm4,12 + vpaddd ymm4,ymm4,ymm4 + rorx esi,edx,2 + xor edx,eax + vpsrld ymm9,ymm10,30 + vpor ymm4,ymm4,ymm8 + add ecx,r12d + and edx,edi + vpslld ymm10,ymm10,2 + vpxor ymm4,ymm4,ymm9 + add ebx,DWORD[68+r13] + xor edx,eax + vpxor ymm4,ymm4,ymm10 + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + vpaddd ymm9,ymm4,ymm11 + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vmovdqu YMMWORD[128+rsp],ymm9 + add ebx,r12d + and ecx,edi + add ebp,DWORD[72+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[76+r13] + xor ebx,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpalignr ymm5,ymm2,ymm1,8 + add esi,DWORD[96+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + vpsrldq ymm8,ymm4,4 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpxor ymm5,ymm5,ymm1 + vpxor ymm8,ymm8,ymm3 + add edx,DWORD[100+r13] + lea edx,[rax*1+rdx] + vpxor ymm5,ymm5,ymm8 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + vpsrld ymm8,ymm5,31 + vmovdqu ymm11,YMMWORD[((-32))+r14] + xor esi,ebx + add ecx,DWORD[104+r13] + lea ecx,[rsi*1+rcx] + vpslldq ymm10,ymm5,12 + vpaddd ymm5,ymm5,ymm5 + rorx r12d,edx,27 + rorx esi,edx,2 + vpsrld ymm9,ymm10,30 + vpor ymm5,ymm5,ymm8 + xor edx,eax + add ecx,r12d + vpslld ymm10,ymm10,2 + vpxor ymm5,ymm5,ymm9 + xor edx,ebp + add ebx,DWORD[108+r13] + lea r13,[256+r13] + vpxor ymm5,ymm5,ymm10 + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpaddd ymm9,ymm5,ymm11 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vmovdqu YMMWORD[160+rsp],ymm9 + add ebp,DWORD[((-128))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpalignr ymm6,ymm3,ymm2,8 + add eax,DWORD[((-124))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + vpsrldq ymm8,ymm5,4 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpxor ymm6,ymm6,ymm2 + vpxor ymm8,ymm8,ymm4 + add esi,DWORD[((-120))+r13] + lea esi,[rbp*1+rsi] + vpxor ymm6,ymm6,ymm8 + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + vpsrld ymm8,ymm6,31 + xor eax,ecx + add edx,DWORD[((-116))+r13] + lea edx,[rax*1+rdx] + vpslldq ymm10,ymm6,12 + vpaddd ymm6,ymm6,ymm6 + rorx r12d,esi,27 + rorx eax,esi,2 + vpsrld ymm9,ymm10,30 + vpor ymm6,ymm6,ymm8 + xor esi,ebp + add edx,r12d + vpslld ymm10,ymm10,2 + vpxor ymm6,ymm6,ymm9 + xor esi,ebx + add ecx,DWORD[((-96))+r13] + vpxor ymm6,ymm6,ymm10 + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + vpaddd ymm9,ymm6,ymm11 + xor edx,eax + add ecx,r12d + xor edx,ebp + vmovdqu YMMWORD[192+rsp],ymm9 + add ebx,DWORD[((-92))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vpalignr ymm7,ymm4,ymm3,8 + add ebp,DWORD[((-88))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpsrldq ymm8,ymm6,4 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpxor ymm7,ymm7,ymm3 + vpxor ymm8,ymm8,ymm5 + add eax,DWORD[((-84))+r13] + lea eax,[rbx*1+rax] + vpxor ymm7,ymm7,ymm8 + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + vpsrld ymm8,ymm7,31 + xor ebp,edx + add esi,DWORD[((-64))+r13] + lea esi,[rbp*1+rsi] + vpslldq ymm10,ymm7,12 + vpaddd ymm7,ymm7,ymm7 + rorx r12d,eax,27 + rorx ebp,eax,2 + vpsrld ymm9,ymm10,30 + vpor ymm7,ymm7,ymm8 + xor eax,ebx + add esi,r12d + vpslld ymm10,ymm10,2 + vpxor ymm7,ymm7,ymm9 + xor eax,ecx + add edx,DWORD[((-60))+r13] + vpxor ymm7,ymm7,ymm10 + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpaddd ymm9,ymm7,ymm11 + xor esi,ebp + add edx,r12d + xor esi,ebx + vmovdqu YMMWORD[224+rsp],ymm9 + add ecx,DWORD[((-56))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-52))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-32))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[((-28))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-24))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-20))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + add edx,r12d + lea r13,[128+rsp] + + + add edx,DWORD[r8] + add esi,DWORD[4+r8] + add ebp,DWORD[8+r8] + mov DWORD[r8],edx + add ebx,DWORD[12+r8] + mov DWORD[4+r8],esi + mov eax,edx + add ecx,DWORD[16+r8] + mov r12d,ebp + mov DWORD[8+r8],ebp + mov edx,ebx + + mov DWORD[12+r8],ebx + mov ebp,esi + mov DWORD[16+r8],ecx + + mov esi,ecx + mov ecx,r12d + + + cmp r9,r10 + jbe NEAR $L$oop_avx2 + +$L$done_avx2: + vzeroupper + movaps xmm6,XMMWORD[((-40-96))+r11] + movaps xmm7,XMMWORD[((-40-80))+r11] + movaps xmm8,XMMWORD[((-40-64))+r11] + movaps xmm9,XMMWORD[((-40-48))+r11] + movaps xmm10,XMMWORD[((-40-32))+r11] + movaps xmm11,XMMWORD[((-40-16))+r11] + mov r14,QWORD[((-40))+r11] + + mov r13,QWORD[((-32))+r11] + + mov r12,QWORD[((-24))+r11] + + mov rbp,QWORD[((-16))+r11] + + mov rbx,QWORD[((-8))+r11] + + lea rsp,[r11] + +$L$epilogue_avx2: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha1_block_data_order_avx2: +section .rdata rdata align=8 +ALIGN 64 +K_XX_XX: + DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DB 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 + DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 + DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44 + DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60 + DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114 + DB 103,62,0 +ALIGN 64 +section .text + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[64+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + + jmp NEAR $L$common_seh_tail + + +ALIGN 16 +shaext_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue_shaext] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + lea r10,[$L$epilogue_shaext] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[((-8-64))+rax] + lea rdi,[512+r8] + mov ecx,8 + DD 0xa548f3fc + + jmp NEAR $L$common_seh_tail + + +ALIGN 16 +ssse3_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[208+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[((-40-96))+rax] + lea rdi,[512+r8] + mov ecx,12 + DD 0xa548f3fc + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha1_block_data_order_nohw wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_nohw wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_nohw wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_hw wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_hw wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_hw wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_avx wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_avx wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha1_block_data_order_nohw: + DB 9,0,0,0 + DD se_handler wrt ..imagebase +$L$SEH_info_sha1_block_data_order_hw: + DB 9,0,0,0 + DD shaext_handler wrt ..imagebase +$L$SEH_info_sha1_block_data_order_ssse3: + DB 9,0,0,0 + DD ssse3_handler wrt ..imagebase + DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase +$L$SEH_info_sha1_block_data_order_avx: + DB 9,0,0,0 + DD ssse3_handler wrt ..imagebase + DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase +$L$SEH_info_sha1_block_data_order_avx2: + DB 9,0,0,0 + DD ssse3_handler wrt ..imagebase + DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-586-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha256-586-apple.S new file mode 100644 index 0000000000..8e74e68620 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-586-apple.S @@ -0,0 +1,5593 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _sha256_block_data_order_nohw +.private_extern _sha256_block_data_order_nohw +.align 4 +_sha256_block_data_order_nohw: +L_sha256_block_data_order_nohw_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L000pic_point +L000pic_point: + popl %ebp + leal LK256-L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) +L001no_xmm: + subl %edi,%eax + cmpl $256,%eax + jae L002unrolled + jmp L003loop +.align 4,0x90 +L003loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 4,0x90 +L00400_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne L00400_15 + movl 156(%esp),%ecx + jmp L00516_63 +.align 4,0x90 +L00516_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne L00516_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb L003loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +LK256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 4,0x90 +L002unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp L006grand_loop +.align 4,0x90 +L006grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb L006grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _sha256_block_data_order_ssse3 +.private_extern _sha256_block_data_order_ssse3 +.align 4 +_sha256_block_data_order_ssse3: +L_sha256_block_data_order_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L007pic_point +L007pic_point: + popl %ebp + leal LK256-L007pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + movdqa 256(%ebp),%xmm7 + jmp L008grand_ssse3 +.align 4,0x90 +L008grand_ssse3: + movdqu (%edi),%xmm0 + movdqu 16(%edi),%xmm1 + movdqu 32(%edi),%xmm2 + movdqu 48(%edi),%xmm3 + addl $64,%edi +.byte 102,15,56,0,199 + movl %edi,100(%esp) +.byte 102,15,56,0,207 + movdqa (%ebp),%xmm4 +.byte 102,15,56,0,215 + movdqa 16(%ebp),%xmm5 + paddd %xmm0,%xmm4 +.byte 102,15,56,0,223 + movdqa 32(%ebp),%xmm6 + paddd %xmm1,%xmm5 + movdqa 48(%ebp),%xmm7 + movdqa %xmm4,32(%esp) + paddd %xmm2,%xmm6 + movdqa %xmm5,48(%esp) + paddd %xmm3,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + jmp L009ssse3_00_47 +.align 4,0x90 +L009ssse3_00_47: + addl $64,%ebp + movl %edx,%ecx + movdqa %xmm1,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,224,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,250,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm3,%xmm7 + xorl %esi,%ecx + addl 32(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm0 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm0 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm0,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa (%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm0,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,32(%esp) + movl %edx,%ecx + movdqa %xmm2,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,225,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,251,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm0,%xmm7 + xorl %esi,%ecx + addl 48(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm1 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm1 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm1,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 16(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm1,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,48(%esp) + movl %edx,%ecx + movdqa %xmm3,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,226,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,248,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm1,%xmm7 + xorl %esi,%ecx + addl 64(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm2 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm2 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm2,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 32(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm2,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,64(%esp) + movl %edx,%ecx + movdqa %xmm0,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,227,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,249,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm2,%xmm7 + xorl %esi,%ecx + addl 80(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm3 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm3 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm3,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 48(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm3,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne L009ssse3_00_47 + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + movdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb L008grand_ssse3 + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _sha256_block_data_order_avx +.private_extern _sha256_block_data_order_avx +.align 4 +_sha256_block_data_order_avx: +L_sha256_block_data_order_avx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L010pic_point +L010pic_point: + popl %ebp + leal LK256-L010pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp L011grand_avx +.align 5,0x90 +L011grand_avx: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp L012avx_00_47 +.align 4,0x90 +L012avx_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm0,%xmm0 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm0,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd (%ebp),%xmm0,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm1,%xmm1 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm1,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm2,%xmm2 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm2,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm3,%xmm3 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm3,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne L012avx_00_47 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb L011grand_avx + movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-586-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha256-586-linux.S new file mode 100644 index 0000000000..41b3759d36 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-586-linux.S @@ -0,0 +1,5599 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,@function +.align 16 +sha256_block_data_order_nohw: +.L_sha256_block_data_order_nohw_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .LK256-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) +.L001no_xmm: + subl %edi,%eax + cmpl $256,%eax + jae .L002unrolled + jmp .L003loop +.align 16 +.L003loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 16 +.L00400_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne .L00400_15 + movl 156(%esp),%ecx + jmp .L00516_63 +.align 16 +.L00516_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne .L00516_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L003loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.LK256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 16 +.L002unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp .L006grand_loop +.align 16 +.L006grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb .L006grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha256_block_data_order_nohw,.-.L_sha256_block_data_order_nohw_begin +.globl sha256_block_data_order_ssse3 +.hidden sha256_block_data_order_ssse3 +.type sha256_block_data_order_ssse3,@function +.align 16 +sha256_block_data_order_ssse3: +.L_sha256_block_data_order_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L007pic_point +.L007pic_point: + popl %ebp + leal .LK256-.L007pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + movdqa 256(%ebp),%xmm7 + jmp .L008grand_ssse3 +.align 16 +.L008grand_ssse3: + movdqu (%edi),%xmm0 + movdqu 16(%edi),%xmm1 + movdqu 32(%edi),%xmm2 + movdqu 48(%edi),%xmm3 + addl $64,%edi +.byte 102,15,56,0,199 + movl %edi,100(%esp) +.byte 102,15,56,0,207 + movdqa (%ebp),%xmm4 +.byte 102,15,56,0,215 + movdqa 16(%ebp),%xmm5 + paddd %xmm0,%xmm4 +.byte 102,15,56,0,223 + movdqa 32(%ebp),%xmm6 + paddd %xmm1,%xmm5 + movdqa 48(%ebp),%xmm7 + movdqa %xmm4,32(%esp) + paddd %xmm2,%xmm6 + movdqa %xmm5,48(%esp) + paddd %xmm3,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + jmp .L009ssse3_00_47 +.align 16 +.L009ssse3_00_47: + addl $64,%ebp + movl %edx,%ecx + movdqa %xmm1,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,224,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,250,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm3,%xmm7 + xorl %esi,%ecx + addl 32(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm0 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm0 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm0,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa (%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm0,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,32(%esp) + movl %edx,%ecx + movdqa %xmm2,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,225,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,251,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm0,%xmm7 + xorl %esi,%ecx + addl 48(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm1 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm1 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm1,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 16(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm1,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,48(%esp) + movl %edx,%ecx + movdqa %xmm3,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,226,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,248,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm1,%xmm7 + xorl %esi,%ecx + addl 64(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm2 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm2 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm2,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 32(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm2,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,64(%esp) + movl %edx,%ecx + movdqa %xmm0,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,227,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,249,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm2,%xmm7 + xorl %esi,%ecx + addl 80(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm3 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm3 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm3,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 48(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm3,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L009ssse3_00_47 + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + movdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L008grand_ssse3 + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha256_block_data_order_ssse3,.-.L_sha256_block_data_order_ssse3_begin +.globl sha256_block_data_order_avx +.hidden sha256_block_data_order_avx +.type sha256_block_data_order_avx,@function +.align 16 +sha256_block_data_order_avx: +.L_sha256_block_data_order_avx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L010pic_point +.L010pic_point: + popl %ebp + leal .LK256-.L010pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp .L011grand_avx +.align 32 +.L011grand_avx: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp .L012avx_00_47 +.align 16 +.L012avx_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm0,%xmm0 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm0,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd (%ebp),%xmm0,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm1,%xmm1 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm1,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm2,%xmm2 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm2,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm3,%xmm3 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm3,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L012avx_00_47 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L011grand_avx + movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha256_block_data_order_avx,.-.L_sha256_block_data_order_avx_begin +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-586-win.asm b/yass/third_party/boringssl/src/gen/bcm/sha256-586-win.asm new file mode 100644 index 0000000000..0ef244d810 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-586-win.asm @@ -0,0 +1,5601 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _sha256_block_data_order_nohw +align 16 +_sha256_block_data_order_nohw: +L$_sha256_block_data_order_nohw_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$000pic_point +L$000pic_point: + pop ebp + lea ebp,[(L$K256-L$000pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx +L$001no_xmm: + sub eax,edi + cmp eax,256 + jae NEAR L$002unrolled + jmp NEAR L$003loop +align 16 +L$003loop: + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + bswap eax + mov edx,DWORD [12+edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD [16+edi] + mov ebx,DWORD [20+edi] + mov ecx,DWORD [24+edi] + bswap eax + mov edx,DWORD [28+edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD [32+edi] + mov ebx,DWORD [36+edi] + mov ecx,DWORD [40+edi] + bswap eax + mov edx,DWORD [44+edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD [48+edi] + mov ebx,DWORD [52+edi] + mov ecx,DWORD [56+edi] + bswap eax + mov edx,DWORD [60+edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + add edi,64 + lea esp,[esp-36] + mov DWORD [104+esp],edi + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edi,DWORD [12+esi] + mov DWORD [8+esp],ebx + xor ebx,ecx + mov DWORD [12+esp],ecx + mov DWORD [16+esp],edi + mov DWORD [esp],ebx + mov edx,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov edi,DWORD [28+esi] + mov DWORD [24+esp],ebx + mov DWORD [28+esp],ecx + mov DWORD [32+esp],edi +align 16 +L$00400_15: + mov ecx,edx + mov esi,DWORD [24+esp] + ror ecx,14 + mov edi,DWORD [28+esp] + xor ecx,edx + xor esi,edi + mov ebx,DWORD [96+esp] + ror ecx,5 + and esi,edx + mov DWORD [20+esp],edx + xor edx,ecx + add ebx,DWORD [32+esp] + xor esi,edi + ror edx,6 + mov ecx,eax + add ebx,esi + ror ecx,9 + add ebx,edx + mov edi,DWORD [8+esp] + xor ecx,eax + mov DWORD [4+esp],eax + lea esp,[esp-4] + ror ecx,11 + mov esi,DWORD [ebp] + xor ecx,eax + mov edx,DWORD [20+esp] + xor eax,edi + ror ecx,2 + add ebx,esi + mov DWORD [esp],eax + add edx,ebx + and eax,DWORD [4+esp] + add ebx,ecx + xor eax,edi + add ebp,4 + add eax,ebx + cmp esi,3248222580 + jne NEAR L$00400_15 + mov ecx,DWORD [156+esp] + jmp NEAR L$00516_63 +align 16 +L$00516_63: + mov ebx,ecx + mov esi,DWORD [104+esp] + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [160+esp] + shr edi,10 + add ebx,DWORD [124+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [24+esp] + ror ecx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor ecx,edx + xor esi,edi + mov DWORD [96+esp],ebx + ror ecx,5 + and esi,edx + mov DWORD [20+esp],edx + xor edx,ecx + add ebx,DWORD [32+esp] + xor esi,edi + ror edx,6 + mov ecx,eax + add ebx,esi + ror ecx,9 + add ebx,edx + mov edi,DWORD [8+esp] + xor ecx,eax + mov DWORD [4+esp],eax + lea esp,[esp-4] + ror ecx,11 + mov esi,DWORD [ebp] + xor ecx,eax + mov edx,DWORD [20+esp] + xor eax,edi + ror ecx,2 + add ebx,esi + mov DWORD [esp],eax + add edx,ebx + and eax,DWORD [4+esp] + add ebx,ecx + xor eax,edi + mov ecx,DWORD [156+esp] + add ebp,4 + add eax,ebx + cmp esi,3329325298 + jne NEAR L$00516_63 + mov esi,DWORD [356+esp] + mov ebx,DWORD [8+esp] + mov ecx,DWORD [16+esp] + add eax,DWORD [esi] + add ebx,DWORD [4+esi] + add edi,DWORD [8+esi] + add ecx,DWORD [12+esi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],edi + mov DWORD [12+esi],ecx + mov eax,DWORD [24+esp] + mov ebx,DWORD [28+esp] + mov ecx,DWORD [32+esp] + mov edi,DWORD [360+esp] + add edx,DWORD [16+esi] + add eax,DWORD [20+esi] + add ebx,DWORD [24+esi] + add ecx,DWORD [28+esi] + mov DWORD [16+esi],edx + mov DWORD [20+esi],eax + mov DWORD [24+esi],ebx + mov DWORD [28+esi],ecx + lea esp,[356+esp] + sub ebp,256 + cmp edi,DWORD [8+esp] + jb NEAR L$003loop + mov esp,DWORD [12+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$K256: +dd 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +dd 66051,67438087,134810123,202182159 +db 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +db 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +db 62,0 +align 16 +L$002unrolled: + lea esp,[esp-96] + mov eax,DWORD [esi] + mov ebp,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov ebx,DWORD [12+esi] + mov DWORD [4+esp],ebp + xor ebp,ecx + mov DWORD [8+esp],ecx + mov DWORD [12+esp],ebx + mov edx,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov esi,DWORD [28+esi] + mov DWORD [20+esp],ebx + mov DWORD [24+esp],ecx + mov DWORD [28+esp],esi + jmp NEAR L$006grand_loop +align 16 +L$006grand_loop: + mov ebx,DWORD [edi] + mov ecx,DWORD [4+edi] + bswap ebx + mov esi,DWORD [8+edi] + bswap ecx + mov DWORD [32+esp],ebx + bswap esi + mov DWORD [36+esp],ecx + mov DWORD [40+esp],esi + mov ebx,DWORD [12+edi] + mov ecx,DWORD [16+edi] + bswap ebx + mov esi,DWORD [20+edi] + bswap ecx + mov DWORD [44+esp],ebx + bswap esi + mov DWORD [48+esp],ecx + mov DWORD [52+esp],esi + mov ebx,DWORD [24+edi] + mov ecx,DWORD [28+edi] + bswap ebx + mov esi,DWORD [32+edi] + bswap ecx + mov DWORD [56+esp],ebx + bswap esi + mov DWORD [60+esp],ecx + mov DWORD [64+esp],esi + mov ebx,DWORD [36+edi] + mov ecx,DWORD [40+edi] + bswap ebx + mov esi,DWORD [44+edi] + bswap ecx + mov DWORD [68+esp],ebx + bswap esi + mov DWORD [72+esp],ecx + mov DWORD [76+esp],esi + mov ebx,DWORD [48+edi] + mov ecx,DWORD [52+edi] + bswap ebx + mov esi,DWORD [56+edi] + bswap ecx + mov DWORD [80+esp],ebx + bswap esi + mov DWORD [84+esp],ecx + mov DWORD [88+esp],esi + mov ebx,DWORD [60+edi] + add edi,64 + bswap ebx + mov DWORD [100+esp],edi + mov DWORD [92+esp],ebx + mov ecx,edx + mov esi,DWORD [20+esp] + ror edx,14 + mov edi,DWORD [24+esp] + xor edx,ecx + mov ebx,DWORD [32+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1116352408+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [16+esp] + ror edx,14 + mov edi,DWORD [20+esp] + xor edx,esi + mov ebx,DWORD [36+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1899447441+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [12+esp] + ror edx,14 + mov edi,DWORD [16+esp] + xor edx,ecx + mov ebx,DWORD [40+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3049323471+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [8+esp] + ror edx,14 + mov edi,DWORD [12+esp] + xor edx,esi + mov ebx,DWORD [44+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3921009573+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [4+esp] + ror edx,14 + mov edi,DWORD [8+esp] + xor edx,ecx + mov ebx,DWORD [48+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[961987163+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [esp] + ror edx,14 + mov edi,DWORD [4+esp] + xor edx,esi + mov ebx,DWORD [52+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1508970993+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [28+esp] + ror edx,14 + mov edi,DWORD [esp] + xor edx,ecx + mov ebx,DWORD [56+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2453635748+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [24+esp] + ror edx,14 + mov edi,DWORD [28+esp] + xor edx,esi + mov ebx,DWORD [60+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2870763221+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [20+esp] + ror edx,14 + mov edi,DWORD [24+esp] + xor edx,ecx + mov ebx,DWORD [64+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3624381080+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [16+esp] + ror edx,14 + mov edi,DWORD [20+esp] + xor edx,esi + mov ebx,DWORD [68+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[310598401+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [12+esp] + ror edx,14 + mov edi,DWORD [16+esp] + xor edx,ecx + mov ebx,DWORD [72+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[607225278+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [8+esp] + ror edx,14 + mov edi,DWORD [12+esp] + xor edx,esi + mov ebx,DWORD [76+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1426881987+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [4+esp] + ror edx,14 + mov edi,DWORD [8+esp] + xor edx,ecx + mov ebx,DWORD [80+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1925078388+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [esp] + ror edx,14 + mov edi,DWORD [4+esp] + xor edx,esi + mov ebx,DWORD [84+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2162078206+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [28+esp] + ror edx,14 + mov edi,DWORD [esp] + xor edx,ecx + mov ebx,DWORD [88+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2614888103+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [24+esp] + ror edx,14 + mov edi,DWORD [28+esp] + xor edx,esi + mov ebx,DWORD [92+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3248222580+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [36+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [88+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [32+esp] + shr edi,10 + add ebx,DWORD [68+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [32+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3835390401+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [40+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [92+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [36+esp] + shr edi,10 + add ebx,DWORD [72+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [36+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[4022224774+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [44+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [32+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [40+esp] + shr edi,10 + add ebx,DWORD [76+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [40+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[264347078+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [48+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [36+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [44+esp] + shr edi,10 + add ebx,DWORD [80+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [44+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[604807628+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [52+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [40+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [48+esp] + shr edi,10 + add ebx,DWORD [84+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [48+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[770255983+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [56+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [44+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [52+esp] + shr edi,10 + add ebx,DWORD [88+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [52+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1249150122+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [60+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [48+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [56+esp] + shr edi,10 + add ebx,DWORD [92+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [56+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1555081692+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [64+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [52+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [60+esp] + shr edi,10 + add ebx,DWORD [32+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [60+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1996064986+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [68+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [56+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [64+esp] + shr edi,10 + add ebx,DWORD [36+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [64+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2554220882+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [72+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [60+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [68+esp] + shr edi,10 + add ebx,DWORD [40+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [68+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2821834349+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [76+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [64+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [72+esp] + shr edi,10 + add ebx,DWORD [44+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [72+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2952996808+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [80+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [68+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [76+esp] + shr edi,10 + add ebx,DWORD [48+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [76+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3210313671+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [84+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [72+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [80+esp] + shr edi,10 + add ebx,DWORD [52+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [80+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3336571891+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [88+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [76+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [84+esp] + shr edi,10 + add ebx,DWORD [56+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [84+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3584528711+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [92+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [80+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [88+esp] + shr edi,10 + add ebx,DWORD [60+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [88+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[113926993+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [32+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [84+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [92+esp] + shr edi,10 + add ebx,DWORD [64+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [92+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[338241895+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [36+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [88+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [32+esp] + shr edi,10 + add ebx,DWORD [68+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [32+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[666307205+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [40+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [92+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [36+esp] + shr edi,10 + add ebx,DWORD [72+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [36+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[773529912+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [44+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [32+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [40+esp] + shr edi,10 + add ebx,DWORD [76+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [40+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1294757372+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [48+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [36+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [44+esp] + shr edi,10 + add ebx,DWORD [80+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [44+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1396182291+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [52+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [40+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [48+esp] + shr edi,10 + add ebx,DWORD [84+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [48+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1695183700+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [56+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [44+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [52+esp] + shr edi,10 + add ebx,DWORD [88+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [52+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1986661051+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [60+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [48+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [56+esp] + shr edi,10 + add ebx,DWORD [92+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [56+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2177026350+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [64+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [52+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [60+esp] + shr edi,10 + add ebx,DWORD [32+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [60+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2456956037+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [68+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [56+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [64+esp] + shr edi,10 + add ebx,DWORD [36+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [64+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2730485921+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [72+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [60+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [68+esp] + shr edi,10 + add ebx,DWORD [40+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [68+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2820302411+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [76+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [64+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [72+esp] + shr edi,10 + add ebx,DWORD [44+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [72+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3259730800+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [80+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [68+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [76+esp] + shr edi,10 + add ebx,DWORD [48+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [76+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3345764771+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [84+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [72+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [80+esp] + shr edi,10 + add ebx,DWORD [52+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [80+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3516065817+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [88+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [76+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [84+esp] + shr edi,10 + add ebx,DWORD [56+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [84+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3600352804+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [92+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [80+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [88+esp] + shr edi,10 + add ebx,DWORD [60+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [88+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[4094571909+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [32+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [84+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [92+esp] + shr edi,10 + add ebx,DWORD [64+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [92+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[275423344+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [36+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [88+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [32+esp] + shr edi,10 + add ebx,DWORD [68+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [32+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[430227734+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [40+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [92+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [36+esp] + shr edi,10 + add ebx,DWORD [72+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [36+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[506948616+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [44+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [32+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [40+esp] + shr edi,10 + add ebx,DWORD [76+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [40+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[659060556+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [48+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [36+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [44+esp] + shr edi,10 + add ebx,DWORD [80+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [44+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[883997877+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [52+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [40+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [48+esp] + shr edi,10 + add ebx,DWORD [84+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [48+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[958139571+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [56+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [44+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [52+esp] + shr edi,10 + add ebx,DWORD [88+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [52+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1322822218+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [60+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [48+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [56+esp] + shr edi,10 + add ebx,DWORD [92+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [56+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1537002063+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [64+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [52+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [60+esp] + shr edi,10 + add ebx,DWORD [32+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [60+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1747873779+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [68+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [56+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [64+esp] + shr edi,10 + add ebx,DWORD [36+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [64+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1955562222+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [72+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [60+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [68+esp] + shr edi,10 + add ebx,DWORD [40+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [68+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2024104815+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [76+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [64+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [72+esp] + shr edi,10 + add ebx,DWORD [44+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [72+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2227730452+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [80+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [68+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [76+esp] + shr edi,10 + add ebx,DWORD [48+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [76+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2361852424+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [84+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [72+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [80+esp] + shr edi,10 + add ebx,DWORD [52+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [80+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2428436474+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [88+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [76+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [84+esp] + shr edi,10 + add ebx,DWORD [56+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [84+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2756734187+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [92+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [80+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [88+esp] + shr edi,10 + add ebx,DWORD [60+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3204031479+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [32+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [84+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [92+esp] + shr edi,10 + add ebx,DWORD [64+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3329325298+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [96+esp] + xor ebp,edi + mov ecx,DWORD [12+esp] + add eax,DWORD [esi] + add ebp,DWORD [4+esi] + add edi,DWORD [8+esi] + add ecx,DWORD [12+esi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebp + mov DWORD [8+esi],edi + mov DWORD [12+esi],ecx + mov DWORD [4+esp],ebp + xor ebp,edi + mov DWORD [8+esp],edi + mov DWORD [12+esp],ecx + mov edi,DWORD [20+esp] + mov ebx,DWORD [24+esp] + mov ecx,DWORD [28+esp] + add edx,DWORD [16+esi] + add edi,DWORD [20+esi] + add ebx,DWORD [24+esi] + add ecx,DWORD [28+esi] + mov DWORD [16+esi],edx + mov DWORD [20+esi],edi + mov DWORD [24+esi],ebx + mov DWORD [28+esi],ecx + mov DWORD [20+esp],edi + mov edi,DWORD [100+esp] + mov DWORD [24+esp],ebx + mov DWORD [28+esp],ecx + cmp edi,DWORD [104+esp] + jb NEAR L$006grand_loop + mov esp,DWORD [108+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _sha256_block_data_order_ssse3 +align 16 +_sha256_block_data_order_ssse3: +L$_sha256_block_data_order_ssse3_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$007pic_point +L$007pic_point: + pop ebp + lea ebp,[(L$K256-L$007pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx + lea esp,[esp-96] + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edi,DWORD [12+esi] + mov DWORD [4+esp],ebx + xor ebx,ecx + mov DWORD [8+esp],ecx + mov DWORD [12+esp],edi + mov edx,DWORD [16+esi] + mov edi,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov esi,DWORD [28+esi] + mov DWORD [20+esp],edi + mov edi,DWORD [100+esp] + mov DWORD [24+esp],ecx + mov DWORD [28+esp],esi + movdqa xmm7,[256+ebp] + jmp NEAR L$008grand_ssse3 +align 16 +L$008grand_ssse3: + movdqu xmm0,[edi] + movdqu xmm1,[16+edi] + movdqu xmm2,[32+edi] + movdqu xmm3,[48+edi] + add edi,64 +db 102,15,56,0,199 + mov DWORD [100+esp],edi +db 102,15,56,0,207 + movdqa xmm4,[ebp] +db 102,15,56,0,215 + movdqa xmm5,[16+ebp] + paddd xmm4,xmm0 +db 102,15,56,0,223 + movdqa xmm6,[32+ebp] + paddd xmm5,xmm1 + movdqa xmm7,[48+ebp] + movdqa [32+esp],xmm4 + paddd xmm6,xmm2 + movdqa [48+esp],xmm5 + paddd xmm7,xmm3 + movdqa [64+esp],xmm6 + movdqa [80+esp],xmm7 + jmp NEAR L$009ssse3_00_47 +align 16 +L$009ssse3_00_47: + add ebp,64 + mov ecx,edx + movdqa xmm4,xmm1 + ror edx,14 + mov esi,DWORD [20+esp] + movdqa xmm7,xmm3 + xor edx,ecx + mov edi,DWORD [24+esp] +db 102,15,58,15,224,4 + xor esi,edi + ror edx,5 + and esi,ecx +db 102,15,58,15,250,4 + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD [4+esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm0,xmm7 + mov DWORD [esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD [28+esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm3,250 + xor ecx,esi + add edx,DWORD [32+esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD [12+esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD [16+esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD [20+esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD [12+esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD [esp] + mov esi,ebx + ror ecx,9 + paddd xmm0,xmm4 + mov DWORD [28+esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD [24+esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD [36+esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD [8+esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm0,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + ror ecx,9 + mov DWORD [24+esp],eax + pshufd xmm7,xmm0,80 + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD [40+esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD [4+esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,[ebp] + and esi,ecx + mov DWORD [4+esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + ror ecx,9 + paddd xmm0,xmm7 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + paddd xmm6,xmm0 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [44+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + movdqa [32+esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm2 + ror edx,14 + mov esi,DWORD [4+esp] + movdqa xmm7,xmm0 + xor edx,ecx + mov edi,DWORD [8+esp] +db 102,15,58,15,225,4 + xor esi,edi + ror edx,5 + and esi,ecx +db 102,15,58,15,251,4 + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD [20+esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm1,xmm7 + mov DWORD [16+esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD [12+esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm0,250 + xor ecx,esi + add edx,DWORD [48+esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD [28+esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD [esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD [4+esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD [28+esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD [16+esp] + mov esi,ebx + ror ecx,9 + paddd xmm1,xmm4 + mov DWORD [12+esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD [8+esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD [52+esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD [24+esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm1,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + ror ecx,9 + mov DWORD [8+esp],eax + pshufd xmm7,xmm1,80 + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD [56+esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD [20+esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,[16+ebp] + and esi,ecx + mov DWORD [20+esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + ror ecx,9 + paddd xmm1,xmm7 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + paddd xmm6,xmm1 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [60+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + movdqa [48+esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm3 + ror edx,14 + mov esi,DWORD [20+esp] + movdqa xmm7,xmm1 + xor edx,ecx + mov edi,DWORD [24+esp] +db 102,15,58,15,226,4 + xor esi,edi + ror edx,5 + and esi,ecx +db 102,15,58,15,248,4 + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD [4+esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm2,xmm7 + mov DWORD [esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD [28+esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm1,250 + xor ecx,esi + add edx,DWORD [64+esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD [12+esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD [16+esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD [20+esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD [12+esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD [esp] + mov esi,ebx + ror ecx,9 + paddd xmm2,xmm4 + mov DWORD [28+esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD [24+esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD [68+esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD [8+esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm2,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + ror ecx,9 + mov DWORD [24+esp],eax + pshufd xmm7,xmm2,80 + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD [72+esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD [4+esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,[32+ebp] + and esi,ecx + mov DWORD [4+esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + ror ecx,9 + paddd xmm2,xmm7 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + paddd xmm6,xmm2 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [76+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + movdqa [64+esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm0 + ror edx,14 + mov esi,DWORD [4+esp] + movdqa xmm7,xmm2 + xor edx,ecx + mov edi,DWORD [8+esp] +db 102,15,58,15,227,4 + xor esi,edi + ror edx,5 + and esi,ecx +db 102,15,58,15,249,4 + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD [20+esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm3,xmm7 + mov DWORD [16+esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD [12+esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm2,250 + xor ecx,esi + add edx,DWORD [80+esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD [28+esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD [esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD [4+esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD [28+esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD [16+esp] + mov esi,ebx + ror ecx,9 + paddd xmm3,xmm4 + mov DWORD [12+esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD [8+esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD [84+esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD [24+esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm3,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + ror ecx,9 + mov DWORD [8+esp],eax + pshufd xmm7,xmm3,80 + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD [88+esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD [20+esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,[48+ebp] + and esi,ecx + mov DWORD [20+esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + ror ecx,9 + paddd xmm3,xmm7 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + paddd xmm6,xmm3 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [92+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + movdqa [80+esp],xmm6 + cmp DWORD [64+ebp],66051 + jne NEAR L$009ssse3_00_47 + mov ecx,edx + ror edx,14 + mov esi,DWORD [20+esp] + xor edx,ecx + mov edi,DWORD [24+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [4+esp] + mov esi,eax + ror ecx,9 + mov DWORD [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [28+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [32+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [12+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [16+esp] + xor edx,ecx + mov edi,DWORD [20+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [12+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [esp] + mov esi,ebx + ror ecx,9 + mov DWORD [28+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [24+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [36+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + ror ecx,9 + mov DWORD [24+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [40+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [4+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [4+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [44+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [4+esp] + xor edx,ecx + mov edi,DWORD [8+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [20+esp] + mov esi,eax + ror ecx,9 + mov DWORD [16+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [12+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [48+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [28+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [esp] + xor edx,ecx + mov edi,DWORD [4+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [28+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [16+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [12+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [8+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [52+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + ror ecx,9 + mov DWORD [8+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [56+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [20+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [20+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [60+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [20+esp] + xor edx,ecx + mov edi,DWORD [24+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [4+esp] + mov esi,eax + ror ecx,9 + mov DWORD [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [28+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [64+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [12+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [16+esp] + xor edx,ecx + mov edi,DWORD [20+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [12+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [esp] + mov esi,ebx + ror ecx,9 + mov DWORD [28+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [24+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [68+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + ror ecx,9 + mov DWORD [24+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [72+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [4+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [4+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [76+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [4+esp] + xor edx,ecx + mov edi,DWORD [8+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [20+esp] + mov esi,eax + ror ecx,9 + mov DWORD [16+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [12+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [80+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [28+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [esp] + xor edx,ecx + mov edi,DWORD [4+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [28+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [16+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [12+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [8+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [84+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + ror ecx,9 + mov DWORD [8+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [88+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [20+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [20+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [92+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + mov esi,DWORD [96+esp] + xor ebx,edi + mov ecx,DWORD [12+esp] + add eax,DWORD [esi] + add ebx,DWORD [4+esi] + add edi,DWORD [8+esi] + add ecx,DWORD [12+esi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],edi + mov DWORD [12+esi],ecx + mov DWORD [4+esp],ebx + xor ebx,edi + mov DWORD [8+esp],edi + mov DWORD [12+esp],ecx + mov edi,DWORD [20+esp] + mov ecx,DWORD [24+esp] + add edx,DWORD [16+esi] + add edi,DWORD [20+esi] + add ecx,DWORD [24+esi] + mov DWORD [16+esi],edx + mov DWORD [20+esi],edi + mov DWORD [20+esp],edi + mov edi,DWORD [28+esp] + mov DWORD [24+esi],ecx + add edi,DWORD [28+esi] + mov DWORD [24+esp],ecx + mov DWORD [28+esi],edi + mov DWORD [28+esp],edi + mov edi,DWORD [100+esp] + movdqa xmm7,[64+ebp] + sub ebp,192 + cmp edi,DWORD [104+esp] + jb NEAR L$008grand_ssse3 + mov esp,DWORD [108+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _sha256_block_data_order_avx +align 16 +_sha256_block_data_order_avx: +L$_sha256_block_data_order_avx_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$010pic_point +L$010pic_point: + pop ebp + lea ebp,[(L$K256-L$010pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx + lea esp,[esp-96] + vzeroall + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edi,DWORD [12+esi] + mov DWORD [4+esp],ebx + xor ebx,ecx + mov DWORD [8+esp],ecx + mov DWORD [12+esp],edi + mov edx,DWORD [16+esi] + mov edi,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov esi,DWORD [28+esi] + mov DWORD [20+esp],edi + mov edi,DWORD [100+esp] + mov DWORD [24+esp],ecx + mov DWORD [28+esp],esi + vmovdqa xmm7,[256+ebp] + jmp NEAR L$011grand_avx +align 32 +L$011grand_avx: + vmovdqu xmm0,[edi] + vmovdqu xmm1,[16+edi] + vmovdqu xmm2,[32+edi] + vmovdqu xmm3,[48+edi] + add edi,64 + vpshufb xmm0,xmm0,xmm7 + mov DWORD [100+esp],edi + vpshufb xmm1,xmm1,xmm7 + vpshufb xmm2,xmm2,xmm7 + vpaddd xmm4,xmm0,[ebp] + vpshufb xmm3,xmm3,xmm7 + vpaddd xmm5,xmm1,[16+ebp] + vpaddd xmm6,xmm2,[32+ebp] + vpaddd xmm7,xmm3,[48+ebp] + vmovdqa [32+esp],xmm4 + vmovdqa [48+esp],xmm5 + vmovdqa [64+esp],xmm6 + vmovdqa [80+esp],xmm7 + jmp NEAR L$012avx_00_47 +align 16 +L$012avx_00_47: + add ebp,64 + vpalignr xmm4,xmm1,xmm0,4 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [20+esp] + vpalignr xmm7,xmm3,xmm2,4 + xor edx,ecx + mov edi,DWORD [24+esp] + xor esi,edi + vpsrld xmm6,xmm4,7 + shrd edx,edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + vpaddd xmm0,xmm0,xmm7 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrld xmm7,xmm4,3 + mov ecx,eax + add edx,edi + mov edi,DWORD [4+esp] + vpslld xmm5,xmm4,14 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [esp],eax + vpxor xmm4,xmm7,xmm6 + xor ecx,eax + xor eax,edi + add edx,DWORD [28+esp] + vpshufd xmm7,xmm3,250 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpsrld xmm6,xmm6,11 + add edx,DWORD [32+esp] + xor ebx,edi + shrd ecx,ecx,2 + vpxor xmm4,xmm4,xmm5 + add ebx,edx + add edx,DWORD [12+esp] + add ebx,ecx + vpslld xmm5,xmm5,11 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [16+esp] + vpxor xmm4,xmm4,xmm6 + xor edx,ecx + mov edi,DWORD [20+esp] + xor esi,edi + vpsrld xmm6,xmm7,10 + shrd edx,edx,5 + and esi,ecx + mov DWORD [12+esp],ecx + vpxor xmm4,xmm4,xmm5 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,ebx + add edx,edi + mov edi,DWORD [esp] + vpaddd xmm0,xmm0,xmm4 + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [28+esp],ebx + vpxor xmm6,xmm6,xmm5 + xor ecx,ebx + xor ebx,edi + add edx,DWORD [24+esp] + vpsrlq xmm7,xmm7,19 + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + add edx,DWORD [36+esp] + xor eax,edi + shrd ecx,ecx,2 + vpshufd xmm7,xmm6,132 + add eax,edx + add edx,DWORD [8+esp] + add eax,ecx + vpsrldq xmm7,xmm7,8 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [12+esp] + vpaddd xmm0,xmm0,xmm7 + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + vpshufd xmm7,xmm0,80 + shrd edx,edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + vpsrld xmm6,xmm7,10 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + vpxor xmm6,xmm6,xmm5 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [24+esp],eax + vpsrlq xmm7,xmm7,19 + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + vpxor xmm6,xmm6,xmm7 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpshufd xmm7,xmm6,232 + add edx,DWORD [40+esp] + xor ebx,edi + shrd ecx,ecx,2 + vpslldq xmm7,xmm7,8 + add ebx,edx + add edx,DWORD [4+esp] + add ebx,ecx + vpaddd xmm0,xmm0,xmm7 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [8+esp] + vpaddd xmm6,xmm0,[ebp] + xor edx,ecx + mov edi,DWORD [12+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [4+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [44+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + vmovdqa [32+esp],xmm6 + vpalignr xmm4,xmm2,xmm1,4 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [4+esp] + vpalignr xmm7,xmm0,xmm3,4 + xor edx,ecx + mov edi,DWORD [8+esp] + xor esi,edi + vpsrld xmm6,xmm4,7 + shrd edx,edx,5 + and esi,ecx + mov DWORD [esp],ecx + vpaddd xmm1,xmm1,xmm7 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrld xmm7,xmm4,3 + mov ecx,eax + add edx,edi + mov edi,DWORD [20+esp] + vpslld xmm5,xmm4,14 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [16+esp],eax + vpxor xmm4,xmm7,xmm6 + xor ecx,eax + xor eax,edi + add edx,DWORD [12+esp] + vpshufd xmm7,xmm0,250 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpsrld xmm6,xmm6,11 + add edx,DWORD [48+esp] + xor ebx,edi + shrd ecx,ecx,2 + vpxor xmm4,xmm4,xmm5 + add ebx,edx + add edx,DWORD [28+esp] + add ebx,ecx + vpslld xmm5,xmm5,11 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [esp] + vpxor xmm4,xmm4,xmm6 + xor edx,ecx + mov edi,DWORD [4+esp] + xor esi,edi + vpsrld xmm6,xmm7,10 + shrd edx,edx,5 + and esi,ecx + mov DWORD [28+esp],ecx + vpxor xmm4,xmm4,xmm5 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,ebx + add edx,edi + mov edi,DWORD [16+esp] + vpaddd xmm1,xmm1,xmm4 + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [12+esp],ebx + vpxor xmm6,xmm6,xmm5 + xor ecx,ebx + xor ebx,edi + add edx,DWORD [8+esp] + vpsrlq xmm7,xmm7,19 + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + add edx,DWORD [52+esp] + xor eax,edi + shrd ecx,ecx,2 + vpshufd xmm7,xmm6,132 + add eax,edx + add edx,DWORD [24+esp] + add eax,ecx + vpsrldq xmm7,xmm7,8 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [28+esp] + vpaddd xmm1,xmm1,xmm7 + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + vpshufd xmm7,xmm1,80 + shrd edx,edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + vpsrld xmm6,xmm7,10 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + vpxor xmm6,xmm6,xmm5 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [8+esp],eax + vpsrlq xmm7,xmm7,19 + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + vpxor xmm6,xmm6,xmm7 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpshufd xmm7,xmm6,232 + add edx,DWORD [56+esp] + xor ebx,edi + shrd ecx,ecx,2 + vpslldq xmm7,xmm7,8 + add ebx,edx + add edx,DWORD [20+esp] + add ebx,ecx + vpaddd xmm1,xmm1,xmm7 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [24+esp] + vpaddd xmm6,xmm1,[16+ebp] + xor edx,ecx + mov edi,DWORD [28+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [20+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [60+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + vmovdqa [48+esp],xmm6 + vpalignr xmm4,xmm3,xmm2,4 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [20+esp] + vpalignr xmm7,xmm1,xmm0,4 + xor edx,ecx + mov edi,DWORD [24+esp] + xor esi,edi + vpsrld xmm6,xmm4,7 + shrd edx,edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + vpaddd xmm2,xmm2,xmm7 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrld xmm7,xmm4,3 + mov ecx,eax + add edx,edi + mov edi,DWORD [4+esp] + vpslld xmm5,xmm4,14 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [esp],eax + vpxor xmm4,xmm7,xmm6 + xor ecx,eax + xor eax,edi + add edx,DWORD [28+esp] + vpshufd xmm7,xmm1,250 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpsrld xmm6,xmm6,11 + add edx,DWORD [64+esp] + xor ebx,edi + shrd ecx,ecx,2 + vpxor xmm4,xmm4,xmm5 + add ebx,edx + add edx,DWORD [12+esp] + add ebx,ecx + vpslld xmm5,xmm5,11 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [16+esp] + vpxor xmm4,xmm4,xmm6 + xor edx,ecx + mov edi,DWORD [20+esp] + xor esi,edi + vpsrld xmm6,xmm7,10 + shrd edx,edx,5 + and esi,ecx + mov DWORD [12+esp],ecx + vpxor xmm4,xmm4,xmm5 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,ebx + add edx,edi + mov edi,DWORD [esp] + vpaddd xmm2,xmm2,xmm4 + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [28+esp],ebx + vpxor xmm6,xmm6,xmm5 + xor ecx,ebx + xor ebx,edi + add edx,DWORD [24+esp] + vpsrlq xmm7,xmm7,19 + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + add edx,DWORD [68+esp] + xor eax,edi + shrd ecx,ecx,2 + vpshufd xmm7,xmm6,132 + add eax,edx + add edx,DWORD [8+esp] + add eax,ecx + vpsrldq xmm7,xmm7,8 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [12+esp] + vpaddd xmm2,xmm2,xmm7 + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + vpshufd xmm7,xmm2,80 + shrd edx,edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + vpsrld xmm6,xmm7,10 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + vpxor xmm6,xmm6,xmm5 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [24+esp],eax + vpsrlq xmm7,xmm7,19 + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + vpxor xmm6,xmm6,xmm7 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpshufd xmm7,xmm6,232 + add edx,DWORD [72+esp] + xor ebx,edi + shrd ecx,ecx,2 + vpslldq xmm7,xmm7,8 + add ebx,edx + add edx,DWORD [4+esp] + add ebx,ecx + vpaddd xmm2,xmm2,xmm7 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [8+esp] + vpaddd xmm6,xmm2,[32+ebp] + xor edx,ecx + mov edi,DWORD [12+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [4+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [76+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + vmovdqa [64+esp],xmm6 + vpalignr xmm4,xmm0,xmm3,4 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [4+esp] + vpalignr xmm7,xmm2,xmm1,4 + xor edx,ecx + mov edi,DWORD [8+esp] + xor esi,edi + vpsrld xmm6,xmm4,7 + shrd edx,edx,5 + and esi,ecx + mov DWORD [esp],ecx + vpaddd xmm3,xmm3,xmm7 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrld xmm7,xmm4,3 + mov ecx,eax + add edx,edi + mov edi,DWORD [20+esp] + vpslld xmm5,xmm4,14 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [16+esp],eax + vpxor xmm4,xmm7,xmm6 + xor ecx,eax + xor eax,edi + add edx,DWORD [12+esp] + vpshufd xmm7,xmm2,250 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpsrld xmm6,xmm6,11 + add edx,DWORD [80+esp] + xor ebx,edi + shrd ecx,ecx,2 + vpxor xmm4,xmm4,xmm5 + add ebx,edx + add edx,DWORD [28+esp] + add ebx,ecx + vpslld xmm5,xmm5,11 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [esp] + vpxor xmm4,xmm4,xmm6 + xor edx,ecx + mov edi,DWORD [4+esp] + xor esi,edi + vpsrld xmm6,xmm7,10 + shrd edx,edx,5 + and esi,ecx + mov DWORD [28+esp],ecx + vpxor xmm4,xmm4,xmm5 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,ebx + add edx,edi + mov edi,DWORD [16+esp] + vpaddd xmm3,xmm3,xmm4 + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [12+esp],ebx + vpxor xmm6,xmm6,xmm5 + xor ecx,ebx + xor ebx,edi + add edx,DWORD [8+esp] + vpsrlq xmm7,xmm7,19 + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + add edx,DWORD [84+esp] + xor eax,edi + shrd ecx,ecx,2 + vpshufd xmm7,xmm6,132 + add eax,edx + add edx,DWORD [24+esp] + add eax,ecx + vpsrldq xmm7,xmm7,8 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [28+esp] + vpaddd xmm3,xmm3,xmm7 + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + vpshufd xmm7,xmm3,80 + shrd edx,edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + vpsrld xmm6,xmm7,10 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + vpxor xmm6,xmm6,xmm5 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [8+esp],eax + vpsrlq xmm7,xmm7,19 + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + vpxor xmm6,xmm6,xmm7 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpshufd xmm7,xmm6,232 + add edx,DWORD [88+esp] + xor ebx,edi + shrd ecx,ecx,2 + vpslldq xmm7,xmm7,8 + add ebx,edx + add edx,DWORD [20+esp] + add ebx,ecx + vpaddd xmm3,xmm3,xmm7 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [24+esp] + vpaddd xmm6,xmm3,[48+ebp] + xor edx,ecx + mov edi,DWORD [28+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [20+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [92+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + vmovdqa [80+esp],xmm6 + cmp DWORD [64+ebp],66051 + jne NEAR L$012avx_00_47 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [20+esp] + xor edx,ecx + mov edi,DWORD [24+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [4+esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [28+esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [32+esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD [12+esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [16+esp] + xor edx,ecx + mov edi,DWORD [20+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [12+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [28+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [24+esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [36+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [24+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [40+esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD [4+esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [4+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [44+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [4+esp] + xor edx,ecx + mov edi,DWORD [8+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [20+esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [16+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [12+esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [48+esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD [28+esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [esp] + xor edx,ecx + mov edi,DWORD [4+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [28+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [16+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [12+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [8+esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [52+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [8+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [56+esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD [20+esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [20+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [60+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [20+esp] + xor edx,ecx + mov edi,DWORD [24+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [4+esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [28+esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [64+esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD [12+esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [16+esp] + xor edx,ecx + mov edi,DWORD [20+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [12+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [28+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [24+esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [68+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [24+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [72+esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD [4+esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [4+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [76+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [4+esp] + xor edx,ecx + mov edi,DWORD [8+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [20+esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [16+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [12+esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [80+esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD [28+esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [esp] + xor edx,ecx + mov edi,DWORD [4+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [28+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [16+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [12+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [8+esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [84+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD [8+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [88+esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD [20+esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD [20+esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [92+esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + mov esi,DWORD [96+esp] + xor ebx,edi + mov ecx,DWORD [12+esp] + add eax,DWORD [esi] + add ebx,DWORD [4+esi] + add edi,DWORD [8+esi] + add ecx,DWORD [12+esi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],edi + mov DWORD [12+esi],ecx + mov DWORD [4+esp],ebx + xor ebx,edi + mov DWORD [8+esp],edi + mov DWORD [12+esp],ecx + mov edi,DWORD [20+esp] + mov ecx,DWORD [24+esp] + add edx,DWORD [16+esi] + add edi,DWORD [20+esi] + add ecx,DWORD [24+esi] + mov DWORD [16+esi],edx + mov DWORD [20+esi],edi + mov DWORD [20+esp],edi + mov edi,DWORD [28+esp] + mov DWORD [24+esi],ecx + add edi,DWORD [28+esi] + mov DWORD [24+esp],ecx + mov DWORD [28+esi],edi + mov DWORD [28+esp],edi + mov edi,DWORD [100+esp] + vmovdqa xmm7,[64+ebp] + sub ebp,192 + cmp edi,DWORD [104+esp] + jb NEAR L$011grand_avx + mov esp,DWORD [108+esp] + vzeroall + pop edi + pop esi + pop ebx + pop ebp + ret +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-armv4-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha256-armv4-linux.S new file mode 100644 index 0000000000..fca0681f9c --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-armv4-linux.S @@ -0,0 +1,2839 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +@ +@ Licensed under the OpenSSL license (the "License"). You may not use +@ this file except in compliance with the License. You can obtain a copy +@ in the file LICENSE in the source distribution or at +@ https://www.openssl.org/source/license.html + + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA256 block procedure for ARMv4. May 2007. + +@ Performance is ~2x better than gcc 3.4 generated code and in "abso- +@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +@ byte [on single-issue Xscale PXA250 core]. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 22% improvement on +@ Cortex A8 core and ~20 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +@ September 2013. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process one +@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +@ code (meaning that latter performs sub-optimally, nothing was done +@ about it). + +@ May 2014. +@ +@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +#ifndef __KERNEL__ +# include +#else +# define __ARM_ARCH __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both +@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those +@ instructions are manually-encoded. (See unsha256.) +.arch armv7-a + +.text +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +.align 5 + +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,%function +sha256_block_data_order_nohw: + add r2,r1,r2,lsl#6 @ len to point at the end of inp + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + adr r14,K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.LK256_shortcut_neon: +@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode. +#if defined(__thumb2__) +.word K256-(.LK256_add_neon+4) +#else +.word K256-(.LK256_add_neon+8) +#endif + +.globl sha256_block_data_order_neon +.hidden sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 5 +.skip 16 +sha256_block_data_order_neon: + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + + sub r11,sp,#16*4+16 + + @ K256 is just at the boundary of being easily referenced by an ADR from + @ this function. In Arm mode, when building with __ARM_ARCH=6, it does + @ not fit. By moving code around, we could make it fit, but this is too + @ fragile. For simplicity, just load the offset from + @ .LK256_shortcut_neon. + @ + @ TODO(davidben): adrl would avoid a load, but clang-assembler does not + @ support it. We might be able to emulate it with a macro, but Android's + @ did not work when I tried it. + @ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm + ldr r14,.LK256_shortcut_neon +.LK256_add_neon: + add r14,pc,r14 + + bic r11,r11,#15 @ align for 128-bit stores + mov r12,sp + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + it eq + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + it ne + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8,r9,r10,r11} + + ittte ne + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# if defined(__thumb2__) +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.LK256_shortcut_hw: +@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode. +#if defined(__thumb2__) +.word K256-(.LK256_add_hw+4) +#else +.word K256-(.LK256_add_hw+8) +#endif + +.globl sha256_block_data_order_hw +.hidden sha256_block_data_order_hw +.type sha256_block_data_order_hw,%function +.align 5 +sha256_block_data_order_hw: + @ K256 is too far to reference from one ADR command in Thumb mode. In + @ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte + @ boundary. For simplicity, just load the offset from .LK256_shortcut_hw. + ldr r3,.LK256_shortcut_hw +.LK256_add_hw: + add r3,pc,r3 + + vld1.32 {q0,q1},[r0] + add r2,r1,r2,lsl#6 @ len to point at the end of inp + b .Loop_v8 + +.align 4 +.Loop_v8: + vld1.8 {q8,q9},[r1]! + vld1.8 {q10,q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + it ne + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_hw,.-sha256_block_data_order_hw +#endif +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-apple.S new file mode 100644 index 0000000000..a78236ba98 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-apple.S @@ -0,0 +1,1193 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// Kryo 1.92 17.4 (+30%) 11.2 (+8%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significantly faster +// and the gap is only 40-90%. + +#ifndef __KERNEL__ +# include +#endif + +.text + +.globl _sha256_block_data_order_nohw +.private_extern _sha256_block_data_order_nohw + +.align 6 +_sha256_block_data_order_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adrp x30,LK256@PAGE + add x30,x30,LK256@PAGEOFF + stp x0,x2,[x29,#96] + +Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 + ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.section __TEXT,__const +.align 6 + +LK256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0 //terminator + +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.text +#ifndef __KERNEL__ +.globl _sha256_block_data_order_hw +.private_extern _sha256_block_data_order_hw + +.align 6 +_sha256_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adrp x3,LK256@PAGE + add x3,x3,LK256@PAGEOFF + +Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret + +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-linux.S new file mode 100644 index 0000000000..44201086f9 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-linux.S @@ -0,0 +1,1193 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// Kryo 1.92 17.4 (+30%) 11.2 (+8%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significantly faster +// and the gap is only 40-90%. + +#ifndef __KERNEL__ +# include +#endif + +.text + +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,%function +.align 6 +sha256_block_data_order_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adrp x30,.LK256 + add x30,x30,:lo12:.LK256 + stp x0,x2,[x29,#96] + +.Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +.Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 + ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw + +.section .rodata +.align 6 +.type .LK256,%object +.LK256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0 //terminator +.size .LK256,.-.LK256 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.text +#ifndef __KERNEL__ +.globl sha256_block_data_order_hw +.hidden sha256_block_data_order_hw +.type sha256_block_data_order_hw,%function +.align 6 +sha256_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adrp x3,.LK256 + add x3,x3,:lo12:.LK256 + +.Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret +.size sha256_block_data_order_hw,.-sha256_block_data_order_hw +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-win.S new file mode 100644 index 0000000000..89d3944afb --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-armv8-win.S @@ -0,0 +1,1197 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// Kryo 1.92 17.4 (+30%) 11.2 (+8%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significantly faster +// and the gap is only 40-90%. + +#ifndef __KERNEL__ +# include +#endif + +.text + +.globl sha256_block_data_order_nohw + +.def sha256_block_data_order_nohw + .type 32 +.endef +.align 6 +sha256_block_data_order_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adrp x30,LK256 + add x30,x30,:lo12:LK256 + stp x0,x2,[x29,#96] + +Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 + ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.section .rodata +.align 6 + +LK256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0 //terminator + +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.text +#ifndef __KERNEL__ +.globl sha256_block_data_order_hw + +.def sha256_block_data_order_hw + .type 32 +.endef +.align 6 +sha256_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adrp x3,LK256 + add x3,x3,:lo12:LK256 + +Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret + +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-apple.S new file mode 100644 index 0000000000..b33f8072c6 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-apple.S @@ -0,0 +1,4170 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + +.globl _sha256_block_data_order_nohw +.private_extern _sha256_block_data_order_nohw + +.p2align 4 +_sha256_block_data_order_nohw: + +_CET_ENDBR + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) + +L$prologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp L$loop + +.p2align 4 +L$loop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz L$rounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop + + movq 88(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue: + ret + + +.section __DATA,__const +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text +.globl _sha256_block_data_order_hw +.private_extern _sha256_block_data_order_hw + +.p2align 6 +_sha256_block_data_order_hw: + +_CET_ENDBR + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 512-128(%rcx),%xmm7 + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 64-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 96-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 224-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 256-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 288-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 320-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 352-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 384-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 416-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 448-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 480-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz L$oop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm7 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + ret + + +.globl _sha256_block_data_order_ssse3 +.private_extern _sha256_block_data_order_ssse3 + +.p2align 6 +_sha256_block_data_order_ssse3: + +_CET_ENDBR + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) + +L$prologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp L$loop_ssse3 +.p2align 4 +L$loop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$ssse3_00_47 + +.p2align 4 +L$ssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne L$ssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop_ssse3 + + movq 88(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue_ssse3: + ret + + +.globl _sha256_block_data_order_avx +.private_extern _sha256_block_data_order_avx + +.p2align 6 +_sha256_block_data_order_avx: + +_CET_ENDBR + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) + +L$prologue_avx: + + vzeroupper + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%xmm8 + vmovdqa K256+512+64(%rip),%xmm9 + jmp L$loop_avx +.p2align 4 +L$loop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%edi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%edi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$avx_00_47 + +.p2align 4 +L$avx_00_47: + subq $-128,%rbp + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm3,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm0,%xmm0 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpshufd $80,%xmm0,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm0,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm1,%xmm1 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpshufd $80,%xmm1,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm1,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm2,%xmm2 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpshufd $80,%xmm2,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm2,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm3,%xmm3 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpshufd $80,%xmm3,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne L$avx_00_47 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop_avx + + movq 88(%rsp),%rsi + + vzeroupper + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue_avx: + ret + + +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-linux.S new file mode 100644 index 0000000000..8476b031c9 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-linux.S @@ -0,0 +1,4170 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,@function +.align 16 +sha256_block_data_order_nohw: +.cfi_startproc +_CET_ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + ret +.cfi_endproc +.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw +.section .rodata +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text +.globl sha256_block_data_order_hw +.hidden sha256_block_data_order_hw +.type sha256_block_data_order_hw,@function +.align 64 +sha256_block_data_order_hw: +.cfi_startproc +_CET_ENDBR + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 512-128(%rcx),%xmm7 + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 64-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 96-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 224-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 256-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 288-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 320-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 352-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 384-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 416-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 448-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 480-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz .Loop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm7 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + ret +.cfi_endproc +.size sha256_block_data_order_hw,.-sha256_block_data_order_hw +.globl sha256_block_data_order_ssse3 +.hidden sha256_block_data_order_ssse3 +.type sha256_block_data_order_ssse3,@function +.align 64 +sha256_block_data_order_ssse3: +.cfi_startproc +_CET_ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.align 16 +.Lloop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.align 16 +.Lssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_ssse3: + ret +.cfi_endproc +.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 +.globl sha256_block_data_order_avx +.hidden sha256_block_data_order_avx +.type sha256_block_data_order_avx,@function +.align 64 +sha256_block_data_order_avx: +.cfi_startproc +_CET_ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue_avx: + + vzeroupper + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%xmm8 + vmovdqa K256+512+64(%rip),%xmm9 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%edi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%edi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + subq $-128,%rbp + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm3,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm0,%xmm0 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpshufd $80,%xmm0,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm0,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm1,%xmm1 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpshufd $80,%xmm1,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm1,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm2,%xmm2 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpshufd $80,%xmm2,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm2,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm3,%xmm3 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpshufd $80,%xmm3,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lavx_00_47 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_avx + + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + ret +.cfi_endproc +.size sha256_block_data_order_avx,.-sha256_block_data_order_avx +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-win.asm new file mode 100644 index 0000000000..ada8dbab6a --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha256-x86_64-win.asm @@ -0,0 +1,4415 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + +global sha256_block_data_order_nohw + +ALIGN 16 +sha256_block_data_order_nohw: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_nohw: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,16*4+4*8 + lea rdx,[rdx*4+rsi] + and rsp,-64 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[88+rsp],rax + +$L$prologue: + + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov edi,ebx + lea rbp,[K256] + xor edi,ecx + mov r12d,DWORD[rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + add r11d,r14d + mov r12d,DWORD[4+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[4+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + add r10d,r14d + mov r12d,DWORD[8+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[8+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + add r9d,r14d + mov r12d,DWORD[12+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[12+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + add r8d,r14d + mov r12d,DWORD[16+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[16+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + add edx,r14d + mov r12d,DWORD[20+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[20+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + add ecx,r14d + mov r12d,DWORD[24+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[24+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + add ebx,r14d + mov r12d,DWORD[28+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[28+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + add eax,r14d + mov r12d,DWORD[32+rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[32+rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + add r11d,r14d + mov r12d,DWORD[36+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[36+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + add r10d,r14d + mov r12d,DWORD[40+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[40+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + add r9d,r14d + mov r12d,DWORD[44+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[44+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + add r8d,r14d + mov r12d,DWORD[48+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[48+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + add edx,r14d + mov r12d,DWORD[52+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[52+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + add ecx,r14d + mov r12d,DWORD[56+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[56+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + add ebx,r14d + mov r12d,DWORD[60+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + jmp NEAR $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx: + mov r13d,DWORD[4+rsp] + mov r15d,DWORD[56+rsp] + + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[36+rsp] + + add r12d,DWORD[rsp] + mov r13d,r8d + add r12d,r15d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[8+rsp] + mov edi,DWORD[60+rsp] + + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[40+rsp] + + add r12d,DWORD[4+rsp] + mov r13d,edx + add r12d,edi + mov r14d,r11d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[4+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[12+rsp] + mov r15d,DWORD[rsp] + + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[44+rsp] + + add r12d,DWORD[8+rsp] + mov r13d,ecx + add r12d,r15d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[8+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[16+rsp] + mov edi,DWORD[4+rsp] + + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[48+rsp] + + add r12d,DWORD[12+rsp] + mov r13d,ebx + add r12d,edi + mov r14d,r9d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[12+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[20+rsp] + mov r15d,DWORD[8+rsp] + + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[52+rsp] + + add r12d,DWORD[16+rsp] + mov r13d,eax + add r12d,r15d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[16+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[24+rsp] + mov edi,DWORD[12+rsp] + + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[56+rsp] + + add r12d,DWORD[20+rsp] + mov r13d,r11d + add r12d,edi + mov r14d,edx + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[20+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[28+rsp] + mov r15d,DWORD[16+rsp] + + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[60+rsp] + + add r12d,DWORD[24+rsp] + mov r13d,r10d + add r12d,r15d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[24+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[32+rsp] + mov edi,DWORD[20+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[rsp] + + add r12d,DWORD[28+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[28+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[36+rsp] + mov r15d,DWORD[24+rsp] + + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[4+rsp] + + add r12d,DWORD[32+rsp] + mov r13d,r8d + add r12d,r15d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[32+rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[40+rsp] + mov edi,DWORD[28+rsp] + + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[8+rsp] + + add r12d,DWORD[36+rsp] + mov r13d,edx + add r12d,edi + mov r14d,r11d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[36+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[44+rsp] + mov r15d,DWORD[32+rsp] + + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[12+rsp] + + add r12d,DWORD[40+rsp] + mov r13d,ecx + add r12d,r15d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[40+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[48+rsp] + mov edi,DWORD[36+rsp] + + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[16+rsp] + + add r12d,DWORD[44+rsp] + mov r13d,ebx + add r12d,edi + mov r14d,r9d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[44+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[52+rsp] + mov r15d,DWORD[40+rsp] + + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[20+rsp] + + add r12d,DWORD[48+rsp] + mov r13d,eax + add r12d,r15d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[48+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[56+rsp] + mov edi,DWORD[44+rsp] + + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[24+rsp] + + add r12d,DWORD[52+rsp] + mov r13d,r11d + add r12d,edi + mov r14d,edx + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[52+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[60+rsp] + mov r15d,DWORD[48+rsp] + + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[28+rsp] + + add r12d,DWORD[56+rsp] + mov r13d,r10d + add r12d,r15d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[56+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[rsp] + mov edi,DWORD[52+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[32+rsp] + + add r12d,DWORD[60+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + cmp BYTE[3+rbp],0 + jnz NEAR $L$rounds_16_xx + + mov rdi,QWORD[((64+0))+rsp] + add eax,r14d + lea rsi,[64+rsi] + + add eax,DWORD[rdi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + jb NEAR $L$loop + + mov rsi,QWORD[88+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha256_block_data_order_nohw: +section .rdata rdata align=8 +ALIGN 64 + +K256: + DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff + DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff + DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 + DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 + DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 + DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 + DB 111,114,103,62,0 +section .text + +global sha256_block_data_order_hw + +ALIGN 64 +sha256_block_data_order_hw: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_hw: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rsp,[((-88))+rsp] + movaps XMMWORD[(-8-80)+rax],xmm6 + movaps XMMWORD[(-8-64)+rax],xmm7 + movaps XMMWORD[(-8-48)+rax],xmm8 + movaps XMMWORD[(-8-32)+rax],xmm9 + movaps XMMWORD[(-8-16)+rax],xmm10 +$L$prologue_shaext: + lea rcx,[((K256+128))] + movdqu xmm1,XMMWORD[rdi] + movdqu xmm2,XMMWORD[16+rdi] + movdqa xmm7,XMMWORD[((512-128))+rcx] + + pshufd xmm0,xmm1,0x1b + pshufd xmm1,xmm1,0xb1 + pshufd xmm2,xmm2,0x1b + movdqa xmm8,xmm7 +DB 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + jmp NEAR $L$oop_shaext + +ALIGN 16 +$L$oop_shaext: + movdqu xmm3,XMMWORD[rsi] + movdqu xmm4,XMMWORD[16+rsi] + movdqu xmm5,XMMWORD[32+rsi] +DB 102,15,56,0,223 + movdqu xmm6,XMMWORD[48+rsi] + + movdqa xmm0,XMMWORD[((0-128))+rcx] + paddd xmm0,xmm3 +DB 102,15,56,0,231 + movdqa xmm10,xmm2 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + nop + movdqa xmm9,xmm1 + DB 15,56,203,202 + + movdqa xmm0,XMMWORD[((32-128))+rcx] + paddd xmm0,xmm4 +DB 102,15,56,0,239 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + lea rsi,[64+rsi] + DB 15,56,204,220 + DB 15,56,203,202 + + movdqa xmm0,XMMWORD[((64-128))+rcx] + paddd xmm0,xmm5 +DB 102,15,56,0,247 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 + DB 15,56,204,229 + DB 15,56,203,202 + + movdqa xmm0,XMMWORD[((96-128))+rcx] + paddd xmm0,xmm6 + DB 15,56,205,222 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 + DB 15,56,204,238 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((128-128))+rcx] + paddd xmm0,xmm3 + DB 15,56,205,227 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 + DB 15,56,204,243 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((160-128))+rcx] + paddd xmm0,xmm4 + DB 15,56,205,236 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 + DB 15,56,204,220 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((192-128))+rcx] + paddd xmm0,xmm5 + DB 15,56,205,245 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 + DB 15,56,204,229 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((224-128))+rcx] + paddd xmm0,xmm6 + DB 15,56,205,222 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 + DB 15,56,204,238 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((256-128))+rcx] + paddd xmm0,xmm3 + DB 15,56,205,227 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 + DB 15,56,204,243 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((288-128))+rcx] + paddd xmm0,xmm4 + DB 15,56,205,236 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 + DB 15,56,204,220 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((320-128))+rcx] + paddd xmm0,xmm5 + DB 15,56,205,245 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 + DB 15,56,204,229 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((352-128))+rcx] + paddd xmm0,xmm6 + DB 15,56,205,222 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 + DB 15,56,204,238 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((384-128))+rcx] + paddd xmm0,xmm3 + DB 15,56,205,227 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 + DB 15,56,204,243 + DB 15,56,203,202 + movdqa xmm0,XMMWORD[((416-128))+rcx] + paddd xmm0,xmm4 + DB 15,56,205,236 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + DB 15,56,203,202 + paddd xmm6,xmm7 + + movdqa xmm0,XMMWORD[((448-128))+rcx] + paddd xmm0,xmm5 + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + DB 15,56,205,245 + movdqa xmm7,xmm8 + DB 15,56,203,202 + + movdqa xmm0,XMMWORD[((480-128))+rcx] + paddd xmm0,xmm6 + nop + DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + dec rdx + nop + DB 15,56,203,202 + + paddd xmm2,xmm10 + paddd xmm1,xmm9 + jnz NEAR $L$oop_shaext + + pshufd xmm2,xmm2,0xb1 + pshufd xmm7,xmm1,0x1b + pshufd xmm1,xmm1,0xb1 + punpckhqdq xmm1,xmm2 +DB 102,15,58,15,215,8 + + movdqu XMMWORD[rdi],xmm1 + movdqu XMMWORD[16+rdi],xmm2 + movaps xmm6,XMMWORD[((-8-80))+rax] + movaps xmm7,XMMWORD[((-8-64))+rax] + movaps xmm8,XMMWORD[((-8-48))+rax] + movaps xmm9,XMMWORD[((-8-32))+rax] + movaps xmm10,XMMWORD[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha256_block_data_order_hw: +global sha256_block_data_order_ssse3 + +ALIGN 64 +sha256_block_data_order_ssse3: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_ssse3: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,160 + lea rdx,[rdx*4+rsi] + and rsp,-64 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[88+rsp],rax + + movaps XMMWORD[(64+32)+rsp],xmm6 + movaps XMMWORD[(64+48)+rsp],xmm7 + movaps XMMWORD[(64+64)+rsp],xmm8 + movaps XMMWORD[(64+80)+rsp],xmm9 +$L$prologue_ssse3: + + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + + + jmp NEAR $L$loop_ssse3 +ALIGN 16 +$L$loop_ssse3: + movdqa xmm7,XMMWORD[((K256+512))] + movdqu xmm0,XMMWORD[rsi] + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] +DB 102,15,56,0,199 + movdqu xmm3,XMMWORD[48+rsi] + lea rbp,[K256] +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD[rbp] + movdqa xmm5,XMMWORD[32+rbp] +DB 102,15,56,0,215 + paddd xmm4,xmm0 + movdqa xmm6,XMMWORD[64+rbp] +DB 102,15,56,0,223 + movdqa xmm7,XMMWORD[96+rbp] + paddd xmm5,xmm1 + paddd xmm6,xmm2 + paddd xmm7,xmm3 + movdqa XMMWORD[rsp],xmm4 + mov r14d,eax + movdqa XMMWORD[16+rsp],xmm5 + mov edi,ebx + movdqa XMMWORD[32+rsp],xmm6 + xor edi,ecx + movdqa XMMWORD[48+rsp],xmm7 + mov r13d,r8d + jmp NEAR $L$ssse3_00_47 + +ALIGN 16 +$L$ssse3_00_47: + sub rbp,-128 + ror r13d,14 + movdqa xmm4,xmm1 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm3 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,224,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,250,4 + add r11d,DWORD[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm0,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm3,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD[4+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm0,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD[8+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm0,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm0,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[12+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD[rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm0,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm0 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD[rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm2 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm0 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,225,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,251,4 + add edx,DWORD[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm1,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm0,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD[20+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm1,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD[24+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm1,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm1,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD[28+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD[32+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm1,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm1 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD[16+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm3 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm1 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,226,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,248,4 + add r11d,DWORD[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm2,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm1,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD[36+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm2,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD[40+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm2,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm2,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[44+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD[64+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm2,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm2 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD[32+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm0 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm2 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,227,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,249,4 + add edx,DWORD[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm3,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm2,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD[52+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm3,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD[56+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm3,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm3,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD[60+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD[96+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm3,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm3 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD[48+rsp],xmm6 + cmp BYTE[131+rbp],0 + jne NEAR $L$ssse3_00_47 + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[4+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[8+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[12+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[20+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[24+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[28+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[36+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[40+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[44+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[52+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[56+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[60+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD[rdi] + lea rsi,[64+rsi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + jb NEAR $L$loop_ssse3 + + mov rsi,QWORD[88+rsp] + + movaps xmm6,XMMWORD[((64+32))+rsp] + movaps xmm7,XMMWORD[((64+48))+rsp] + movaps xmm8,XMMWORD[((64+64))+rsp] + movaps xmm9,XMMWORD[((64+80))+rsp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_ssse3: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha256_block_data_order_ssse3: +global sha256_block_data_order_avx + +ALIGN 64 +sha256_block_data_order_avx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_avx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,160 + lea rdx,[rdx*4+rsi] + and rsp,-64 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[88+rsp],rax + + movaps XMMWORD[(64+32)+rsp],xmm6 + movaps XMMWORD[(64+48)+rsp],xmm7 + movaps XMMWORD[(64+64)+rsp],xmm8 + movaps XMMWORD[(64+80)+rsp],xmm9 +$L$prologue_avx: + + vzeroupper + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + vmovdqa xmm8,XMMWORD[((K256+512+32))] + vmovdqa xmm9,XMMWORD[((K256+512+64))] + jmp NEAR $L$loop_avx +ALIGN 16 +$L$loop_avx: + vmovdqa xmm7,XMMWORD[((K256+512))] + vmovdqu xmm0,XMMWORD[rsi] + vmovdqu xmm1,XMMWORD[16+rsi] + vmovdqu xmm2,XMMWORD[32+rsi] + vmovdqu xmm3,XMMWORD[48+rsi] + vpshufb xmm0,xmm0,xmm7 + lea rbp,[K256] + vpshufb xmm1,xmm1,xmm7 + vpshufb xmm2,xmm2,xmm7 + vpaddd xmm4,xmm0,XMMWORD[rbp] + vpshufb xmm3,xmm3,xmm7 + vpaddd xmm5,xmm1,XMMWORD[32+rbp] + vpaddd xmm6,xmm2,XMMWORD[64+rbp] + vpaddd xmm7,xmm3,XMMWORD[96+rbp] + vmovdqa XMMWORD[rsp],xmm4 + mov r14d,eax + vmovdqa XMMWORD[16+rsp],xmm5 + mov edi,ebx + vmovdqa XMMWORD[32+rsp],xmm6 + xor edi,ecx + vmovdqa XMMWORD[48+rsp],xmm7 + mov r13d,r8d + jmp NEAR $L$avx_00_47 + +ALIGN 16 +$L$avx_00_47: + sub rbp,-128 + vpalignr xmm4,xmm1,xmm0,4 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + vpalignr xmm7,xmm3,xmm2,4 + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpaddd xmm0,xmm0,xmm7 + xor r13d,r8d + add r11d,DWORD[rsp] + mov r15d,eax + vpsrld xmm7,xmm4,3 + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + vpslld xmm5,xmm4,14 + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,eax + add r11d,r13d + xor edi,ebx + vpshufd xmm7,xmm3,250 + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + vpsrld xmm6,xmm6,11 + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,r11d + and r12d,edx + xor r13d,edx + vpsrld xmm6,xmm7,10 + add r10d,DWORD[4+rsp] + mov edi,r11d + xor r12d,r9d + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + vpaddd xmm0,xmm0,xmm4 + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + vpsrlq xmm7,xmm7,2 + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + vpshufb xmm6,xmm6,xmm8 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + vpaddd xmm0,xmm0,xmm6 + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[8+rsp] + vpshufd xmm7,xmm0,80 + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,r10d + add r9d,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + vpsrlq xmm7,xmm7,2 + add r9d,edi + mov r13d,ebx + add r14d,r9d + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + vpaddd xmm0,xmm0,xmm6 + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vpaddd xmm6,xmm0,XMMWORD[rbp] + xor r13d,ebx + add r8d,DWORD[12+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD[rsp],xmm6 + vpalignr xmm4,xmm2,xmm1,4 + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + vpalignr xmm7,xmm0,xmm3,4 + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpaddd xmm1,xmm1,xmm7 + xor r13d,eax + add edx,DWORD[16+rsp] + mov r15d,r8d + vpsrld xmm7,xmm4,3 + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + vpslld xmm5,xmm4,14 + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,r8d + add edx,r13d + xor edi,r9d + vpshufd xmm7,xmm0,250 + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + vpsrld xmm6,xmm6,11 + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + vpsrld xmm6,xmm7,10 + add ecx,DWORD[20+rsp] + mov edi,edx + xor r12d,ebx + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + vpaddd xmm1,xmm1,xmm4 + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + vpsrlq xmm7,xmm7,2 + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + vpshufb xmm6,xmm6,xmm8 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + vpaddd xmm1,xmm1,xmm6 + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[24+rsp] + vpshufd xmm7,xmm1,80 + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,ecx + add ebx,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + vpsrlq xmm7,xmm7,2 + add ebx,edi + mov r13d,r9d + add r14d,ebx + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + vpaddd xmm1,xmm1,xmm6 + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpaddd xmm6,xmm1,XMMWORD[32+rbp] + xor r13d,r9d + add eax,DWORD[28+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD[16+rsp],xmm6 + vpalignr xmm4,xmm3,xmm2,4 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + vpalignr xmm7,xmm1,xmm0,4 + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpaddd xmm2,xmm2,xmm7 + xor r13d,r8d + add r11d,DWORD[32+rsp] + mov r15d,eax + vpsrld xmm7,xmm4,3 + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + vpslld xmm5,xmm4,14 + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,eax + add r11d,r13d + xor edi,ebx + vpshufd xmm7,xmm1,250 + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + vpsrld xmm6,xmm6,11 + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,r11d + and r12d,edx + xor r13d,edx + vpsrld xmm6,xmm7,10 + add r10d,DWORD[36+rsp] + mov edi,r11d + xor r12d,r9d + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + vpaddd xmm2,xmm2,xmm4 + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + vpsrlq xmm7,xmm7,2 + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + vpshufb xmm6,xmm6,xmm8 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + vpaddd xmm2,xmm2,xmm6 + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[40+rsp] + vpshufd xmm7,xmm2,80 + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,r10d + add r9d,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + vpsrlq xmm7,xmm7,2 + add r9d,edi + mov r13d,ebx + add r14d,r9d + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + vpaddd xmm2,xmm2,xmm6 + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vpaddd xmm6,xmm2,XMMWORD[64+rbp] + xor r13d,ebx + add r8d,DWORD[44+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD[32+rsp],xmm6 + vpalignr xmm4,xmm0,xmm3,4 + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + vpalignr xmm7,xmm2,xmm1,4 + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpaddd xmm3,xmm3,xmm7 + xor r13d,eax + add edx,DWORD[48+rsp] + mov r15d,r8d + vpsrld xmm7,xmm4,3 + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + vpslld xmm5,xmm4,14 + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,r8d + add edx,r13d + xor edi,r9d + vpshufd xmm7,xmm2,250 + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + vpsrld xmm6,xmm6,11 + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + vpsrld xmm6,xmm7,10 + add ecx,DWORD[52+rsp] + mov edi,edx + xor r12d,ebx + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + vpaddd xmm3,xmm3,xmm4 + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + vpsrlq xmm7,xmm7,2 + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + vpshufb xmm6,xmm6,xmm8 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + vpaddd xmm3,xmm3,xmm6 + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[56+rsp] + vpshufd xmm7,xmm3,80 + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,ecx + add ebx,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + vpsrlq xmm7,xmm7,2 + add ebx,edi + mov r13d,r9d + add r14d,ebx + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + vpaddd xmm3,xmm3,xmm6 + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpaddd xmm6,xmm3,XMMWORD[96+rbp] + xor r13d,r9d + add eax,DWORD[60+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD[48+rsp],xmm6 + cmp BYTE[131+rbp],0 + jne NEAR $L$avx_00_47 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[rsp] + mov r15d,eax + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[4+rsp] + mov edi,r11d + xor r12d,r9d + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[8+rsp] + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[12+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[16+rsp] + mov r15d,r8d + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[20+rsp] + mov edi,edx + xor r12d,ebx + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[24+rsp] + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[28+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[32+rsp] + mov r15d,eax + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[36+rsp] + mov edi,r11d + xor r12d,r9d + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[40+rsp] + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[44+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[48+rsp] + mov r15d,r8d + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[52+rsp] + mov edi,edx + xor r12d,ebx + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[56+rsp] + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[60+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD[rdi] + lea rsi,[64+rsi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + jb NEAR $L$loop_avx + + mov rsi,QWORD[88+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((64+32))+rsp] + movaps xmm7,XMMWORD[((64+48))+rsp] + movaps xmm8,XMMWORD[((64+64))+rsp] + movaps xmm9,XMMWORD[((64+80))+rsp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_avx: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha256_block_data_order_avx: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + mov rsi,rax + mov rax,QWORD[((64+24))+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + lea r10,[$L$epilogue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + lea rsi,[((64+32))+rsi] + lea rdi,[512+r8] + mov ecx,8 + DD 0xa548f3fc + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +ALIGN 16 +shaext_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue_shaext] + cmp rbx,r10 + jb NEAR $L$in_prologue + + lea r10,[$L$epilogue_shaext] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rsi,[((-8-80))+rax] + lea rdi,[512+r8] + mov ecx,10 + DD 0xa548f3fc + + jmp NEAR $L$in_prologue + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha256_block_data_order_nohw wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_nohw wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_nohw wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_hw wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_hw wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_hw wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_avx wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha256_block_data_order_nohw: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase +$L$SEH_info_sha256_block_data_order_hw: + DB 9,0,0,0 + DD shaext_handler wrt ..imagebase +$L$SEH_info_sha256_block_data_order_ssse3: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase +$L$SEH_info_sha256_block_data_order_avx: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-586-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha512-586-apple.S new file mode 100644 index 0000000000..cfdeac1211 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-586-apple.S @@ -0,0 +1,2837 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _sha512_block_data_order +.private_extern _sha512_block_data_order +.align 4 +_sha512_block_data_order: +L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L000pic_point +L000pic_point: + popl %ebp + leal L001K512-L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K512(%ebp),%edx + movl (%edx),%ecx + testl $67108864,%ecx + jz L002loop_x86 + movl 4(%edx),%edx + movq (%esi),%mm0 + andl $16777216,%ecx + movq 8(%esi),%mm1 + andl $512,%edx + movq 16(%esi),%mm2 + orl %edx,%ecx + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je L003SSSE3 + subl $80,%esp + jmp L004loop_sse2 +.align 4,0x90 +L004loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + pxor %mm1,%mm2 + movq %mm7,56(%esp) + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + movl $15,%edx + bswap %eax + bswap %ebx + jmp L00500_14_sse2 +.align 4,0x90 +L00500_14_sse2: + movd %eax,%mm1 + movl (%edi),%eax + movd %ebx,%mm7 + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + movq 48(%esp),%mm6 + decl %edx + jnz L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp L00616_79_sse2 +.align 4,0x90 +L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 5,0x90 +L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp L00800_47_ssse3 +.align 5,0x90 +L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor %xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 +.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor %xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq %xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 + movq %mm4,%mm1 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor %xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movq %mm4,%mm1 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + cmpl %eax,%edi + jb L007loop_ssse3 + movl 76(%edx),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4,0x90 +L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 4,0x90 +L00900_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne L00900_15_x86 +.align 4,0x90 +L01016_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne L01016_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-586-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha512-586-linux.S new file mode 100644 index 0000000000..bb2884d08d --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-586-linux.S @@ -0,0 +1,2835 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl sha512_block_data_order +.hidden sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: +.L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K512-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal OPENSSL_ia32cap_P-.L001K512(%ebp),%edx + movl (%edx),%ecx + testl $67108864,%ecx + jz .L002loop_x86 + movl 4(%edx),%edx + movq (%esi),%mm0 + andl $16777216,%ecx + movq 8(%esi),%mm1 + andl $512,%edx + movq 16(%esi),%mm2 + orl %edx,%ecx + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je .L003SSSE3 + subl $80,%esp + jmp .L004loop_sse2 +.align 16 +.L004loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + pxor %mm1,%mm2 + movq %mm7,56(%esp) + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + movl $15,%edx + bswap %eax + bswap %ebx + jmp .L00500_14_sse2 +.align 16 +.L00500_14_sse2: + movd %eax,%mm1 + movl (%edi),%eax + movd %ebx,%mm7 + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + movq 48(%esp),%mm6 + decl %edx + jnz .L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp .L00616_79_sse2 +.align 16 +.L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz .L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb .L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 32 +.L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp .L00800_47_ssse3 +.align 32 +.L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor %xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 +.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor %xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq %xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 + movq %mm4,%mm1 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor %xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz .L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movq %mm4,%mm1 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + cmpl %eax,%edi + jb .L007loop_ssse3 + movl 76(%edx),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 16 +.L00900_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne .L00900_15_x86 +.align 16 +.L01016_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne .L01016_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb .L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.size sha512_block_data_order,.-.L_sha512_block_data_order_begin +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-586-win.asm b/yass/third_party/boringssl/src/gen/bcm/sha512-586-win.asm new file mode 100644 index 0000000000..3603a6dc13 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-586-win.asm @@ -0,0 +1,2846 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _sha512_block_data_order +align 16 +_sha512_block_data_order: +L$_sha512_block_data_order_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$000pic_point +L$000pic_point: + pop ebp + lea ebp,[(L$001K512-L$000pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,7 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx + lea edx,[_OPENSSL_ia32cap_P] + mov ecx,DWORD [edx] + test ecx,67108864 + jz NEAR L$002loop_x86 + mov edx,DWORD [4+edx] + movq mm0,[esi] + and ecx,16777216 + movq mm1,[8+esi] + and edx,512 + movq mm2,[16+esi] + or ecx,edx + movq mm3,[24+esi] + movq mm4,[32+esi] + movq mm5,[40+esi] + movq mm6,[48+esi] + movq mm7,[56+esi] + cmp ecx,16777728 + je NEAR L$003SSSE3 + sub esp,80 + jmp NEAR L$004loop_sse2 +align 16 +L$004loop_sse2: + movq [8+esp],mm1 + movq [16+esp],mm2 + movq [24+esp],mm3 + movq [40+esp],mm5 + movq [48+esp],mm6 + pxor mm2,mm1 + movq [56+esp],mm7 + movq mm3,mm0 + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + add edi,8 + mov edx,15 + bswap eax + bswap ebx + jmp NEAR L$00500_14_sse2 +align 16 +L$00500_14_sse2: + movd mm1,eax + mov eax,DWORD [edi] + movd mm7,ebx + mov ebx,DWORD [4+edi] + add edi,8 + bswap eax + bswap ebx + punpckldq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq mm0,mm3 + movq [72+esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,[ebp] + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm3,mm2 + movq mm2,mm0 + add ebp,8 + paddq mm3,mm6 + movq mm6,[48+esp] + dec edx + jnz NEAR L$00500_14_sse2 + movd mm1,eax + movd mm7,ebx + punpckldq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq mm0,mm3 + movq [72+esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,[ebp] + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm7,[192+esp] + paddq mm3,mm2 + movq mm2,mm0 + add ebp,8 + paddq mm3,mm6 + pxor mm0,mm0 + mov edx,32 + jmp NEAR L$00616_79_sse2 +align 16 +L$00616_79_sse2: + movq mm5,[88+esp] + movq mm1,mm7 + psrlq mm7,1 + movq mm6,mm5 + psrlq mm5,6 + psllq mm1,56 + paddq mm0,mm3 + movq mm3,mm7 + psrlq mm7,6 + pxor mm3,mm1 + psllq mm1,7 + pxor mm3,mm7 + psrlq mm7,1 + pxor mm3,mm1 + movq mm1,mm5 + psrlq mm5,13 + pxor mm7,mm3 + psllq mm6,3 + pxor mm1,mm5 + paddq mm7,[200+esp] + pxor mm1,mm6 + psrlq mm5,42 + paddq mm7,[128+esp] + pxor mm1,mm5 + psllq mm6,42 + movq mm5,[40+esp] + pxor mm1,mm6 + movq mm6,[48+esp] + paddq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq [72+esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,[ebp] + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm7,[192+esp] + paddq mm2,mm6 + add ebp,8 + movq mm5,[88+esp] + movq mm1,mm7 + psrlq mm7,1 + movq mm6,mm5 + psrlq mm5,6 + psllq mm1,56 + paddq mm2,mm3 + movq mm3,mm7 + psrlq mm7,6 + pxor mm3,mm1 + psllq mm1,7 + pxor mm3,mm7 + psrlq mm7,1 + pxor mm3,mm1 + movq mm1,mm5 + psrlq mm5,13 + pxor mm7,mm3 + psllq mm6,3 + pxor mm1,mm5 + paddq mm7,[200+esp] + pxor mm1,mm6 + psrlq mm5,42 + paddq mm7,[128+esp] + pxor mm1,mm5 + psllq mm6,42 + movq mm5,[40+esp] + pxor mm1,mm6 + movq mm6,[48+esp] + paddq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq [72+esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,[ebp] + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm7,[192+esp] + paddq mm0,mm6 + add ebp,8 + dec edx + jnz NEAR L$00616_79_sse2 + paddq mm0,mm3 + movq mm1,[8+esp] + movq mm3,[24+esp] + movq mm5,[40+esp] + movq mm6,[48+esp] + movq mm7,[56+esp] + pxor mm2,mm1 + paddq mm0,[esi] + paddq mm1,[8+esi] + paddq mm2,[16+esi] + paddq mm3,[24+esi] + paddq mm4,[32+esi] + paddq mm5,[40+esi] + paddq mm6,[48+esi] + paddq mm7,[56+esi] + mov eax,640 + movq [esi],mm0 + movq [8+esi],mm1 + movq [16+esi],mm2 + movq [24+esi],mm3 + movq [32+esi],mm4 + movq [40+esi],mm5 + movq [48+esi],mm6 + movq [56+esi],mm7 + lea esp,[eax*1+esp] + sub ebp,eax + cmp edi,DWORD [88+esp] + jb NEAR L$004loop_sse2 + mov esp,DWORD [92+esp] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +align 32 +L$003SSSE3: + lea edx,[esp-64] + sub esp,256 + movdqa xmm1,[640+ebp] + movdqu xmm0,[edi] +db 102,15,56,0,193 + movdqa xmm3,[ebp] + movdqa xmm2,xmm1 + movdqu xmm1,[16+edi] + paddq xmm3,xmm0 +db 102,15,56,0,202 + movdqa [edx-128],xmm3 + movdqa xmm4,[16+ebp] + movdqa xmm3,xmm2 + movdqu xmm2,[32+edi] + paddq xmm4,xmm1 +db 102,15,56,0,211 + movdqa [edx-112],xmm4 + movdqa xmm5,[32+ebp] + movdqa xmm4,xmm3 + movdqu xmm3,[48+edi] + paddq xmm5,xmm2 +db 102,15,56,0,220 + movdqa [edx-96],xmm5 + movdqa xmm6,[48+ebp] + movdqa xmm5,xmm4 + movdqu xmm4,[64+edi] + paddq xmm6,xmm3 +db 102,15,56,0,229 + movdqa [edx-80],xmm6 + movdqa xmm7,[64+ebp] + movdqa xmm6,xmm5 + movdqu xmm5,[80+edi] + paddq xmm7,xmm4 +db 102,15,56,0,238 + movdqa [edx-64],xmm7 + movdqa [edx],xmm0 + movdqa xmm0,[80+ebp] + movdqa xmm7,xmm6 + movdqu xmm6,[96+edi] + paddq xmm0,xmm5 +db 102,15,56,0,247 + movdqa [edx-48],xmm0 + movdqa [16+edx],xmm1 + movdqa xmm1,[96+ebp] + movdqa xmm0,xmm7 + movdqu xmm7,[112+edi] + paddq xmm1,xmm6 +db 102,15,56,0,248 + movdqa [edx-32],xmm1 + movdqa [32+edx],xmm2 + movdqa xmm2,[112+ebp] + movdqa xmm0,[edx] + paddq xmm2,xmm7 + movdqa [edx-16],xmm2 + nop +align 32 +L$007loop_ssse3: + movdqa xmm2,[16+edx] + movdqa [48+edx],xmm3 + lea ebp,[128+ebp] + movq [8+esp],mm1 + mov ebx,edi + movq [16+esp],mm2 + lea edi,[128+edi] + movq [24+esp],mm3 + cmp edi,eax + movq [40+esp],mm5 + cmovb ebx,edi + movq [48+esp],mm6 + mov ecx,4 + pxor mm2,mm1 + movq [56+esp],mm7 + pxor mm3,mm3 + jmp NEAR L$00800_47_ssse3 +align 32 +L$00800_47_ssse3: + movdqa xmm3,xmm5 + movdqa xmm1,xmm2 +db 102,15,58,15,208,8 + movdqa [edx],xmm4 +db 102,15,58,15,220,8 + movdqa xmm4,xmm2 + psrlq xmm2,7 + paddq xmm0,xmm3 + movdqa xmm3,xmm4 + psrlq xmm4,1 + psllq xmm3,56 + pxor xmm2,xmm4 + psrlq xmm4,7 + pxor xmm2,xmm3 + psllq xmm3,7 + pxor xmm2,xmm4 + movdqa xmm4,xmm7 + pxor xmm2,xmm3 + movdqa xmm3,xmm7 + psrlq xmm4,6 + paddq xmm0,xmm2 + movdqa xmm2,xmm7 + psrlq xmm3,19 + psllq xmm2,3 + pxor xmm4,xmm3 + psrlq xmm3,42 + pxor xmm4,xmm2 + psllq xmm2,42 + pxor xmm4,xmm3 + movdqa xmm3,[32+edx] + pxor xmm4,xmm2 + movdqa xmm2,[ebp] + movq mm1,mm4 + paddq xmm0,xmm4 + movq mm7,[edx-128] + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + paddq xmm2,xmm0 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[32+esp] + paddq mm2,mm6 + movq mm6,[40+esp] + movq mm1,mm4 + movq mm7,[edx-120] + pxor mm5,mm6 + psrlq mm1,14 + movq [24+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [56+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[48+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[16+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[24+esp] + paddq mm0,mm6 + movq mm6,[32+esp] + movdqa [edx-128],xmm2 + movdqa xmm4,xmm6 + movdqa xmm2,xmm3 +db 102,15,58,15,217,8 + movdqa [16+edx],xmm5 +db 102,15,58,15,229,8 + movdqa xmm5,xmm3 + psrlq xmm3,7 + paddq xmm1,xmm4 + movdqa xmm4,xmm5 + psrlq xmm5,1 + psllq xmm4,56 + pxor xmm3,xmm5 + psrlq xmm5,7 + pxor xmm3,xmm4 + psllq xmm4,7 + pxor xmm3,xmm5 + movdqa xmm5,xmm0 + pxor xmm3,xmm4 + movdqa xmm4,xmm0 + psrlq xmm5,6 + paddq xmm1,xmm3 + movdqa xmm3,xmm0 + psrlq xmm4,19 + psllq xmm3,3 + pxor xmm5,xmm4 + psrlq xmm4,42 + pxor xmm5,xmm3 + psllq xmm3,42 + pxor xmm5,xmm4 + movdqa xmm4,[48+edx] + pxor xmm5,xmm3 + movdqa xmm3,[16+ebp] + movq mm1,mm4 + paddq xmm1,xmm5 + movq mm7,[edx-112] + pxor mm5,mm6 + psrlq mm1,14 + movq [16+esp],mm4 + paddq xmm3,xmm1 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [48+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[40+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[8+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[56+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[16+esp] + paddq mm2,mm6 + movq mm6,[24+esp] + movq mm1,mm4 + movq mm7,[edx-104] + pxor mm5,mm6 + psrlq mm1,14 + movq [8+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [40+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[32+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[48+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[8+esp] + paddq mm0,mm6 + movq mm6,[16+esp] + movdqa [edx-112],xmm3 + movdqa xmm5,xmm7 + movdqa xmm3,xmm4 +db 102,15,58,15,226,8 + movdqa [32+edx],xmm6 +db 102,15,58,15,238,8 + movdqa xmm6,xmm4 + psrlq xmm4,7 + paddq xmm2,xmm5 + movdqa xmm5,xmm6 + psrlq xmm6,1 + psllq xmm5,56 + pxor xmm4,xmm6 + psrlq xmm6,7 + pxor xmm4,xmm5 + psllq xmm5,7 + pxor xmm4,xmm6 + movdqa xmm6,xmm1 + pxor xmm4,xmm5 + movdqa xmm5,xmm1 + psrlq xmm6,6 + paddq xmm2,xmm4 + movdqa xmm4,xmm1 + psrlq xmm5,19 + psllq xmm4,3 + pxor xmm6,xmm5 + psrlq xmm5,42 + pxor xmm6,xmm4 + psllq xmm4,42 + pxor xmm6,xmm5 + movdqa xmm5,[edx] + pxor xmm6,xmm4 + movdqa xmm4,[32+ebp] + movq mm1,mm4 + paddq xmm2,xmm6 + movq mm7,[edx-96] + pxor mm5,mm6 + psrlq mm1,14 + movq [esp],mm4 + paddq xmm4,xmm2 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [32+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[24+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[56+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[40+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[esp] + paddq mm2,mm6 + movq mm6,[8+esp] + movq mm1,mm4 + movq mm7,[edx-88] + pxor mm5,mm6 + psrlq mm1,14 + movq [56+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [24+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[16+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[48+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[32+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[56+esp] + paddq mm0,mm6 + movq mm6,[esp] + movdqa [edx-96],xmm4 + movdqa xmm6,xmm0 + movdqa xmm4,xmm5 +db 102,15,58,15,235,8 + movdqa [48+edx],xmm7 +db 102,15,58,15,247,8 + movdqa xmm7,xmm5 + psrlq xmm5,7 + paddq xmm3,xmm6 + movdqa xmm6,xmm7 + psrlq xmm7,1 + psllq xmm6,56 + pxor xmm5,xmm7 + psrlq xmm7,7 + pxor xmm5,xmm6 + psllq xmm6,7 + pxor xmm5,xmm7 + movdqa xmm7,xmm2 + pxor xmm5,xmm6 + movdqa xmm6,xmm2 + psrlq xmm7,6 + paddq xmm3,xmm5 + movdqa xmm5,xmm2 + psrlq xmm6,19 + psllq xmm5,3 + pxor xmm7,xmm6 + psrlq xmm6,42 + pxor xmm7,xmm5 + psllq xmm5,42 + pxor xmm7,xmm6 + movdqa xmm6,[16+edx] + pxor xmm7,xmm5 + movdqa xmm5,[48+ebp] + movq mm1,mm4 + paddq xmm3,xmm7 + movq mm7,[edx-80] + pxor mm5,mm6 + psrlq mm1,14 + movq [48+esp],mm4 + paddq xmm5,xmm3 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [16+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[8+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[40+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[24+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[48+esp] + paddq mm2,mm6 + movq mm6,[56+esp] + movq mm1,mm4 + movq mm7,[edx-72] + pxor mm5,mm6 + psrlq mm1,14 + movq [40+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [8+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[32+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[16+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm0,mm6 + movq mm6,[48+esp] + movdqa [edx-80],xmm5 + movdqa xmm7,xmm1 + movdqa xmm5,xmm6 +db 102,15,58,15,244,8 + movdqa [edx],xmm0 +db 102,15,58,15,248,8 + movdqa xmm0,xmm6 + psrlq xmm6,7 + paddq xmm4,xmm7 + movdqa xmm7,xmm0 + psrlq xmm0,1 + psllq xmm7,56 + pxor xmm6,xmm0 + psrlq xmm0,7 + pxor xmm6,xmm7 + psllq xmm7,7 + pxor xmm6,xmm0 + movdqa xmm0,xmm3 + pxor xmm6,xmm7 + movdqa xmm7,xmm3 + psrlq xmm0,6 + paddq xmm4,xmm6 + movdqa xmm6,xmm3 + psrlq xmm7,19 + psllq xmm6,3 + pxor xmm0,xmm7 + psrlq xmm7,42 + pxor xmm0,xmm6 + psllq xmm6,42 + pxor xmm0,xmm7 + movdqa xmm7,[32+edx] + pxor xmm0,xmm6 + movdqa xmm6,[64+ebp] + movq mm1,mm4 + paddq xmm4,xmm0 + movq mm7,[edx-64] + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + paddq xmm6,xmm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[32+esp] + paddq mm2,mm6 + movq mm6,[40+esp] + movq mm1,mm4 + movq mm7,[edx-56] + pxor mm5,mm6 + psrlq mm1,14 + movq [24+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [56+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[48+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[16+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[24+esp] + paddq mm0,mm6 + movq mm6,[32+esp] + movdqa [edx-64],xmm6 + movdqa xmm0,xmm2 + movdqa xmm6,xmm7 +db 102,15,58,15,253,8 + movdqa [16+edx],xmm1 +db 102,15,58,15,193,8 + movdqa xmm1,xmm7 + psrlq xmm7,7 + paddq xmm5,xmm0 + movdqa xmm0,xmm1 + psrlq xmm1,1 + psllq xmm0,56 + pxor xmm7,xmm1 + psrlq xmm1,7 + pxor xmm7,xmm0 + psllq xmm0,7 + pxor xmm7,xmm1 + movdqa xmm1,xmm4 + pxor xmm7,xmm0 + movdqa xmm0,xmm4 + psrlq xmm1,6 + paddq xmm5,xmm7 + movdqa xmm7,xmm4 + psrlq xmm0,19 + psllq xmm7,3 + pxor xmm1,xmm0 + psrlq xmm0,42 + pxor xmm1,xmm7 + psllq xmm7,42 + pxor xmm1,xmm0 + movdqa xmm0,[48+edx] + pxor xmm1,xmm7 + movdqa xmm7,[80+ebp] + movq mm1,mm4 + paddq xmm5,xmm1 + movq mm7,[edx-48] + pxor mm5,mm6 + psrlq mm1,14 + movq [16+esp],mm4 + paddq xmm7,xmm5 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [48+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[40+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[8+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[56+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[16+esp] + paddq mm2,mm6 + movq mm6,[24+esp] + movq mm1,mm4 + movq mm7,[edx-40] + pxor mm5,mm6 + psrlq mm1,14 + movq [8+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [40+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[32+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[48+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[8+esp] + paddq mm0,mm6 + movq mm6,[16+esp] + movdqa [edx-48],xmm7 + movdqa xmm1,xmm3 + movdqa xmm7,xmm0 +db 102,15,58,15,198,8 + movdqa [32+edx],xmm2 +db 102,15,58,15,202,8 + movdqa xmm2,xmm0 + psrlq xmm0,7 + paddq xmm6,xmm1 + movdqa xmm1,xmm2 + psrlq xmm2,1 + psllq xmm1,56 + pxor xmm0,xmm2 + psrlq xmm2,7 + pxor xmm0,xmm1 + psllq xmm1,7 + pxor xmm0,xmm2 + movdqa xmm2,xmm5 + pxor xmm0,xmm1 + movdqa xmm1,xmm5 + psrlq xmm2,6 + paddq xmm6,xmm0 + movdqa xmm0,xmm5 + psrlq xmm1,19 + psllq xmm0,3 + pxor xmm2,xmm1 + psrlq xmm1,42 + pxor xmm2,xmm0 + psllq xmm0,42 + pxor xmm2,xmm1 + movdqa xmm1,[edx] + pxor xmm2,xmm0 + movdqa xmm0,[96+ebp] + movq mm1,mm4 + paddq xmm6,xmm2 + movq mm7,[edx-32] + pxor mm5,mm6 + psrlq mm1,14 + movq [esp],mm4 + paddq xmm0,xmm6 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [32+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[24+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[56+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[40+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[esp] + paddq mm2,mm6 + movq mm6,[8+esp] + movq mm1,mm4 + movq mm7,[edx-24] + pxor mm5,mm6 + psrlq mm1,14 + movq [56+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [24+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[16+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[48+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[32+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[56+esp] + paddq mm0,mm6 + movq mm6,[esp] + movdqa [edx-32],xmm0 + movdqa xmm2,xmm4 + movdqa xmm0,xmm1 +db 102,15,58,15,207,8 + movdqa [48+edx],xmm3 +db 102,15,58,15,211,8 + movdqa xmm3,xmm1 + psrlq xmm1,7 + paddq xmm7,xmm2 + movdqa xmm2,xmm3 + psrlq xmm3,1 + psllq xmm2,56 + pxor xmm1,xmm3 + psrlq xmm3,7 + pxor xmm1,xmm2 + psllq xmm2,7 + pxor xmm1,xmm3 + movdqa xmm3,xmm6 + pxor xmm1,xmm2 + movdqa xmm2,xmm6 + psrlq xmm3,6 + paddq xmm7,xmm1 + movdqa xmm1,xmm6 + psrlq xmm2,19 + psllq xmm1,3 + pxor xmm3,xmm2 + psrlq xmm2,42 + pxor xmm3,xmm1 + psllq xmm1,42 + pxor xmm3,xmm2 + movdqa xmm2,[16+edx] + pxor xmm3,xmm1 + movdqa xmm1,[112+ebp] + movq mm1,mm4 + paddq xmm7,xmm3 + movq mm7,[edx-16] + pxor mm5,mm6 + psrlq mm1,14 + movq [48+esp],mm4 + paddq xmm1,xmm7 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [16+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[8+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[40+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[24+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[48+esp] + paddq mm2,mm6 + movq mm6,[56+esp] + movq mm1,mm4 + movq mm7,[edx-8] + pxor mm5,mm6 + psrlq mm1,14 + movq [40+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [8+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[32+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[16+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm0,mm6 + movq mm6,[48+esp] + movdqa [edx-16],xmm1 + lea ebp,[128+ebp] + dec ecx + jnz NEAR L$00800_47_ssse3 + movdqa xmm1,[ebp] + lea ebp,[ebp-640] + movdqu xmm0,[ebx] +db 102,15,56,0,193 + movdqa xmm3,[ebp] + movdqa xmm2,xmm1 + movdqu xmm1,[16+ebx] + paddq xmm3,xmm0 +db 102,15,56,0,202 + movq mm1,mm4 + movq mm7,[edx-128] + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[32+esp] + paddq mm2,mm6 + movq mm6,[40+esp] + movq mm1,mm4 + movq mm7,[edx-120] + pxor mm5,mm6 + psrlq mm1,14 + movq [24+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [56+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[48+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[16+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[24+esp] + paddq mm0,mm6 + movq mm6,[32+esp] + movdqa [edx-128],xmm3 + movdqa xmm4,[16+ebp] + movdqa xmm3,xmm2 + movdqu xmm2,[32+ebx] + paddq xmm4,xmm1 +db 102,15,56,0,211 + movq mm1,mm4 + movq mm7,[edx-112] + pxor mm5,mm6 + psrlq mm1,14 + movq [16+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [48+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[40+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[8+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[56+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[16+esp] + paddq mm2,mm6 + movq mm6,[24+esp] + movq mm1,mm4 + movq mm7,[edx-104] + pxor mm5,mm6 + psrlq mm1,14 + movq [8+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [40+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[32+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[48+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[8+esp] + paddq mm0,mm6 + movq mm6,[16+esp] + movdqa [edx-112],xmm4 + movdqa xmm5,[32+ebp] + movdqa xmm4,xmm3 + movdqu xmm3,[48+ebx] + paddq xmm5,xmm2 +db 102,15,56,0,220 + movq mm1,mm4 + movq mm7,[edx-96] + pxor mm5,mm6 + psrlq mm1,14 + movq [esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [32+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[24+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[56+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[40+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[esp] + paddq mm2,mm6 + movq mm6,[8+esp] + movq mm1,mm4 + movq mm7,[edx-88] + pxor mm5,mm6 + psrlq mm1,14 + movq [56+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [24+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[16+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[48+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[32+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[56+esp] + paddq mm0,mm6 + movq mm6,[esp] + movdqa [edx-96],xmm5 + movdqa xmm6,[48+ebp] + movdqa xmm5,xmm4 + movdqu xmm4,[64+ebx] + paddq xmm6,xmm3 +db 102,15,56,0,229 + movq mm1,mm4 + movq mm7,[edx-80] + pxor mm5,mm6 + psrlq mm1,14 + movq [48+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [16+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[8+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[40+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[24+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[48+esp] + paddq mm2,mm6 + movq mm6,[56+esp] + movq mm1,mm4 + movq mm7,[edx-72] + pxor mm5,mm6 + psrlq mm1,14 + movq [40+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [8+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[32+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[16+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm0,mm6 + movq mm6,[48+esp] + movdqa [edx-80],xmm6 + movdqa xmm7,[64+ebp] + movdqa xmm6,xmm5 + movdqu xmm5,[80+ebx] + paddq xmm7,xmm4 +db 102,15,56,0,238 + movq mm1,mm4 + movq mm7,[edx-64] + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[32+esp] + paddq mm2,mm6 + movq mm6,[40+esp] + movq mm1,mm4 + movq mm7,[edx-56] + pxor mm5,mm6 + psrlq mm1,14 + movq [24+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [56+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[48+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[16+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[24+esp] + paddq mm0,mm6 + movq mm6,[32+esp] + movdqa [edx-64],xmm7 + movdqa [edx],xmm0 + movdqa xmm0,[80+ebp] + movdqa xmm7,xmm6 + movdqu xmm6,[96+ebx] + paddq xmm0,xmm5 +db 102,15,56,0,247 + movq mm1,mm4 + movq mm7,[edx-48] + pxor mm5,mm6 + psrlq mm1,14 + movq [16+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [48+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[40+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[8+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[56+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[16+esp] + paddq mm2,mm6 + movq mm6,[24+esp] + movq mm1,mm4 + movq mm7,[edx-40] + pxor mm5,mm6 + psrlq mm1,14 + movq [8+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [40+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[32+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[48+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[8+esp] + paddq mm0,mm6 + movq mm6,[16+esp] + movdqa [edx-48],xmm0 + movdqa [16+edx],xmm1 + movdqa xmm1,[96+ebp] + movdqa xmm0,xmm7 + movdqu xmm7,[112+ebx] + paddq xmm1,xmm6 +db 102,15,56,0,248 + movq mm1,mm4 + movq mm7,[edx-32] + pxor mm5,mm6 + psrlq mm1,14 + movq [esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [32+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[24+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[56+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[40+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[esp] + paddq mm2,mm6 + movq mm6,[8+esp] + movq mm1,mm4 + movq mm7,[edx-24] + pxor mm5,mm6 + psrlq mm1,14 + movq [56+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [24+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[16+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[48+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[32+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[56+esp] + paddq mm0,mm6 + movq mm6,[esp] + movdqa [edx-32],xmm1 + movdqa [32+edx],xmm2 + movdqa xmm2,[112+ebp] + movdqa xmm0,[edx] + paddq xmm2,xmm7 + movq mm1,mm4 + movq mm7,[edx-16] + pxor mm5,mm6 + psrlq mm1,14 + movq [48+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [16+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[8+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[40+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[24+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[48+esp] + paddq mm2,mm6 + movq mm6,[56+esp] + movq mm1,mm4 + movq mm7,[edx-8] + pxor mm5,mm6 + psrlq mm1,14 + movq [40+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [8+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[32+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[16+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm0,mm6 + movq mm6,[48+esp] + movdqa [edx-16],xmm2 + movq mm1,[8+esp] + paddq mm0,mm3 + movq mm3,[24+esp] + movq mm7,[56+esp] + pxor mm2,mm1 + paddq mm0,[esi] + paddq mm1,[8+esi] + paddq mm2,[16+esi] + paddq mm3,[24+esi] + paddq mm4,[32+esi] + paddq mm5,[40+esi] + paddq mm6,[48+esi] + paddq mm7,[56+esi] + movq [esi],mm0 + movq [8+esi],mm1 + movq [16+esi],mm2 + movq [24+esi],mm3 + movq [32+esi],mm4 + movq [40+esi],mm5 + movq [48+esi],mm6 + movq [56+esi],mm7 + cmp edi,eax + jb NEAR L$007loop_ssse3 + mov esp,DWORD [76+edx] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +L$002loop_x86: + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [16+edi] + mov ebx,DWORD [20+edi] + mov ecx,DWORD [24+edi] + mov edx,DWORD [28+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [32+edi] + mov ebx,DWORD [36+edi] + mov ecx,DWORD [40+edi] + mov edx,DWORD [44+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [48+edi] + mov ebx,DWORD [52+edi] + mov ecx,DWORD [56+edi] + mov edx,DWORD [60+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [64+edi] + mov ebx,DWORD [68+edi] + mov ecx,DWORD [72+edi] + mov edx,DWORD [76+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [80+edi] + mov ebx,DWORD [84+edi] + mov ecx,DWORD [88+edi] + mov edx,DWORD [92+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [96+edi] + mov ebx,DWORD [100+edi] + mov ecx,DWORD [104+edi] + mov edx,DWORD [108+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [112+edi] + mov ebx,DWORD [116+edi] + mov ecx,DWORD [120+edi] + mov edx,DWORD [124+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + add edi,128 + sub esp,72 + mov DWORD [204+esp],edi + lea edi,[8+esp] + mov ecx,16 +dd 2784229001 +align 16 +L$00900_15_x86: + mov ecx,DWORD [40+esp] + mov edx,DWORD [44+esp] + mov esi,ecx + shr ecx,9 + mov edi,edx + shr edx,9 + mov ebx,ecx + shl esi,14 + mov eax,edx + shl edi,14 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor eax,ecx + shl esi,4 + xor ebx,edx + shl edi,4 + xor ebx,esi + shr ecx,4 + xor eax,edi + shr edx,4 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD [48+esp] + mov edx,DWORD [52+esp] + mov esi,DWORD [56+esp] + mov edi,DWORD [60+esp] + add eax,DWORD [64+esp] + adc ebx,DWORD [68+esp] + xor ecx,esi + xor edx,edi + and ecx,DWORD [40+esp] + and edx,DWORD [44+esp] + add eax,DWORD [192+esp] + adc ebx,DWORD [196+esp] + xor ecx,esi + xor edx,edi + mov esi,DWORD [ebp] + mov edi,DWORD [4+ebp] + add eax,ecx + adc ebx,edx + mov ecx,DWORD [32+esp] + mov edx,DWORD [36+esp] + add eax,esi + adc ebx,edi + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + add eax,ecx + adc ebx,edx + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + mov DWORD [32+esp],eax + mov DWORD [36+esp],ebx + mov esi,ecx + shr ecx,2 + mov edi,edx + shr edx,2 + mov ebx,ecx + shl esi,4 + mov eax,edx + shl edi,4 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor ebx,ecx + shl esi,21 + xor eax,edx + shl edi,21 + xor eax,esi + shr ecx,21 + xor ebx,edi + shr edx,21 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + mov esi,DWORD [16+esp] + mov edi,DWORD [20+esp] + add eax,DWORD [esp] + adc ebx,DWORD [4+esp] + or ecx,esi + or edx,edi + and ecx,DWORD [24+esp] + and edx,DWORD [28+esp] + and esi,DWORD [8+esp] + and edi,DWORD [12+esp] + or ecx,esi + or edx,edi + add eax,ecx + adc ebx,edx + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + mov dl,BYTE [ebp] + sub esp,8 + lea ebp,[8+ebp] + cmp dl,148 + jne NEAR L$00900_15_x86 +align 16 +L$01016_79_x86: + mov ecx,DWORD [312+esp] + mov edx,DWORD [316+esp] + mov esi,ecx + shr ecx,1 + mov edi,edx + shr edx,1 + mov eax,ecx + shl esi,24 + mov ebx,edx + shl edi,24 + xor ebx,esi + shr ecx,6 + xor eax,edi + shr edx,6 + xor eax,ecx + shl esi,7 + xor ebx,edx + shl edi,1 + xor ebx,esi + shr ecx,1 + xor eax,edi + shr edx,1 + xor eax,ecx + shl edi,6 + xor ebx,edx + xor eax,edi + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + mov ecx,DWORD [208+esp] + mov edx,DWORD [212+esp] + mov esi,ecx + shr ecx,6 + mov edi,edx + shr edx,6 + mov eax,ecx + shl esi,3 + mov ebx,edx + shl edi,3 + xor eax,esi + shr ecx,13 + xor ebx,edi + shr edx,13 + xor eax,ecx + shl esi,10 + xor ebx,edx + shl edi,10 + xor ebx,esi + shr ecx,10 + xor eax,edi + shr edx,10 + xor ebx,ecx + shl edi,13 + xor eax,edx + xor eax,edi + mov ecx,DWORD [320+esp] + mov edx,DWORD [324+esp] + add eax,DWORD [esp] + adc ebx,DWORD [4+esp] + mov esi,DWORD [248+esp] + mov edi,DWORD [252+esp] + add eax,ecx + adc ebx,edx + add eax,esi + adc ebx,edi + mov DWORD [192+esp],eax + mov DWORD [196+esp],ebx + mov ecx,DWORD [40+esp] + mov edx,DWORD [44+esp] + mov esi,ecx + shr ecx,9 + mov edi,edx + shr edx,9 + mov ebx,ecx + shl esi,14 + mov eax,edx + shl edi,14 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor eax,ecx + shl esi,4 + xor ebx,edx + shl edi,4 + xor ebx,esi + shr ecx,4 + xor eax,edi + shr edx,4 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD [48+esp] + mov edx,DWORD [52+esp] + mov esi,DWORD [56+esp] + mov edi,DWORD [60+esp] + add eax,DWORD [64+esp] + adc ebx,DWORD [68+esp] + xor ecx,esi + xor edx,edi + and ecx,DWORD [40+esp] + and edx,DWORD [44+esp] + add eax,DWORD [192+esp] + adc ebx,DWORD [196+esp] + xor ecx,esi + xor edx,edi + mov esi,DWORD [ebp] + mov edi,DWORD [4+ebp] + add eax,ecx + adc ebx,edx + mov ecx,DWORD [32+esp] + mov edx,DWORD [36+esp] + add eax,esi + adc ebx,edi + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + add eax,ecx + adc ebx,edx + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + mov DWORD [32+esp],eax + mov DWORD [36+esp],ebx + mov esi,ecx + shr ecx,2 + mov edi,edx + shr edx,2 + mov ebx,ecx + shl esi,4 + mov eax,edx + shl edi,4 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor ebx,ecx + shl esi,21 + xor eax,edx + shl edi,21 + xor eax,esi + shr ecx,21 + xor ebx,edi + shr edx,21 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + mov esi,DWORD [16+esp] + mov edi,DWORD [20+esp] + add eax,DWORD [esp] + adc ebx,DWORD [4+esp] + or ecx,esi + or edx,edi + and ecx,DWORD [24+esp] + and edx,DWORD [28+esp] + and esi,DWORD [8+esp] + and edi,DWORD [12+esp] + or ecx,esi + or edx,edi + add eax,ecx + adc ebx,edx + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + mov dl,BYTE [ebp] + sub esp,8 + lea ebp,[8+ebp] + cmp dl,23 + jne NEAR L$01016_79_x86 + mov esi,DWORD [840+esp] + mov edi,DWORD [844+esp] + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + add eax,DWORD [8+esp] + adc ebx,DWORD [12+esp] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + add ecx,DWORD [16+esp] + adc edx,DWORD [20+esp] + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + mov eax,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov edx,DWORD [28+esi] + add eax,DWORD [24+esp] + adc ebx,DWORD [28+esp] + mov DWORD [16+esi],eax + mov DWORD [20+esi],ebx + add ecx,DWORD [32+esp] + adc edx,DWORD [36+esp] + mov DWORD [24+esi],ecx + mov DWORD [28+esi],edx + mov eax,DWORD [32+esi] + mov ebx,DWORD [36+esi] + mov ecx,DWORD [40+esi] + mov edx,DWORD [44+esi] + add eax,DWORD [40+esp] + adc ebx,DWORD [44+esp] + mov DWORD [32+esi],eax + mov DWORD [36+esi],ebx + add ecx,DWORD [48+esp] + adc edx,DWORD [52+esp] + mov DWORD [40+esi],ecx + mov DWORD [44+esi],edx + mov eax,DWORD [48+esi] + mov ebx,DWORD [52+esi] + mov ecx,DWORD [56+esi] + mov edx,DWORD [60+esi] + add eax,DWORD [56+esp] + adc ebx,DWORD [60+esp] + mov DWORD [48+esi],eax + mov DWORD [52+esi],ebx + add ecx,DWORD [64+esp] + adc edx,DWORD [68+esp] + mov DWORD [56+esi],ecx + mov DWORD [60+esi],edx + add esp,840 + sub ebp,640 + cmp edi,DWORD [8+esp] + jb NEAR L$002loop_x86 + mov esp,DWORD [12+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$001K512: +dd 3609767458,1116352408 +dd 602891725,1899447441 +dd 3964484399,3049323471 +dd 2173295548,3921009573 +dd 4081628472,961987163 +dd 3053834265,1508970993 +dd 2937671579,2453635748 +dd 3664609560,2870763221 +dd 2734883394,3624381080 +dd 1164996542,310598401 +dd 1323610764,607225278 +dd 3590304994,1426881987 +dd 4068182383,1925078388 +dd 991336113,2162078206 +dd 633803317,2614888103 +dd 3479774868,3248222580 +dd 2666613458,3835390401 +dd 944711139,4022224774 +dd 2341262773,264347078 +dd 2007800933,604807628 +dd 1495990901,770255983 +dd 1856431235,1249150122 +dd 3175218132,1555081692 +dd 2198950837,1996064986 +dd 3999719339,2554220882 +dd 766784016,2821834349 +dd 2566594879,2952996808 +dd 3203337956,3210313671 +dd 1034457026,3336571891 +dd 2466948901,3584528711 +dd 3758326383,113926993 +dd 168717936,338241895 +dd 1188179964,666307205 +dd 1546045734,773529912 +dd 1522805485,1294757372 +dd 2643833823,1396182291 +dd 2343527390,1695183700 +dd 1014477480,1986661051 +dd 1206759142,2177026350 +dd 344077627,2456956037 +dd 1290863460,2730485921 +dd 3158454273,2820302411 +dd 3505952657,3259730800 +dd 106217008,3345764771 +dd 3606008344,3516065817 +dd 1432725776,3600352804 +dd 1467031594,4094571909 +dd 851169720,275423344 +dd 3100823752,430227734 +dd 1363258195,506948616 +dd 3750685593,659060556 +dd 3785050280,883997877 +dd 3318307427,958139571 +dd 3812723403,1322822218 +dd 2003034995,1537002063 +dd 3602036899,1747873779 +dd 1575990012,1955562222 +dd 1125592928,2024104815 +dd 2716904306,2227730452 +dd 442776044,2361852424 +dd 593698344,2428436474 +dd 3733110249,2756734187 +dd 2999351573,3204031479 +dd 3815920427,3329325298 +dd 3928383900,3391569614 +dd 566280711,3515267271 +dd 3454069534,3940187606 +dd 4000239992,4118630271 +dd 1914138554,116418474 +dd 2731055270,174292421 +dd 3203993006,289380356 +dd 320620315,460393269 +dd 587496836,685471733 +dd 1086792851,852142971 +dd 365543100,1017036298 +dd 2618297676,1126000580 +dd 3409855158,1288033470 +dd 4234509866,1501505948 +dd 987167468,1607167915 +dd 1246189591,1816402316 +dd 67438087,66051 +dd 202182159,134810123 +db 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +db 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +db 62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-armv4-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha512-armv4-linux.S new file mode 100644 index 0000000000..5500686524 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-armv4-linux.S @@ -0,0 +1,1855 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +@ +@ Licensed under the OpenSSL license (the "License"). You may not use +@ this file except in compliance with the License. You can obtain a copy +@ in the file LICENSE in the source distribution or at +@ https://www.openssl.org/source/license.html + + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA512 block procedure for ARMv4. September 2007. + +@ This code is ~4.5 (four and a half) times faster than code generated +@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue +@ Xscale PXA250 core]. +@ +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 6% improvement on +@ Cortex A8 core and ~40 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 7% +@ improvement on Coxtex A8 core and ~38 cycles per byte. + +@ March 2011. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process +@ one byte in 23.3 cycles or ~60% faster than integer-only code. + +@ August 2012. +@ +@ Improve NEON performance by 12% on Snapdragon S4. In absolute +@ terms it's 22.6 cycles per byte, which is disappointing result. +@ Technical writers asserted that 3-way S4 pipeline can sustain +@ multiple NEON instructions per cycle, but dual NEON issue could +@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html +@ for further details. On side note Cortex-A15 processes one byte in +@ 16 cycles. + +@ Byte order [in]dependence. ========================================= +@ +@ Originally caller was expected to maintain specific *dword* order in +@ h[0-7], namely with most significant dword at *lower* address, which +@ was reflected in below two parameters as 0 and 4. Now caller is +@ expected to maintain native byte order for whole 64-bit values. +#ifndef __KERNEL__ +# include +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +#else +# define __ARM_MAX_ARCH__ 7 +# define VFP_ABI_PUSH +# define VFP_ABI_POP +#endif + +@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both +@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. +.arch armv7-a + +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + +.text +#if defined(__thumb2__) +.syntax unified +.thumb +# define adrl adr +#else +.code 32 +#endif + +.type K512,%object +.align 5 +K512: + WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) + WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) + WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) + WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) + WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) + WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) + WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) + WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) + WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) + WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) + WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) + WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) + WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) + WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) + WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) + WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) + WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) + WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) + WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) + WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) + WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) + WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) + WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) + WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) + WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) + WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) + WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) + WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) + WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) + WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) + WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) + WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) + WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) + WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) + WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) + WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) + WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) + WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) + WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) + WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 + +.globl sha512_block_data_order_nohw +.hidden sha512_block_data_order_nohw +.type sha512_block_data_order_nohw,%function +sha512_block_data_order_nohw: + add r2,r1,r2,lsl#7 @ len to point at the end of inp + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + adr r14,K512 + sub sp,sp,#9*8 + + ldr r7,[r0,#32+LO] + ldr r8,[r0,#32+HI] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] +.Loop: + str r9, [sp,#48+0] + str r10, [sp,#48+4] + str r11, [sp,#56+0] + str r12, [sp,#56+4] + ldr r5,[r0,#0+LO] + ldr r6,[r0,#0+HI] + ldr r3,[r0,#8+LO] + ldr r4,[r0,#8+HI] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + str r3,[sp,#8+0] + str r4,[sp,#8+4] + str r9, [sp,#16+0] + str r10, [sp,#16+4] + str r11, [sp,#24+0] + str r12, [sp,#24+4] + ldr r3,[r0,#40+LO] + ldr r4,[r0,#40+HI] + str r3,[sp,#40+0] + str r4,[sp,#40+4] + +.L00_15: +#if __ARM_ARCH<7 + ldrb r3,[r1,#7] + ldrb r9, [r1,#6] + ldrb r10, [r1,#5] + ldrb r11, [r1,#4] + ldrb r4,[r1,#3] + ldrb r12, [r1,#2] + orr r3,r3,r9,lsl#8 + ldrb r9, [r1,#1] + orr r3,r3,r10,lsl#16 + ldrb r10, [r1],#8 + orr r3,r3,r11,lsl#24 + orr r4,r4,r12,lsl#8 + orr r4,r4,r9,lsl#16 + orr r4,r4,r10,lsl#24 +#else + ldr r3,[r1,#4] + ldr r4,[r1],#8 +#ifdef __ARMEL__ + rev r3,r3 + rev r4,r4 +#endif +#endif + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#148 + + ldr r12,[sp,#16+0] @ c.lo +#if __ARM_ARCH>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 + tst r14,#1 + beq .L00_15 + ldr r9,[sp,#184+0] + ldr r10,[sp,#184+4] + bic r14,r14,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov r3,r9,lsr#1 + ldr r11,[sp,#80+0] + mov r4,r10,lsr#1 + ldr r12,[sp,#80+4] + eor r3,r3,r10,lsl#31 + eor r4,r4,r9,lsl#31 + eor r3,r3,r9,lsr#8 + eor r4,r4,r10,lsr#8 + eor r3,r3,r10,lsl#24 + eor r4,r4,r9,lsl#24 + eor r3,r3,r9,lsr#7 + eor r4,r4,r10,lsr#7 + eor r3,r3,r10,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov r9,r11,lsr#19 + mov r10,r12,lsr#19 + eor r9,r9,r12,lsl#13 + eor r10,r10,r11,lsl#13 + eor r9,r9,r12,lsr#29 + eor r10,r10,r11,lsr#29 + eor r9,r9,r11,lsl#3 + eor r10,r10,r12,lsl#3 + eor r9,r9,r11,lsr#6 + eor r10,r10,r12,lsr#6 + ldr r11,[sp,#120+0] + eor r9,r9,r12,lsl#26 + + ldr r12,[sp,#120+4] + adds r3,r3,r9 + ldr r9,[sp,#192+0] + adc r4,r4,r10 + + ldr r10,[sp,#192+4] + adds r3,r3,r11 + adc r4,r4,r12 + adds r3,r3,r9 + adc r4,r4,r10 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#23 + + ldr r12,[sp,#16+0] @ c.lo +#if __ARM_ARCH>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 +#if __ARM_ARCH>=7 + ittt eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r9,[sp,#184+0] + ldreq r10,[sp,#184+4] + beq .L16_79 + bic r14,r14,#1 + + ldr r3,[sp,#8+0] + ldr r4,[sp,#8+4] + ldr r9, [r0,#0+LO] + ldr r10, [r0,#0+HI] + ldr r11, [r0,#8+LO] + ldr r12, [r0,#8+HI] + adds r9,r5,r9 + str r9, [r0,#0+LO] + adc r10,r6,r10 + str r10, [r0,#0+HI] + adds r11,r3,r11 + str r11, [r0,#8+LO] + adc r12,r4,r12 + str r12, [r0,#8+HI] + + ldr r5,[sp,#16+0] + ldr r6,[sp,#16+4] + ldr r3,[sp,#24+0] + ldr r4,[sp,#24+4] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + adds r9,r5,r9 + str r9, [r0,#16+LO] + adc r10,r6,r10 + str r10, [r0,#16+HI] + adds r11,r3,r11 + str r11, [r0,#24+LO] + adc r12,r4,r12 + str r12, [r0,#24+HI] + + ldr r3,[sp,#40+0] + ldr r4,[sp,#40+4] + ldr r9, [r0,#32+LO] + ldr r10, [r0,#32+HI] + ldr r11, [r0,#40+LO] + ldr r12, [r0,#40+HI] + adds r7,r7,r9 + str r7,[r0,#32+LO] + adc r8,r8,r10 + str r8,[r0,#32+HI] + adds r11,r3,r11 + str r11, [r0,#40+LO] + adc r12,r4,r12 + str r12, [r0,#40+HI] + + ldr r5,[sp,#48+0] + ldr r6,[sp,#48+4] + ldr r3,[sp,#56+0] + ldr r4,[sp,#56+4] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] + adds r9,r5,r9 + str r9, [r0,#48+LO] + adc r10,r6,r10 + str r10, [r0,#48+HI] + adds r11,r3,r11 + str r11, [r0,#56+LO] + adc r12,r4,r12 + str r12, [r0,#56+HI] + + add sp,sp,#640 + sub r14,r14,#640 + + teq r1,r2 + bne .Loop + + add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.globl sha512_block_data_order_neon +.hidden sha512_block_data_order_neon +.type sha512_block_data_order_neon,%function +.align 4 +sha512_block_data_order_neon: + dmb @ errata #451034 on early Cortex A8 + add r2,r1,r2,lsl#7 @ len to point at the end of inp + adr r3,K512 + VFP_ABI_PUSH + vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context +.Loop_neon: + vshr.u64 d24,d20,#14 @ 0 +#if 0<16 + vld1.64 {d0},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 0>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 0<16 && defined(__ARMEL__) + vrev64.8 d0,d0 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 1 +#if 1<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 1>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 1<16 && defined(__ARMEL__) + vrev64.8 d1,d1 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 2 +#if 2<16 + vld1.64 {d2},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 2>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 2<16 && defined(__ARMEL__) + vrev64.8 d2,d2 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 3 +#if 3<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 3>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 3<16 && defined(__ARMEL__) + vrev64.8 d3,d3 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 4 +#if 4<16 + vld1.64 {d4},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 4>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 4<16 && defined(__ARMEL__) + vrev64.8 d4,d4 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 5 +#if 5<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 5>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 5<16 && defined(__ARMEL__) + vrev64.8 d5,d5 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 6 +#if 6<16 + vld1.64 {d6},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 6>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 6<16 && defined(__ARMEL__) + vrev64.8 d6,d6 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 7 +#if 7<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 7>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 7<16 && defined(__ARMEL__) + vrev64.8 d7,d7 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 d24,d20,#14 @ 8 +#if 8<16 + vld1.64 {d8},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 8>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 8<16 && defined(__ARMEL__) + vrev64.8 d8,d8 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 9 +#if 9<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 9>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 9<16 && defined(__ARMEL__) + vrev64.8 d9,d9 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 10 +#if 10<16 + vld1.64 {d10},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 10>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 10<16 && defined(__ARMEL__) + vrev64.8 d10,d10 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 11 +#if 11<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 11>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 11<16 && defined(__ARMEL__) + vrev64.8 d11,d11 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 12 +#if 12<16 + vld1.64 {d12},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 12>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 12<16 && defined(__ARMEL__) + vrev64.8 d12,d12 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 13 +#if 13<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 13>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 13<16 && defined(__ARMEL__) + vrev64.8 d13,d13 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 14 +#if 14<16 + vld1.64 {d14},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 14>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 14<16 && defined(__ARMEL__) + vrev64.8 d14,d14 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 15 +#if 15<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 15>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 15<16 && defined(__ARMEL__) + vrev64.8 d15,d15 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + mov r12,#4 +.L16_79_neon: + subs r12,#1 + vshr.u64 q12,q7,#19 + vshr.u64 q13,q7,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q7,#6 + vsli.64 q12,q7,#45 + vext.8 q14,q0,q1,#8 @ X[i+1] + vsli.64 q13,q7,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q0,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q4,q5,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q0,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q0,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 16<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 17 +#if 17<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 17>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 17<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q0,#19 + vshr.u64 q13,q0,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q0,#6 + vsli.64 q12,q0,#45 + vext.8 q14,q1,q2,#8 @ X[i+1] + vsli.64 q13,q0,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q1,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q5,q6,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q1,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q1,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 18<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 19 +#if 19<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 19>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 19<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q1,#19 + vshr.u64 q13,q1,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q1,#6 + vsli.64 q12,q1,#45 + vext.8 q14,q2,q3,#8 @ X[i+1] + vsli.64 q13,q1,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q2,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q6,q7,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q2,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q2,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 20<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 21 +#if 21<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 21>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 21<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q2,#19 + vshr.u64 q13,q2,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q2,#6 + vsli.64 q12,q2,#45 + vext.8 q14,q3,q4,#8 @ X[i+1] + vsli.64 q13,q2,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q3,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q7,q0,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q3,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q3,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 22<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 23 +#if 23<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 23>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 23<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 q12,q3,#19 + vshr.u64 q13,q3,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q3,#6 + vsli.64 q12,q3,#45 + vext.8 q14,q4,q5,#8 @ X[i+1] + vsli.64 q13,q3,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q4,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q0,q1,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q4,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q4,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 24<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 25 +#if 25<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 25>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 25<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q4,#19 + vshr.u64 q13,q4,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q4,#6 + vsli.64 q12,q4,#45 + vext.8 q14,q5,q6,#8 @ X[i+1] + vsli.64 q13,q4,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q5,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q1,q2,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q5,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q5,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 26<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 27 +#if 27<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 27>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 27<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q5,#19 + vshr.u64 q13,q5,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q5,#6 + vsli.64 q12,q5,#45 + vext.8 q14,q6,q7,#8 @ X[i+1] + vsli.64 q13,q5,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q6,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q2,q3,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q6,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q6,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 28<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 29 +#if 29<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 29>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 29<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q6,#19 + vshr.u64 q13,q6,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q6,#6 + vsli.64 q12,q6,#45 + vext.8 q14,q7,q0,#8 @ X[i+1] + vsli.64 q13,q6,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q7,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q3,q4,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q7,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q7,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 30<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 31 +#if 31<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 31>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 31<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + bne .L16_79_neon + + vadd.i64 d16,d30 @ h+=Maj from the past + vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context + teq r1,r2 + sub r3,#640 @ rewind K512 + bne .Loop_neon + + VFP_ABI_POP + bx lr @ .word 0xe12fff1e +.size sha512_block_data_order_neon,.-sha512_block_data_order_neon +#endif +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-apple.S new file mode 100644 index 0000000000..8c98e063e3 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-apple.S @@ -0,0 +1,1596 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// Kryo 1.92 17.4 (+30%) 11.2 (+8%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significantly faster +// and the gap is only 40-90%. + +#ifndef __KERNEL__ +# include +#endif + +.text + +.globl _sha512_block_data_order_nohw +.private_extern _sha512_block_data_order_nohw + +.align 6 +_sha512_block_data_order_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adrp x30,LK512@PAGE + add x30,x30,LK512@PAGEOFF + stp x0,x2,[x29,#96] + +Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.section __TEXT,__const +.align 6 + +LK512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0 // terminator + +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.text +#ifndef __KERNEL__ +.globl _sha512_block_data_order_hw +.private_extern _sha512_block_data_order_hw + +.align 6 +_sha512_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context + adrp x3,LK512@PAGE + add x3,x3,LK512@PAGEOFF + + rev64 v16.16b,v16.16b + rev64 v17.16b,v17.16b + rev64 v18.16b,v18.16b + rev64 v19.16b,v19.16b + rev64 v20.16b,v20.16b + rev64 v21.16b,v21.16b + rev64 v22.16b,v22.16b + rev64 v23.16b,v23.16b + b Loop_hw + +.align 4 +Loop_hw: + ld1 {v24.2d},[x3],#16 + subs x2,x2,#1 + sub x4,x1,#128 + orr v26.16b,v0.16b,v0.16b // offload + orr v27.16b,v1.16b,v1.16b + orr v28.16b,v2.16b,v2.16b + orr v29.16b,v3.16b,v3.16b + csel x1,x1,x4,ne // conditional rewind + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v16.2d + ld1 {v16.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v16.16b,v16.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v17.2d + ld1 {v17.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v17.16b,v17.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v18.2d + ld1 {v18.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v18.16b,v18.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v19.2d + ld1 {v19.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + rev64 v19.16b,v19.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v20.2d + ld1 {v20.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + rev64 v20.16b,v20.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v21.2d + ld1 {v21.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v21.16b,v21.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v22.2d + ld1 {v22.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v22.16b,v22.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + sub x3,x3,#80*8 // rewind + add v25.2d,v25.2d,v23.2d + ld1 {v23.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v23.16b,v23.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v0.2d,v0.2d,v26.2d // accumulate + add v1.2d,v1.2d,v27.2d + add v2.2d,v2.2d,v28.2d + add v3.2d,v3.2d,v29.2d + + cbnz x2,Loop_hw + + st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context + + ldr x29,[sp],#16 + ret + +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-linux.S new file mode 100644 index 0000000000..fd159877e0 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-linux.S @@ -0,0 +1,1596 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// Kryo 1.92 17.4 (+30%) 11.2 (+8%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significantly faster +// and the gap is only 40-90%. + +#ifndef __KERNEL__ +# include +#endif + +.text + +.globl sha512_block_data_order_nohw +.hidden sha512_block_data_order_nohw +.type sha512_block_data_order_nohw,%function +.align 6 +sha512_block_data_order_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adrp x30,.LK512 + add x30,x30,:lo12:.LK512 + stp x0,x2,[x29,#96] + +.Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +.Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw + +.section .rodata +.align 6 +.type .LK512,%object +.LK512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0 // terminator +.size .LK512,.-.LK512 +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.text +#ifndef __KERNEL__ +.globl sha512_block_data_order_hw +.hidden sha512_block_data_order_hw +.type sha512_block_data_order_hw,%function +.align 6 +sha512_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context + adrp x3,.LK512 + add x3,x3,:lo12:.LK512 + + rev64 v16.16b,v16.16b + rev64 v17.16b,v17.16b + rev64 v18.16b,v18.16b + rev64 v19.16b,v19.16b + rev64 v20.16b,v20.16b + rev64 v21.16b,v21.16b + rev64 v22.16b,v22.16b + rev64 v23.16b,v23.16b + b .Loop_hw + +.align 4 +.Loop_hw: + ld1 {v24.2d},[x3],#16 + subs x2,x2,#1 + sub x4,x1,#128 + orr v26.16b,v0.16b,v0.16b // offload + orr v27.16b,v1.16b,v1.16b + orr v28.16b,v2.16b,v2.16b + orr v29.16b,v3.16b,v3.16b + csel x1,x1,x4,ne // conditional rewind + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v16.2d + ld1 {v16.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v16.16b,v16.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v17.2d + ld1 {v17.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v17.16b,v17.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v18.2d + ld1 {v18.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v18.16b,v18.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v19.2d + ld1 {v19.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + rev64 v19.16b,v19.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v20.2d + ld1 {v20.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + rev64 v20.16b,v20.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v21.2d + ld1 {v21.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v21.16b,v21.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v22.2d + ld1 {v22.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v22.16b,v22.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + sub x3,x3,#80*8 // rewind + add v25.2d,v25.2d,v23.2d + ld1 {v23.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v23.16b,v23.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v0.2d,v0.2d,v26.2d // accumulate + add v1.2d,v1.2d,v27.2d + add v2.2d,v2.2d,v28.2d + add v3.2d,v3.2d,v29.2d + + cbnz x2,.Loop_hw + + st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context + + ldr x29,[sp],#16 + ret +.size sha512_block_data_order_hw,.-sha512_block_data_order_hw +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-win.S new file mode 100644 index 0000000000..220f4891da --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-armv8-win.S @@ -0,0 +1,1600 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// Kryo 1.92 17.4 (+30%) 11.2 (+8%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significantly faster +// and the gap is only 40-90%. + +#ifndef __KERNEL__ +# include +#endif + +.text + +.globl sha512_block_data_order_nohw + +.def sha512_block_data_order_nohw + .type 32 +.endef +.align 6 +sha512_block_data_order_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adrp x30,LK512 + add x30,x30,:lo12:LK512 + stp x0,x2,[x29,#96] + +Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.section .rodata +.align 6 + +LK512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0 // terminator + +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.text +#ifndef __KERNEL__ +.globl sha512_block_data_order_hw + +.def sha512_block_data_order_hw + .type 32 +.endef +.align 6 +sha512_block_data_order_hw: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context + adrp x3,LK512 + add x3,x3,:lo12:LK512 + + rev64 v16.16b,v16.16b + rev64 v17.16b,v17.16b + rev64 v18.16b,v18.16b + rev64 v19.16b,v19.16b + rev64 v20.16b,v20.16b + rev64 v21.16b,v21.16b + rev64 v22.16b,v22.16b + rev64 v23.16b,v23.16b + b Loop_hw + +.align 4 +Loop_hw: + ld1 {v24.2d},[x3],#16 + subs x2,x2,#1 + sub x4,x1,#128 + orr v26.16b,v0.16b,v0.16b // offload + orr v27.16b,v1.16b,v1.16b + orr v28.16b,v2.16b,v2.16b + orr v29.16b,v3.16b,v3.16b + csel x1,x1,x4,ne // conditional rewind + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.long 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.long 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v16.2d + ld1 {v16.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v16.16b,v16.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v17.2d + ld1 {v17.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v17.16b,v17.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v18.2d + ld1 {v18.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v18.16b,v18.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v19.2d + ld1 {v19.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + rev64 v19.16b,v19.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v20.2d + ld1 {v20.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + rev64 v20.16b,v20.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v21.2d + ld1 {v21.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v21.16b,v21.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v22.2d + ld1 {v22.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v22.16b,v22.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + sub x3,x3,#80*8 // rewind + add v25.2d,v25.2d,v23.2d + ld1 {v23.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v23.16b,v23.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v0.2d,v0.2d,v26.2d // accumulate + add v1.2d,v1.2d,v27.2d + add v2.2d,v2.2d,v28.2d + add v3.2d,v3.2d,v29.2d + + cbnz x2,Loop_hw + + st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context + + ldr x29,[sp],#16 + ret + +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-apple.S new file mode 100644 index 0000000000..58f27a48a8 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-apple.S @@ -0,0 +1,2978 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + +.globl _sha512_block_data_order_nohw +.private_extern _sha512_block_data_order_nohw + +.p2align 4 +_sha512_block_data_order_nohw: + +_CET_ENDBR + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) + +L$prologue: + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop + +.p2align 4 +L$loop: + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 24(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz L$rounds_16_xx + + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop + + movq 152(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue: + ret + + +.section __DATA,__const +.p2align 6 + +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text +.globl _sha512_block_data_order_avx +.private_extern _sha512_block_data_order_avx + +.p2align 6 +_sha512_block_data_order_avx: + +_CET_ENDBR + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) + +L$prologue_avx: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop_avx +.p2align 4 +L$loop_avx: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp L$avx_00_47 + +.p2align 4 +L$avx_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm0,%xmm0 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 0(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm7,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm7,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm7,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 8(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm0,%xmm0 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm1,%xmm1 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 16(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm0,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm0,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm0,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 24(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm1,%xmm1 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm2,%xmm2 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 32(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm1,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm1,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm1,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 40(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm2,%xmm2 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm3,%xmm3 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 48(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm2,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm2,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm2,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 56(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm3,%xmm3 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm4,%xmm4 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 64(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm3,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm3,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm3,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 72(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm4,%xmm4 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm5,%xmm5 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 80(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm4,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm4,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm4,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 88(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm5,%xmm5 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm6,%xmm6 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 96(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm5,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm5,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm5,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 104(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm6,%xmm6 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm7,%xmm7 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 112(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm6,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm6,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm6,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 120(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm7,%xmm7 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne L$avx_00_47 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop_avx + + movq 152(%rsp),%rsi + + vzeroupper + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue_avx: + ret + + +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-linux.S new file mode 100644 index 0000000000..bbef9430ec --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-linux.S @@ -0,0 +1,2978 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.globl sha512_block_data_order_nohw +.hidden sha512_block_data_order_nohw +.type sha512_block_data_order_nohw,@function +.align 16 +sha512_block_data_order_nohw: +.cfi_startproc +_CET_ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 +.Lprologue: + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 24(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 152(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + ret +.cfi_endproc +.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw +.section .rodata +.align 64 +.type K512,@object +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text +.globl sha512_block_data_order_avx +.hidden sha512_block_data_order_avx +.type sha512_block_data_order_avx,@function +.align 64 +sha512_block_data_order_avx: +.cfi_startproc +_CET_ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 +.Lprologue_avx: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm0,%xmm0 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 0(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm7,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm7,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm7,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 8(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm0,%xmm0 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm1,%xmm1 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 16(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm0,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm0,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm0,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 24(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm1,%xmm1 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm2,%xmm2 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 32(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm1,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm1,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm1,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 40(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm2,%xmm2 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm3,%xmm3 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 48(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm2,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm2,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm2,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 56(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm3,%xmm3 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm4,%xmm4 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 64(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm3,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm3,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm3,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 72(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm4,%xmm4 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm5,%xmm5 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 80(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm4,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm4,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm4,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 88(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm5,%xmm5 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm6,%xmm6 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 96(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm5,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm5,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm5,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 104(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm6,%xmm6 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm7,%xmm7 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 112(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm6,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm6,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm6,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 120(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm7,%xmm7 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne .Lavx_00_47 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop_avx + + movq 152(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + ret +.cfi_endproc +.size sha512_block_data_order_avx,.-sha512_block_data_order_avx +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-win.asm new file mode 100644 index 0000000000..3b02e03a92 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/sha512-x86_64-win.asm @@ -0,0 +1,3140 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + +global sha512_block_data_order_nohw + +ALIGN 16 +sha512_block_data_order_nohw: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order_nohw: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,16*8+4*8 + lea rdx,[rdx*8+rsi] + and rsp,-64 + mov QWORD[((128+0))+rsp],rdi + mov QWORD[((128+8))+rsp],rsi + mov QWORD[((128+16))+rsp],rdx + mov QWORD[152+rsp],rax + +$L$prologue: + + mov rax,QWORD[rdi] + mov rbx,QWORD[8+rdi] + mov rcx,QWORD[16+rdi] + mov rdx,QWORD[24+rdi] + mov r8,QWORD[32+rdi] + mov r9,QWORD[40+rdi] + mov r10,QWORD[48+rdi] + mov r11,QWORD[56+rdi] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov rdi,rbx + lea rbp,[K512] + xor rdi,rcx + mov r12,QWORD[rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + add r11,r14 + mov r12,QWORD[8+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + add r10,r14 + mov r12,QWORD[16+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + add r9,r14 + mov r12,QWORD[24+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + add r8,r14 + mov r12,QWORD[32+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + add rdx,r14 + mov r12,QWORD[40+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + add rcx,r14 + mov r12,QWORD[48+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + add rbx,r14 + mov r12,QWORD[56+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + add rax,r14 + mov r12,QWORD[64+rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + add r11,r14 + mov r12,QWORD[72+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + add r10,r14 + mov r12,QWORD[80+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + add r9,r14 + mov r12,QWORD[88+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + add r8,r14 + mov r12,QWORD[96+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + add rdx,r14 + mov r12,QWORD[104+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + add rcx,r14 + mov r12,QWORD[112+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + add rbx,r14 + mov r12,QWORD[120+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + jmp NEAR $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx: + mov r13,QWORD[8+rsp] + mov r15,QWORD[112+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[72+rsp] + + add r12,QWORD[rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[16+rsp] + mov rdi,QWORD[120+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[80+rsp] + + add r12,QWORD[8+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[24+rsp] + mov r15,QWORD[rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[88+rsp] + + add r12,QWORD[16+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[32+rsp] + mov rdi,QWORD[8+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[96+rsp] + + add r12,QWORD[24+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[40+rsp] + mov r15,QWORD[16+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[104+rsp] + + add r12,QWORD[32+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[48+rsp] + mov rdi,QWORD[24+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[112+rsp] + + add r12,QWORD[40+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[56+rsp] + mov r15,QWORD[32+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[120+rsp] + + add r12,QWORD[48+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[64+rsp] + mov rdi,QWORD[40+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[rsp] + + add r12,QWORD[56+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[72+rsp] + mov r15,QWORD[48+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[8+rsp] + + add r12,QWORD[64+rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[80+rsp] + mov rdi,QWORD[56+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[16+rsp] + + add r12,QWORD[72+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[88+rsp] + mov r15,QWORD[64+rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[24+rsp] + + add r12,QWORD[80+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[96+rsp] + mov rdi,QWORD[72+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[32+rsp] + + add r12,QWORD[88+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[104+rsp] + mov r15,QWORD[80+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[40+rsp] + + add r12,QWORD[96+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[112+rsp] + mov rdi,QWORD[88+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[48+rsp] + + add r12,QWORD[104+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[120+rsp] + mov r15,QWORD[96+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[56+rsp] + + add r12,QWORD[112+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[rsp] + mov rdi,QWORD[104+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[64+rsp] + + add r12,QWORD[120+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + cmp BYTE[7+rbp],0 + jnz NEAR $L$rounds_16_xx + + mov rdi,QWORD[((128+0))+rsp] + add rax,r14 + lea rsi,[128+rsi] + + add rax,QWORD[rdi] + add rbx,QWORD[8+rdi] + add rcx,QWORD[16+rdi] + add rdx,QWORD[24+rdi] + add r8,QWORD[32+rdi] + add r9,QWORD[40+rdi] + add r10,QWORD[48+rdi] + add r11,QWORD[56+rdi] + + cmp rsi,QWORD[((128+16))+rsp] + + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + mov QWORD[32+rdi],r8 + mov QWORD[40+rdi],r9 + mov QWORD[48+rdi],r10 + mov QWORD[56+rdi],r11 + jb NEAR $L$loop + + mov rsi,QWORD[152+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha512_block_data_order_nohw: +section .rdata rdata align=8 +ALIGN 64 + +K512: + DQ 0x428a2f98d728ae22,0x7137449123ef65cd + DQ 0x428a2f98d728ae22,0x7137449123ef65cd + DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + DQ 0x3956c25bf348b538,0x59f111f1b605d019 + DQ 0x3956c25bf348b538,0x59f111f1b605d019 + DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + DQ 0xd807aa98a3030242,0x12835b0145706fbe + DQ 0xd807aa98a3030242,0x12835b0145706fbe + DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + DQ 0x9bdc06a725c71235,0xc19bf174cf692694 + DQ 0x9bdc06a725c71235,0xc19bf174cf692694 + DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + DQ 0x983e5152ee66dfab,0xa831c66d2db43210 + DQ 0x983e5152ee66dfab,0xa831c66d2db43210 + DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4 + DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4 + DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725 + DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725 + DQ 0x06ca6351e003826f,0x142929670a0e6e70 + DQ 0x06ca6351e003826f,0x142929670a0e6e70 + DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926 + DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926 + DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + DQ 0x650a73548baf63de,0x766a0abb3c77b2a8 + DQ 0x650a73548baf63de,0x766a0abb3c77b2a8 + DQ 0x81c2c92e47edaee6,0x92722c851482353b + DQ 0x81c2c92e47edaee6,0x92722c851482353b + DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001 + DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001 + DQ 0xc24b8b70d0f89791,0xc76c51a30654be30 + DQ 0xc24b8b70d0f89791,0xc76c51a30654be30 + DQ 0xd192e819d6ef5218,0xd69906245565a910 + DQ 0xd192e819d6ef5218,0xd69906245565a910 + DQ 0xf40e35855771202a,0x106aa07032bbd1b8 + DQ 0xf40e35855771202a,0x106aa07032bbd1b8 + DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + DQ 0x748f82ee5defb2fc,0x78a5636f43172f60 + DQ 0x748f82ee5defb2fc,0x78a5636f43172f60 + DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec + DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec + DQ 0x90befffa23631e28,0xa4506cebde82bde9 + DQ 0x90befffa23631e28,0xa4506cebde82bde9 + DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b + DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b + DQ 0xca273eceea26619c,0xd186b8c721c0c207 + DQ 0xca273eceea26619c,0xd186b8c721c0c207 + DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6 + DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6 + DQ 0x113f9804bef90dae,0x1b710b35131c471b + DQ 0x113f9804bef90dae,0x1b710b35131c471b + DQ 0x28db77f523047d84,0x32caab7b40c72493 + DQ 0x28db77f523047d84,0x32caab7b40c72493 + DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + + DQ 0x0001020304050607,0x08090a0b0c0d0e0f + DQ 0x0001020304050607,0x08090a0b0c0d0e0f + DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 + DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 + DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 + DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 + DB 111,114,103,62,0 +section .text + +global sha512_block_data_order_avx + +ALIGN 64 +sha512_block_data_order_avx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order_avx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,256 + lea rdx,[rdx*8+rsi] + and rsp,-64 + mov QWORD[((128+0))+rsp],rdi + mov QWORD[((128+8))+rsp],rsi + mov QWORD[((128+16))+rsp],rdx + mov QWORD[152+rsp],rax + + movaps XMMWORD[(128+32)+rsp],xmm6 + movaps XMMWORD[(128+48)+rsp],xmm7 + movaps XMMWORD[(128+64)+rsp],xmm8 + movaps XMMWORD[(128+80)+rsp],xmm9 + movaps XMMWORD[(128+96)+rsp],xmm10 + movaps XMMWORD[(128+112)+rsp],xmm11 +$L$prologue_avx: + + vzeroupper + mov rax,QWORD[rdi] + mov rbx,QWORD[8+rdi] + mov rcx,QWORD[16+rdi] + mov rdx,QWORD[24+rdi] + mov r8,QWORD[32+rdi] + mov r9,QWORD[40+rdi] + mov r10,QWORD[48+rdi] + mov r11,QWORD[56+rdi] + jmp NEAR $L$loop_avx +ALIGN 16 +$L$loop_avx: + vmovdqa xmm11,XMMWORD[((K512+1280))] + vmovdqu xmm0,XMMWORD[rsi] + lea rbp,[((K512+128))] + vmovdqu xmm1,XMMWORD[16+rsi] + vmovdqu xmm2,XMMWORD[32+rsi] + vpshufb xmm0,xmm0,xmm11 + vmovdqu xmm3,XMMWORD[48+rsi] + vpshufb xmm1,xmm1,xmm11 + vmovdqu xmm4,XMMWORD[64+rsi] + vpshufb xmm2,xmm2,xmm11 + vmovdqu xmm5,XMMWORD[80+rsi] + vpshufb xmm3,xmm3,xmm11 + vmovdqu xmm6,XMMWORD[96+rsi] + vpshufb xmm4,xmm4,xmm11 + vmovdqu xmm7,XMMWORD[112+rsi] + vpshufb xmm5,xmm5,xmm11 + vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp] + vpshufb xmm6,xmm6,xmm11 + vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp] + vpshufb xmm7,xmm7,xmm11 + vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] + vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp] + vmovdqa XMMWORD[rsp],xmm8 + vpaddq xmm8,xmm4,XMMWORD[rbp] + vmovdqa XMMWORD[16+rsp],xmm9 + vpaddq xmm9,xmm5,XMMWORD[32+rbp] + vmovdqa XMMWORD[32+rsp],xmm10 + vpaddq xmm10,xmm6,XMMWORD[64+rbp] + vmovdqa XMMWORD[48+rsp],xmm11 + vpaddq xmm11,xmm7,XMMWORD[96+rbp] + vmovdqa XMMWORD[64+rsp],xmm8 + mov r14,rax + vmovdqa XMMWORD[80+rsp],xmm9 + mov rdi,rbx + vmovdqa XMMWORD[96+rsp],xmm10 + xor rdi,rcx + vmovdqa XMMWORD[112+rsp],xmm11 + mov r13,r8 + jmp NEAR $L$avx_00_47 + +ALIGN 16 +$L$avx_00_47: + add rbp,256 + vpalignr xmm8,xmm1,xmm0,8 + shrd r13,r13,23 + mov rax,r14 + vpalignr xmm11,xmm5,xmm4,8 + mov r12,r9 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r8 + xor r12,r10 + vpaddq xmm0,xmm0,xmm11 + shrd r13,r13,4 + xor r14,rax + vpsrlq xmm11,xmm8,7 + and r12,r8 + xor r13,r8 + vpsllq xmm9,xmm8,56 + add r11,QWORD[rsp] + mov r15,rax + vpxor xmm8,xmm11,xmm10 + xor r12,r10 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rbx + add r11,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + shrd r14,r14,28 + vpsrlq xmm11,xmm7,6 + add rdx,r11 + add r11,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rdx + add r14,r11 + vpsllq xmm10,xmm7,3 + shrd r13,r13,23 + mov r11,r14 + vpaddq xmm0,xmm0,xmm8 + mov r12,r8 + shrd r14,r14,5 + vpsrlq xmm9,xmm7,19 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r11 + vpsllq xmm10,xmm10,42 + and r12,rdx + xor r13,rdx + vpxor xmm11,xmm11,xmm9 + add r10,QWORD[8+rsp] + mov rdi,r11 + vpsrlq xmm9,xmm9,42 + xor r12,r9 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rax + add r10,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm0,xmm0,xmm11 + xor r14,r11 + add r10,r13 + vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp] + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD[rsp],xmm10 + vpalignr xmm8,xmm2,xmm1,8 + shrd r13,r13,23 + mov r10,r14 + vpalignr xmm11,xmm6,xmm5,8 + mov r12,rdx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rcx + xor r12,r8 + vpaddq xmm1,xmm1,xmm11 + shrd r13,r13,4 + xor r14,r10 + vpsrlq xmm11,xmm8,7 + and r12,rcx + xor r13,rcx + vpsllq xmm9,xmm8,56 + add r9,QWORD[16+rsp] + mov r15,r10 + vpxor xmm8,xmm11,xmm10 + xor r12,r8 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r11 + add r9,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + shrd r14,r14,28 + vpsrlq xmm11,xmm0,6 + add rbx,r9 + add r9,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rbx + add r14,r9 + vpsllq xmm10,xmm0,3 + shrd r13,r13,23 + mov r9,r14 + vpaddq xmm1,xmm1,xmm8 + mov r12,rcx + shrd r14,r14,5 + vpsrlq xmm9,xmm0,19 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r9 + vpsllq xmm10,xmm10,42 + and r12,rbx + xor r13,rbx + vpxor xmm11,xmm11,xmm9 + add r8,QWORD[24+rsp] + mov rdi,r9 + vpsrlq xmm9,xmm9,42 + xor r12,rdx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r10 + add r8,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm1,xmm1,xmm11 + xor r14,r9 + add r8,r13 + vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp] + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD[16+rsp],xmm10 + vpalignr xmm8,xmm3,xmm2,8 + shrd r13,r13,23 + mov r8,r14 + vpalignr xmm11,xmm7,xmm6,8 + mov r12,rbx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rax + xor r12,rcx + vpaddq xmm2,xmm2,xmm11 + shrd r13,r13,4 + xor r14,r8 + vpsrlq xmm11,xmm8,7 + and r12,rax + xor r13,rax + vpsllq xmm9,xmm8,56 + add rdx,QWORD[32+rsp] + mov r15,r8 + vpxor xmm8,xmm11,xmm10 + xor r12,rcx + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r9 + add rdx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + shrd r14,r14,28 + vpsrlq xmm11,xmm1,6 + add r11,rdx + add rdx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r11 + add r14,rdx + vpsllq xmm10,xmm1,3 + shrd r13,r13,23 + mov rdx,r14 + vpaddq xmm2,xmm2,xmm8 + mov r12,rax + shrd r14,r14,5 + vpsrlq xmm9,xmm1,19 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rdx + vpsllq xmm10,xmm10,42 + and r12,r11 + xor r13,r11 + vpxor xmm11,xmm11,xmm9 + add rcx,QWORD[40+rsp] + mov rdi,rdx + vpsrlq xmm9,xmm9,42 + xor r12,rbx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r8 + add rcx,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm2,xmm2,xmm11 + xor r14,rdx + add rcx,r13 + vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD[32+rsp],xmm10 + vpalignr xmm8,xmm4,xmm3,8 + shrd r13,r13,23 + mov rcx,r14 + vpalignr xmm11,xmm0,xmm7,8 + mov r12,r11 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r10 + xor r12,rax + vpaddq xmm3,xmm3,xmm11 + shrd r13,r13,4 + xor r14,rcx + vpsrlq xmm11,xmm8,7 + and r12,r10 + xor r13,r10 + vpsllq xmm9,xmm8,56 + add rbx,QWORD[48+rsp] + mov r15,rcx + vpxor xmm8,xmm11,xmm10 + xor r12,rax + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rdx + add rbx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + shrd r14,r14,28 + vpsrlq xmm11,xmm2,6 + add r9,rbx + add rbx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r9 + add r14,rbx + vpsllq xmm10,xmm2,3 + shrd r13,r13,23 + mov rbx,r14 + vpaddq xmm3,xmm3,xmm8 + mov r12,r10 + shrd r14,r14,5 + vpsrlq xmm9,xmm2,19 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rbx + vpsllq xmm10,xmm10,42 + and r12,r9 + xor r13,r9 + vpxor xmm11,xmm11,xmm9 + add rax,QWORD[56+rsp] + mov rdi,rbx + vpsrlq xmm9,xmm9,42 + xor r12,r11 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rcx + add rax,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm3,xmm3,xmm11 + xor r14,rbx + add rax,r13 + vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp] + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD[48+rsp],xmm10 + vpalignr xmm8,xmm5,xmm4,8 + shrd r13,r13,23 + mov rax,r14 + vpalignr xmm11,xmm1,xmm0,8 + mov r12,r9 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r8 + xor r12,r10 + vpaddq xmm4,xmm4,xmm11 + shrd r13,r13,4 + xor r14,rax + vpsrlq xmm11,xmm8,7 + and r12,r8 + xor r13,r8 + vpsllq xmm9,xmm8,56 + add r11,QWORD[64+rsp] + mov r15,rax + vpxor xmm8,xmm11,xmm10 + xor r12,r10 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rbx + add r11,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + shrd r14,r14,28 + vpsrlq xmm11,xmm3,6 + add rdx,r11 + add r11,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rdx + add r14,r11 + vpsllq xmm10,xmm3,3 + shrd r13,r13,23 + mov r11,r14 + vpaddq xmm4,xmm4,xmm8 + mov r12,r8 + shrd r14,r14,5 + vpsrlq xmm9,xmm3,19 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r11 + vpsllq xmm10,xmm10,42 + and r12,rdx + xor r13,rdx + vpxor xmm11,xmm11,xmm9 + add r10,QWORD[72+rsp] + mov rdi,r11 + vpsrlq xmm9,xmm9,42 + xor r12,r9 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rax + add r10,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm4,xmm4,xmm11 + xor r14,r11 + add r10,r13 + vpaddq xmm10,xmm4,XMMWORD[rbp] + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD[64+rsp],xmm10 + vpalignr xmm8,xmm6,xmm5,8 + shrd r13,r13,23 + mov r10,r14 + vpalignr xmm11,xmm2,xmm1,8 + mov r12,rdx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rcx + xor r12,r8 + vpaddq xmm5,xmm5,xmm11 + shrd r13,r13,4 + xor r14,r10 + vpsrlq xmm11,xmm8,7 + and r12,rcx + xor r13,rcx + vpsllq xmm9,xmm8,56 + add r9,QWORD[80+rsp] + mov r15,r10 + vpxor xmm8,xmm11,xmm10 + xor r12,r8 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r11 + add r9,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + shrd r14,r14,28 + vpsrlq xmm11,xmm4,6 + add rbx,r9 + add r9,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rbx + add r14,r9 + vpsllq xmm10,xmm4,3 + shrd r13,r13,23 + mov r9,r14 + vpaddq xmm5,xmm5,xmm8 + mov r12,rcx + shrd r14,r14,5 + vpsrlq xmm9,xmm4,19 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r9 + vpsllq xmm10,xmm10,42 + and r12,rbx + xor r13,rbx + vpxor xmm11,xmm11,xmm9 + add r8,QWORD[88+rsp] + mov rdi,r9 + vpsrlq xmm9,xmm9,42 + xor r12,rdx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r10 + add r8,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm5,xmm5,xmm11 + xor r14,r9 + add r8,r13 + vpaddq xmm10,xmm5,XMMWORD[32+rbp] + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD[80+rsp],xmm10 + vpalignr xmm8,xmm7,xmm6,8 + shrd r13,r13,23 + mov r8,r14 + vpalignr xmm11,xmm3,xmm2,8 + mov r12,rbx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rax + xor r12,rcx + vpaddq xmm6,xmm6,xmm11 + shrd r13,r13,4 + xor r14,r8 + vpsrlq xmm11,xmm8,7 + and r12,rax + xor r13,rax + vpsllq xmm9,xmm8,56 + add rdx,QWORD[96+rsp] + mov r15,r8 + vpxor xmm8,xmm11,xmm10 + xor r12,rcx + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r9 + add rdx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + shrd r14,r14,28 + vpsrlq xmm11,xmm5,6 + add r11,rdx + add rdx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r11 + add r14,rdx + vpsllq xmm10,xmm5,3 + shrd r13,r13,23 + mov rdx,r14 + vpaddq xmm6,xmm6,xmm8 + mov r12,rax + shrd r14,r14,5 + vpsrlq xmm9,xmm5,19 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rdx + vpsllq xmm10,xmm10,42 + and r12,r11 + xor r13,r11 + vpxor xmm11,xmm11,xmm9 + add rcx,QWORD[104+rsp] + mov rdi,rdx + vpsrlq xmm9,xmm9,42 + xor r12,rbx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r8 + add rcx,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm6,xmm6,xmm11 + xor r14,rdx + add rcx,r13 + vpaddq xmm10,xmm6,XMMWORD[64+rbp] + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD[96+rsp],xmm10 + vpalignr xmm8,xmm0,xmm7,8 + shrd r13,r13,23 + mov rcx,r14 + vpalignr xmm11,xmm4,xmm3,8 + mov r12,r11 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r10 + xor r12,rax + vpaddq xmm7,xmm7,xmm11 + shrd r13,r13,4 + xor r14,rcx + vpsrlq xmm11,xmm8,7 + and r12,r10 + xor r13,r10 + vpsllq xmm9,xmm8,56 + add rbx,QWORD[112+rsp] + mov r15,rcx + vpxor xmm8,xmm11,xmm10 + xor r12,rax + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rdx + add rbx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + shrd r14,r14,28 + vpsrlq xmm11,xmm6,6 + add r9,rbx + add rbx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r9 + add r14,rbx + vpsllq xmm10,xmm6,3 + shrd r13,r13,23 + mov rbx,r14 + vpaddq xmm7,xmm7,xmm8 + mov r12,r10 + shrd r14,r14,5 + vpsrlq xmm9,xmm6,19 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rbx + vpsllq xmm10,xmm10,42 + and r12,r9 + xor r13,r9 + vpxor xmm11,xmm11,xmm9 + add rax,QWORD[120+rsp] + mov rdi,rbx + vpsrlq xmm9,xmm9,42 + xor r12,r11 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rcx + add rax,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm7,xmm7,xmm11 + xor r14,rbx + add rax,r13 + vpaddq xmm10,xmm7,XMMWORD[96+rbp] + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD[112+rsp],xmm10 + cmp BYTE[135+rbp],0 + jne NEAR $L$avx_00_47 + shrd r13,r13,23 + mov rax,r14 + mov r12,r9 + shrd r14,r14,5 + xor r13,r8 + xor r12,r10 + shrd r13,r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD[rsp] + mov r15,rax + xor r12,r10 + shrd r14,r14,6 + xor r15,rbx + add r11,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + shrd r14,r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + shrd r13,r13,23 + mov r11,r14 + mov r12,r8 + shrd r14,r14,5 + xor r13,rdx + xor r12,r9 + shrd r13,r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD[8+rsp] + mov rdi,r11 + xor r12,r9 + shrd r14,r14,6 + xor rdi,rax + add r10,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + shrd r13,r13,23 + mov r10,r14 + mov r12,rdx + shrd r14,r14,5 + xor r13,rcx + xor r12,r8 + shrd r13,r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD[16+rsp] + mov r15,r10 + xor r12,r8 + shrd r14,r14,6 + xor r15,r11 + add r9,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + shrd r14,r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + shrd r13,r13,23 + mov r9,r14 + mov r12,rcx + shrd r14,r14,5 + xor r13,rbx + xor r12,rdx + shrd r13,r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD[24+rsp] + mov rdi,r9 + xor r12,rdx + shrd r14,r14,6 + xor rdi,r10 + add r8,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + shrd r13,r13,23 + mov r8,r14 + mov r12,rbx + shrd r14,r14,5 + xor r13,rax + xor r12,rcx + shrd r13,r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD[32+rsp] + mov r15,r8 + xor r12,rcx + shrd r14,r14,6 + xor r15,r9 + add rdx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + shrd r14,r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + shrd r13,r13,23 + mov rdx,r14 + mov r12,rax + shrd r14,r14,5 + xor r13,r11 + xor r12,rbx + shrd r13,r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD[40+rsp] + mov rdi,rdx + xor r12,rbx + shrd r14,r14,6 + xor rdi,r8 + add rcx,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + shrd r13,r13,23 + mov rcx,r14 + mov r12,r11 + shrd r14,r14,5 + xor r13,r10 + xor r12,rax + shrd r13,r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD[48+rsp] + mov r15,rcx + xor r12,rax + shrd r14,r14,6 + xor r15,rdx + add rbx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + shrd r14,r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + shrd r13,r13,23 + mov rbx,r14 + mov r12,r10 + shrd r14,r14,5 + xor r13,r9 + xor r12,r11 + shrd r13,r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD[56+rsp] + mov rdi,rbx + xor r12,r11 + shrd r14,r14,6 + xor rdi,rcx + add rax,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + shrd r13,r13,23 + mov rax,r14 + mov r12,r9 + shrd r14,r14,5 + xor r13,r8 + xor r12,r10 + shrd r13,r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD[64+rsp] + mov r15,rax + xor r12,r10 + shrd r14,r14,6 + xor r15,rbx + add r11,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + shrd r14,r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + shrd r13,r13,23 + mov r11,r14 + mov r12,r8 + shrd r14,r14,5 + xor r13,rdx + xor r12,r9 + shrd r13,r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD[72+rsp] + mov rdi,r11 + xor r12,r9 + shrd r14,r14,6 + xor rdi,rax + add r10,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + shrd r13,r13,23 + mov r10,r14 + mov r12,rdx + shrd r14,r14,5 + xor r13,rcx + xor r12,r8 + shrd r13,r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD[80+rsp] + mov r15,r10 + xor r12,r8 + shrd r14,r14,6 + xor r15,r11 + add r9,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + shrd r14,r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + shrd r13,r13,23 + mov r9,r14 + mov r12,rcx + shrd r14,r14,5 + xor r13,rbx + xor r12,rdx + shrd r13,r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD[88+rsp] + mov rdi,r9 + xor r12,rdx + shrd r14,r14,6 + xor rdi,r10 + add r8,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + shrd r13,r13,23 + mov r8,r14 + mov r12,rbx + shrd r14,r14,5 + xor r13,rax + xor r12,rcx + shrd r13,r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD[96+rsp] + mov r15,r8 + xor r12,rcx + shrd r14,r14,6 + xor r15,r9 + add rdx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + shrd r14,r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + shrd r13,r13,23 + mov rdx,r14 + mov r12,rax + shrd r14,r14,5 + xor r13,r11 + xor r12,rbx + shrd r13,r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD[104+rsp] + mov rdi,rdx + xor r12,rbx + shrd r14,r14,6 + xor rdi,r8 + add rcx,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + shrd r13,r13,23 + mov rcx,r14 + mov r12,r11 + shrd r14,r14,5 + xor r13,r10 + xor r12,rax + shrd r13,r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD[112+rsp] + mov r15,rcx + xor r12,rax + shrd r14,r14,6 + xor r15,rdx + add rbx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + shrd r14,r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + shrd r13,r13,23 + mov rbx,r14 + mov r12,r10 + shrd r14,r14,5 + xor r13,r9 + xor r12,r11 + shrd r13,r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD[120+rsp] + mov rdi,rbx + xor r12,r11 + shrd r14,r14,6 + xor rdi,rcx + add rax,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + mov rdi,QWORD[((128+0))+rsp] + mov rax,r14 + + add rax,QWORD[rdi] + lea rsi,[128+rsi] + add rbx,QWORD[8+rdi] + add rcx,QWORD[16+rdi] + add rdx,QWORD[24+rdi] + add r8,QWORD[32+rdi] + add r9,QWORD[40+rdi] + add r10,QWORD[48+rdi] + add r11,QWORD[56+rdi] + + cmp rsi,QWORD[((128+16))+rsp] + + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + mov QWORD[32+rdi],r8 + mov QWORD[40+rdi],r9 + mov QWORD[48+rdi],r10 + mov QWORD[56+rdi],r11 + jb NEAR $L$loop_avx + + mov rsi,QWORD[152+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((128+32))+rsp] + movaps xmm7,XMMWORD[((128+48))+rsp] + movaps xmm8,XMMWORD[((128+64))+rsp] + movaps xmm9,XMMWORD[((128+80))+rsp] + movaps xmm10,XMMWORD[((128+96))+rsp] + movaps xmm11,XMMWORD[((128+112))+rsp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_avx: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_sha512_block_data_order_avx: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + mov rsi,rax + mov rax,QWORD[((128+24))+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + lea r10,[$L$epilogue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + lea rsi,[((128+32))+rsi] + lea rdi,[512+r8] + mov ecx,12 + DD 0xa548f3fc + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha512_block_data_order_nohw wrt ..imagebase + DD $L$SEH_end_sha512_block_data_order_nohw wrt ..imagebase + DD $L$SEH_info_sha512_block_data_order_nohw wrt ..imagebase + DD $L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase + DD $L$SEH_end_sha512_block_data_order_avx wrt ..imagebase + DD $L$SEH_info_sha512_block_data_order_avx wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha512_block_data_order_nohw: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase +$L$SEH_info_sha512_block_data_order_avx: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-armv7-linux.S b/yass/third_party/boringssl/src/gen/bcm/vpaes-armv7-linux.S new file mode 100644 index 0000000000..6e7898ad61 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-armv7-linux.S @@ -0,0 +1,1225 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +.syntax unified + +.arch armv7-a +.fpu neon + +#if defined(__thumb2__) +.thumb +#else +.code 32 +#endif + +.text + +.type _vpaes_consts,%object +.align 7 @ totally strategic alignment +_vpaes_consts: +.Lk_mc_forward:@ mc_forward +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 +.Lk_mc_backward:@ mc_backward +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F +.Lk_sr:@ sr +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +@ +@ "Hot" constants +@ +.Lk_inv:@ inv, inva +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 +.Lk_ipt:@ input transform (lo, hi) +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 +.Lk_sbo:@ sbou, sbot +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA +.Lk_sb1:@ sb1u, sb1t +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.Lk_sb2:@ sb2u, sb2t +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD + +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,55,32,78,69,79,78,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 2 +.size _vpaes_consts,.-_vpaes_consts +.align 6 +@@ +@@ _aes_preheat +@@ +@@ Fills q9-q15 as specified below. +@@ +.type _vpaes_preheat,%function +.align 4 +_vpaes_preheat: + adr r10, .Lk_inv + vmov.i8 q9, #0x0f @ .Lk_s0F + vld1.64 {q10,q11}, [r10]! @ .Lk_inv + add r10, r10, #64 @ Skip .Lk_ipt, .Lk_sbo + vld1.64 {q12,q13}, [r10]! @ .Lk_sb1 + vld1.64 {q14,q15}, [r10] @ .Lk_sb2 + bx lr + +@@ +@@ _aes_encrypt_core +@@ +@@ AES-encrypt q0. +@@ +@@ Inputs: +@@ q0 = input +@@ q9-q15 as in _vpaes_preheat +@@ [r2] = scheduled keys +@@ +@@ Output in q0 +@@ Clobbers q1-q5, r8-r11 +@@ Preserves q6-q8 so you get some local vectors +@@ +@@ +.type _vpaes_encrypt_core,%function +.align 4 +_vpaes_encrypt_core: + mov r9, r2 + ldr r8, [r2,#240] @ pull rounds + adr r11, .Lk_ipt + @ vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + @ vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + vld1.64 {q2, q3}, [r11] + adr r11, .Lk_mc_forward+16 + vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5 # round0 key + vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 + vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 + vtbl.8 d2, {q2}, d2 @ vpshufb %xmm1, %xmm2, %xmm1 + vtbl.8 d3, {q2}, d3 + vtbl.8 d4, {q3}, d0 @ vpshufb %xmm0, %xmm3, %xmm2 + vtbl.8 d5, {q3}, d1 + veor q0, q1, q5 @ vpxor %xmm5, %xmm1, %xmm0 + veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0 + + @ .Lenc_entry ends with a bnz instruction which is normally paired with + @ subs in .Lenc_loop. + tst r8, r8 + b .Lenc_entry + +.align 4 +.Lenc_loop: + @ middle of middle round + add r10, r11, #0x40 + vtbl.8 d8, {q13}, d4 @ vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + vtbl.8 d9, {q13}, d5 + vld1.64 {q1}, [r11]! @ vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] + vtbl.8 d0, {q12}, d6 @ vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + vtbl.8 d1, {q12}, d7 + veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + vtbl.8 d10, {q15}, d4 @ vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + vtbl.8 d11, {q15}, d5 + veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = A + vtbl.8 d4, {q14}, d6 @ vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + vtbl.8 d5, {q14}, d7 + vld1.64 {q4}, [r10] @ vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] + vtbl.8 d6, {q0}, d2 @ vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + vtbl.8 d7, {q0}, d3 + veor q2, q2, q5 @ vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + @ Write to q5 instead of q0, so the table and destination registers do + @ not overlap. + vtbl.8 d10, {q0}, d8 @ vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + vtbl.8 d11, {q0}, d9 + veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + vtbl.8 d8, {q3}, d2 @ vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + vtbl.8 d9, {q3}, d3 + @ Here we restore the original q0/q5 usage. + veor q0, q5, q3 @ vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + and r11, r11, #~(1<<6) @ and $0x30, %r11 # ... mod 4 + veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + subs r8, r8, #1 @ nr-- + +.Lenc_entry: + @ top of round + vand q1, q0, q9 @ vpand %xmm0, %xmm9, %xmm1 # 0 = k + vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i + vtbl.8 d10, {q11}, d2 @ vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + vtbl.8 d11, {q11}, d3 + veor q1, q1, q0 @ vpxor %xmm0, %xmm1, %xmm1 # 0 = j + vtbl.8 d6, {q10}, d0 @ vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + vtbl.8 d7, {q10}, d1 + vtbl.8 d8, {q10}, d2 @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + vtbl.8 d9, {q10}, d3 + veor q3, q3, q5 @ vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + vtbl.8 d4, {q10}, d6 @ vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + vtbl.8 d5, {q10}, d7 + vtbl.8 d6, {q10}, d8 @ vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + vtbl.8 d7, {q10}, d9 + veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io + veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5 + bne .Lenc_loop + + @ middle of last round + add r10, r11, #0x80 + + adr r11, .Lk_sbo + @ Read to q1 instead of q4, so the vtbl.8 instruction below does not + @ overlap table and destination registers. + vld1.64 {q1}, [r11]! @ vmovdqa -0x60(%r10), %xmm4 # 3 : sbou + vld1.64 {q0}, [r11] @ vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + vtbl.8 d8, {q1}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + vtbl.8 d9, {q1}, d5 + vld1.64 {q1}, [r10] @ vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] + @ Write to q2 instead of q0 below, to avoid overlapping table and + @ destination registers. + vtbl.8 d4, {q0}, d6 @ vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + vtbl.8 d5, {q0}, d7 + veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + veor q2, q2, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = A + @ Here we restore the original q0/q2 usage. + vtbl.8 d0, {q2}, d2 @ vpshufb %xmm1, %xmm0, %xmm0 + vtbl.8 d1, {q2}, d3 + bx lr +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + +.globl vpaes_encrypt +.hidden vpaes_encrypt +.type vpaes_encrypt,%function +.align 4 +vpaes_encrypt: + @ _vpaes_encrypt_core uses r8-r11. Round up to r7-r11 to maintain stack + @ alignment. + stmdb sp!, {r7,r8,r9,r10,r11,lr} + @ _vpaes_encrypt_core uses q4-q5 (d8-d11), which are callee-saved. + vstmdb sp!, {d8,d9,d10,d11} + + vld1.64 {q0}, [r0] + bl _vpaes_preheat + bl _vpaes_encrypt_core + vst1.64 {q0}, [r1] + + vldmia sp!, {d8,d9,d10,d11} + ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return +.size vpaes_encrypt,.-vpaes_encrypt + +@ +@ Decryption stuff +@ +.type _vpaes_decrypt_consts,%object +.align 4 +_vpaes_decrypt_consts: +.Lk_dipt:@ decryption input transform +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 +.Lk_dsbo:@ decryption sbox final output +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.Lk_dsb9:@ decryption sbox output *9*u, *9*t +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd:@ decryption sbox output *D*u, *D*t +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb:@ decryption sbox output *B*u, *B*t +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe:@ decryption sbox output *E*u, *E*t +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.size _vpaes_decrypt_consts,.-_vpaes_decrypt_consts + +@@ +@@ Decryption core +@@ +@@ Same API as encryption core, except it clobbers q12-q15 rather than using +@@ the values from _vpaes_preheat. q9-q11 must still be set from +@@ _vpaes_preheat. +@@ +.type _vpaes_decrypt_core,%function +.align 4 +_vpaes_decrypt_core: + mov r9, r2 + ldr r8, [r2,#240] @ pull rounds + + @ This function performs shuffles with various constants. The x86_64 + @ version loads them on-demand into %xmm0-%xmm5. This does not work well + @ for ARMv7 because those registers are shuffle destinations. The ARMv8 + @ version preloads those constants into registers, but ARMv7 has half + @ the registers to work with. Instead, we load them on-demand into + @ q12-q15, registers normally use for preloaded constants. This is fine + @ because decryption doesn't use those constants. The values are + @ constant, so this does not interfere with potential 2x optimizations. + adr r7, .Lk_dipt + + vld1.64 {q12,q13}, [r7] @ vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl r11, r8, #4 @ mov %rax, %r11; shl $4, %r11 + eor r11, r11, #0x30 @ xor $0x30, %r11 + adr r10, .Lk_sr + and r11, r11, #0x30 @ and $0x30, %r11 + add r11, r11, r10 + adr r10, .Lk_mc_forward+48 + + vld1.64 {q4}, [r9]! @ vmovdqu (%r9), %xmm4 # round0 key + vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 + vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 + vtbl.8 d4, {q12}, d2 @ vpshufb %xmm1, %xmm2, %xmm2 + vtbl.8 d5, {q12}, d3 + vld1.64 {q5}, [r10] @ vmovdqa .Lk_mc_forward+48(%rip), %xmm5 + @ vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + vtbl.8 d0, {q13}, d0 @ vpshufb %xmm0, %xmm1, %xmm0 + vtbl.8 d1, {q13}, d1 + veor q2, q2, q4 @ vpxor %xmm4, %xmm2, %xmm2 + veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0 + + @ .Ldec_entry ends with a bnz instruction which is normally paired with + @ subs in .Ldec_loop. + tst r8, r8 + b .Ldec_entry + +.align 4 +.Ldec_loop: +@ +@ Inverse mix columns +@ + + @ We load .Lk_dsb* into q12-q15 on-demand. See the comment at the top of + @ the function. + adr r10, .Lk_dsb9 + vld1.64 {q12,q13}, [r10]! @ vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + @ vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + @ Load sbd* ahead of time. + vld1.64 {q14,q15}, [r10]! @ vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + @ vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + vtbl.8 d8, {q12}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + vtbl.8 d9, {q12}, d5 + vtbl.8 d2, {q13}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + vtbl.8 d3, {q13}, d7 + veor q0, q4, q0 @ vpxor %xmm4, %xmm0, %xmm0 + + veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + + @ Load sbb* ahead of time. + vld1.64 {q12,q13}, [r10]! @ vmovdqa 0x20(%r10),%xmm4 # 4 : sbbu + @ vmovdqa 0x30(%r10),%xmm1 # 0 : sbbt + + vtbl.8 d8, {q14}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + vtbl.8 d9, {q14}, d5 + @ Write to q1 instead of q0, so the table and destination registers do + @ not overlap. + vtbl.8 d2, {q0}, d10 @ vpshufb %xmm5, %xmm0, %xmm0 # MC ch + vtbl.8 d3, {q0}, d11 + @ Here we restore the original q0/q1 usage. This instruction is + @ reordered from the ARMv8 version so we do not clobber the vtbl.8 + @ below. + veor q0, q1, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + vtbl.8 d2, {q15}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + vtbl.8 d3, {q15}, d7 + @ vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + @ vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + @ Load sbd* ahead of time. + vld1.64 {q14,q15}, [r10]! @ vmovdqa 0x40(%r10),%xmm4 # 4 : sbeu + @ vmovdqa 0x50(%r10),%xmm1 # 0 : sbet + + vtbl.8 d8, {q12}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + vtbl.8 d9, {q12}, d5 + @ Write to q1 instead of q0, so the table and destination registers do + @ not overlap. + vtbl.8 d2, {q0}, d10 @ vpshufb %xmm5, %xmm0, %xmm0 # MC ch + vtbl.8 d3, {q0}, d11 + @ Here we restore the original q0/q1 usage. This instruction is + @ reordered from the ARMv8 version so we do not clobber the vtbl.8 + @ below. + veor q0, q1, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + vtbl.8 d2, {q13}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + vtbl.8 d3, {q13}, d7 + veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + + vtbl.8 d8, {q14}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + vtbl.8 d9, {q14}, d5 + @ Write to q1 instead of q0, so the table and destination registers do + @ not overlap. + vtbl.8 d2, {q0}, d10 @ vpshufb %xmm5, %xmm0, %xmm0 # MC ch + vtbl.8 d3, {q0}, d11 + @ Here we restore the original q0/q1 usage. This instruction is + @ reordered from the ARMv8 version so we do not clobber the vtbl.8 + @ below. + veor q0, q1, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + vtbl.8 d2, {q15}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + vtbl.8 d3, {q15}, d7 + vext.8 q5, q5, q5, #12 @ vpalignr $12, %xmm5, %xmm5, %xmm5 + veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + subs r8, r8, #1 @ sub $1,%rax # nr-- + +.Ldec_entry: + @ top of round + vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 # 0 = k + vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i + vtbl.8 d4, {q11}, d2 @ vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + vtbl.8 d5, {q11}, d3 + veor q1, q1, q0 @ vpxor %xmm0, %xmm1, %xmm1 # 0 = j + vtbl.8 d6, {q10}, d0 @ vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + vtbl.8 d7, {q10}, d1 + vtbl.8 d8, {q10}, d2 @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + vtbl.8 d9, {q10}, d3 + veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + veor q4, q4, q2 @ vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + vtbl.8 d4, {q10}, d6 @ vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + vtbl.8 d5, {q10}, d7 + vtbl.8 d6, {q10}, d8 @ vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + vtbl.8 d7, {q10}, d9 + veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io + veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + vld1.64 {q0}, [r9]! @ vmovdqu (%r9), %xmm0 + bne .Ldec_loop + + @ middle of last round + + adr r10, .Lk_dsbo + + @ Write to q1 rather than q4 to avoid overlapping table and destination. + vld1.64 {q1}, [r10]! @ vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + vtbl.8 d8, {q1}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + vtbl.8 d9, {q1}, d5 + @ Write to q2 rather than q1 to avoid overlapping table and destination. + vld1.64 {q2}, [r10] @ vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + vtbl.8 d2, {q2}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + vtbl.8 d3, {q2}, d7 + vld1.64 {q2}, [r11] @ vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 + veor q4, q4, q0 @ vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + @ Write to q1 rather than q0 so the table and destination registers + @ below do not overlap. + veor q1, q1, q4 @ vpxor %xmm4, %xmm1, %xmm0 # 0 = A + vtbl.8 d0, {q1}, d4 @ vpshufb %xmm2, %xmm0, %xmm0 + vtbl.8 d1, {q1}, d5 + bx lr +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + +.globl vpaes_decrypt +.hidden vpaes_decrypt +.type vpaes_decrypt,%function +.align 4 +vpaes_decrypt: + @ _vpaes_decrypt_core uses r7-r11. + stmdb sp!, {r7,r8,r9,r10,r11,lr} + @ _vpaes_decrypt_core uses q4-q5 (d8-d11), which are callee-saved. + vstmdb sp!, {d8,d9,d10,d11} + + vld1.64 {q0}, [r0] + bl _vpaes_preheat + bl _vpaes_decrypt_core + vst1.64 {q0}, [r1] + + vldmia sp!, {d8,d9,d10,d11} + ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return +.size vpaes_decrypt,.-vpaes_decrypt +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@@ @@ +@@ AES key schedule @@ +@@ @@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +@ This function diverges from both x86_64 and armv7 in which constants are +@ pinned. x86_64 has a common preheat function for all operations. aarch64 +@ separates them because it has enough registers to pin nearly all constants. +@ armv7 does not have enough registers, but needing explicit loads and stores +@ also complicates using x86_64's register allocation directly. +@ +@ We pin some constants for convenience and leave q14 and q15 free to load +@ others on demand. + +@ +@ Key schedule constants +@ +.type _vpaes_key_consts,%object +.align 4 +_vpaes_key_consts: +.Lk_dksd:@ decryption key schedule: invskew x*D +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb:@ decryption key schedule: invskew x*B +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse:@ decryption key schedule: invskew x*E + 0x63 +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9:@ decryption key schedule: invskew x*9 +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + +.Lk_rcon:@ rcon +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_opt:@ output transform +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 +.Lk_deskew:@ deskew tables: inverts the sbox's "skew" +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 +.size _vpaes_key_consts,.-_vpaes_key_consts + +.type _vpaes_key_preheat,%function +.align 4 +_vpaes_key_preheat: + adr r11, .Lk_rcon + vmov.i8 q12, #0x5b @ .Lk_s63 + adr r10, .Lk_inv @ Must be aligned to 8 mod 16. + vmov.i8 q9, #0x0f @ .Lk_s0F + vld1.64 {q10,q11}, [r10] @ .Lk_inv + vld1.64 {q8}, [r11] @ .Lk_rcon + bx lr +.size _vpaes_key_preheat,.-_vpaes_key_preheat + +.type _vpaes_schedule_core,%function +.align 4 +_vpaes_schedule_core: + @ We only need to save lr, but ARM requires an 8-byte stack alignment, + @ so save an extra register. + stmdb sp!, {r3,lr} + + bl _vpaes_key_preheat @ load the tables + + adr r11, .Lk_ipt @ Must be aligned to 8 mod 16. + vld1.64 {q0}, [r0]! @ vmovdqu (%rdi), %xmm0 # load key (unaligned) + + @ input transform + @ Use q4 here rather than q3 so .Lschedule_am_decrypting does not + @ overlap table and destination. + vmov q4, q0 @ vmovdqa %xmm0, %xmm3 + bl _vpaes_schedule_transform + adr r10, .Lk_sr @ Must be aligned to 8 mod 16. + vmov q7, q0 @ vmovdqa %xmm0, %xmm7 + + add r8, r8, r10 + tst r3, r3 + bne .Lschedule_am_decrypting + + @ encrypting, output zeroth round key after transform + vst1.64 {q0}, [r2] @ vmovdqu %xmm0, (%rdx) + b .Lschedule_go + +.Lschedule_am_decrypting: + @ decrypting, output zeroth round key after shiftrows + vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1 + vtbl.8 d6, {q4}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 + vtbl.8 d7, {q4}, d3 + vst1.64 {q3}, [r2] @ vmovdqu %xmm3, (%rdx) + eor r8, r8, #0x30 @ xor $0x30, %r8 + +.Lschedule_go: + cmp r1, #192 @ cmp $192, %esi + bhi .Lschedule_256 + beq .Lschedule_192 + @ 128: fall though + +@@ +@@ .schedule_128 +@@ +@@ 128-bit specific part of key schedule. +@@ +@@ This schedule is really simple, because all its parts +@@ are accomplished by the subroutines. +@@ +.Lschedule_128: + mov r0, #10 @ mov $10, %esi + +.Loop_schedule_128: + bl _vpaes_schedule_round + subs r0, r0, #1 @ dec %esi + beq .Lschedule_mangle_last + bl _vpaes_schedule_mangle @ write output + b .Loop_schedule_128 + +@@ +@@ .aes_schedule_192 +@@ +@@ 192-bit specific part of key schedule. +@@ +@@ The main body of this schedule is the same as the 128-bit +@@ schedule, but with more smearing. The long, high side is +@@ stored in q7 as before, and the short, low side is in +@@ the high bits of q6. +@@ +@@ This schedule is somewhat nastier, however, because each +@@ round produces 192 bits of key material, or 1.5 round keys. +@@ Therefore, on each cycle we do 2 rounds and produce 3 round +@@ keys. +@@ +.align 4 +.Lschedule_192: + sub r0, r0, #8 + vld1.64 {q0}, [r0] @ vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) + bl _vpaes_schedule_transform @ input transform + vmov q6, q0 @ vmovdqa %xmm0, %xmm6 # save short part + vmov.i8 d12, #0 @ vpxor %xmm4, %xmm4, %xmm4 # clear 4 + @ vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros + mov r0, #4 @ mov $4, %esi + +.Loop_schedule_192: + bl _vpaes_schedule_round + vext.8 q0, q6, q0, #8 @ vpalignr $8,%xmm6,%xmm0,%xmm0 + bl _vpaes_schedule_mangle @ save key n + bl _vpaes_schedule_192_smear + bl _vpaes_schedule_mangle @ save key n+1 + bl _vpaes_schedule_round + subs r0, r0, #1 @ dec %esi + beq .Lschedule_mangle_last + bl _vpaes_schedule_mangle @ save key n+2 + bl _vpaes_schedule_192_smear + b .Loop_schedule_192 + +@@ +@@ .aes_schedule_256 +@@ +@@ 256-bit specific part of key schedule. +@@ +@@ The structure here is very similar to the 128-bit +@@ schedule, but with an additional "low side" in +@@ q6. The low side's rounds are the same as the +@@ high side's, except no rcon and no rotation. +@@ +.align 4 +.Lschedule_256: + vld1.64 {q0}, [r0] @ vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) + bl _vpaes_schedule_transform @ input transform + mov r0, #7 @ mov $7, %esi + +.Loop_schedule_256: + bl _vpaes_schedule_mangle @ output low result + vmov q6, q0 @ vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 + + @ high round + bl _vpaes_schedule_round + subs r0, r0, #1 @ dec %esi + beq .Lschedule_mangle_last + bl _vpaes_schedule_mangle + + @ low round. swap xmm7 and xmm6 + vdup.32 q0, d1[1] @ vpshufd $0xFF, %xmm0, %xmm0 + vmov.i8 q4, #0 + vmov q5, q7 @ vmovdqa %xmm7, %xmm5 + vmov q7, q6 @ vmovdqa %xmm6, %xmm7 + bl _vpaes_schedule_low_round + vmov q7, q5 @ vmovdqa %xmm5, %xmm7 + + b .Loop_schedule_256 + +@@ +@@ .aes_schedule_mangle_last +@@ +@@ Mangler for last round of key schedule +@@ Mangles q0 +@@ when encrypting, outputs out(q0) ^ 63 +@@ when decrypting, outputs unskew(q0) +@@ +@@ Always called right before return... jumps to cleanup and exits +@@ +.align 4 +.Lschedule_mangle_last: + @ schedule last round key from xmm0 + adr r11, .Lk_deskew @ lea .Lk_deskew(%rip),%r11 # prepare to deskew + tst r3, r3 + bne .Lschedule_mangle_last_dec + + @ encrypting + vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10),%xmm1 + adr r11, .Lk_opt @ lea .Lk_opt(%rip), %r11 # prepare to output transform + add r2, r2, #32 @ add $32, %rdx + vmov q2, q0 + vtbl.8 d0, {q2}, d2 @ vpshufb %xmm1, %xmm0, %xmm0 # output permute + vtbl.8 d1, {q2}, d3 + +.Lschedule_mangle_last_dec: + sub r2, r2, #16 @ add $-16, %rdx + veor q0, q0, q12 @ vpxor .Lk_s63(%rip), %xmm0, %xmm0 + bl _vpaes_schedule_transform @ output transform + vst1.64 {q0}, [r2] @ vmovdqu %xmm0, (%rdx) # save last key + + @ cleanup + veor q0, q0, q0 @ vpxor %xmm0, %xmm0, %xmm0 + veor q1, q1, q1 @ vpxor %xmm1, %xmm1, %xmm1 + veor q2, q2, q2 @ vpxor %xmm2, %xmm2, %xmm2 + veor q3, q3, q3 @ vpxor %xmm3, %xmm3, %xmm3 + veor q4, q4, q4 @ vpxor %xmm4, %xmm4, %xmm4 + veor q5, q5, q5 @ vpxor %xmm5, %xmm5, %xmm5 + veor q6, q6, q6 @ vpxor %xmm6, %xmm6, %xmm6 + veor q7, q7, q7 @ vpxor %xmm7, %xmm7, %xmm7 + ldmia sp!, {r3,pc} @ return +.size _vpaes_schedule_core,.-_vpaes_schedule_core + +@@ +@@ .aes_schedule_192_smear +@@ +@@ Smear the short, low side in the 192-bit key schedule. +@@ +@@ Inputs: +@@ q7: high side, b a x y +@@ q6: low side, d c 0 0 +@@ +@@ Outputs: +@@ q6: b+c+d b+c 0 0 +@@ q0: b+c+d b+c b a +@@ +.type _vpaes_schedule_192_smear,%function +.align 4 +_vpaes_schedule_192_smear: + vmov.i8 q1, #0 + vdup.32 q0, d15[1] + vshl.i64 q1, q6, #32 @ vpshufd $0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 + vmov d0, d15 @ vpshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + veor q6, q6, q1 @ vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 + veor q1, q1, q1 @ vpxor %xmm1, %xmm1, %xmm1 + veor q6, q6, q0 @ vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a + vmov q0, q6 @ vmovdqa %xmm6, %xmm0 + vmov d12, d2 @ vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros + bx lr +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + +@@ +@@ .aes_schedule_round +@@ +@@ Runs one main round of the key schedule on q0, q7 +@@ +@@ Specifically, runs subbytes on the high dword of q0 +@@ then rotates it by one byte and xors into the low dword of +@@ q7. +@@ +@@ Adds rcon from low byte of q8, then rotates q8 for +@@ next rcon. +@@ +@@ Smears the dwords of q7 by xoring the low into the +@@ second low, result into third, result into highest. +@@ +@@ Returns results in q7 = q0. +@@ Clobbers q1-q4, r11. +@@ +.type _vpaes_schedule_round,%function +.align 4 +_vpaes_schedule_round: + @ extract rcon from xmm8 + vmov.i8 q4, #0 @ vpxor %xmm4, %xmm4, %xmm4 + vext.8 q1, q8, q4, #15 @ vpalignr $15, %xmm8, %xmm4, %xmm1 + vext.8 q8, q8, q8, #15 @ vpalignr $15, %xmm8, %xmm8, %xmm8 + veor q7, q7, q1 @ vpxor %xmm1, %xmm7, %xmm7 + + @ rotate + vdup.32 q0, d1[1] @ vpshufd $0xFF, %xmm0, %xmm0 + vext.8 q0, q0, q0, #1 @ vpalignr $1, %xmm0, %xmm0, %xmm0 + + @ fall through... + + @ low round: same as high round, but no rotation and no rcon. +_vpaes_schedule_low_round: + @ The x86_64 version pins .Lk_sb1 in %xmm13 and .Lk_sb1+16 in %xmm12. + @ We pin other values in _vpaes_key_preheat, so load them now. + adr r11, .Lk_sb1 + vld1.64 {q14,q15}, [r11] + + @ smear xmm7 + vext.8 q1, q4, q7, #12 @ vpslldq $4, %xmm7, %xmm1 + veor q7, q7, q1 @ vpxor %xmm1, %xmm7, %xmm7 + vext.8 q4, q4, q7, #8 @ vpslldq $8, %xmm7, %xmm4 + + @ subbytes + vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 # 0 = k + vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i + veor q7, q7, q4 @ vpxor %xmm4, %xmm7, %xmm7 + vtbl.8 d4, {q11}, d2 @ vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + vtbl.8 d5, {q11}, d3 + veor q1, q1, q0 @ vpxor %xmm0, %xmm1, %xmm1 # 0 = j + vtbl.8 d6, {q10}, d0 @ vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + vtbl.8 d7, {q10}, d1 + veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + vtbl.8 d8, {q10}, d2 @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + vtbl.8 d9, {q10}, d3 + veor q7, q7, q12 @ vpxor .Lk_s63(%rip), %xmm7, %xmm7 + vtbl.8 d6, {q10}, d6 @ vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak + vtbl.8 d7, {q10}, d7 + veor q4, q4, q2 @ vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + vtbl.8 d4, {q10}, d8 @ vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak + vtbl.8 d5, {q10}, d9 + veor q3, q3, q1 @ vpxor %xmm1, %xmm3, %xmm3 # 2 = io + veor q2, q2, q0 @ vpxor %xmm0, %xmm2, %xmm2 # 3 = jo + vtbl.8 d8, {q15}, d6 @ vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou + vtbl.8 d9, {q15}, d7 + vtbl.8 d2, {q14}, d4 @ vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t + vtbl.8 d3, {q14}, d5 + veor q1, q1, q4 @ vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output + + @ add in smeared stuff + veor q0, q1, q7 @ vpxor %xmm7, %xmm1, %xmm0 + veor q7, q1, q7 @ vmovdqa %xmm0, %xmm7 + bx lr +.size _vpaes_schedule_round,.-_vpaes_schedule_round + +@@ +@@ .aes_schedule_transform +@@ +@@ Linear-transform q0 according to tables at [r11] +@@ +@@ Requires that q9 = 0x0F0F... as in preheat +@@ Output in q0 +@@ Clobbers q1, q2, q14, q15 +@@ +.type _vpaes_schedule_transform,%function +.align 4 +_vpaes_schedule_transform: + vld1.64 {q14,q15}, [r11] @ vmovdqa (%r11), %xmm2 # lo + @ vmovdqa 16(%r11), %xmm1 # hi + vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 + vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 + vtbl.8 d4, {q14}, d2 @ vpshufb %xmm1, %xmm2, %xmm2 + vtbl.8 d5, {q14}, d3 + vtbl.8 d0, {q15}, d0 @ vpshufb %xmm0, %xmm1, %xmm0 + vtbl.8 d1, {q15}, d1 + veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0 + bx lr +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + +@@ +@@ .aes_schedule_mangle +@@ +@@ Mangles q0 from (basis-transformed) standard version +@@ to our version. +@@ +@@ On encrypt, +@@ xor with 0x63 +@@ multiply by circulant 0,1,1,1 +@@ apply shiftrows transform +@@ +@@ On decrypt, +@@ xor with 0x63 +@@ multiply by "inverse mixcolumns" circulant E,B,D,9 +@@ deskew +@@ apply shiftrows transform +@@ +@@ +@@ Writes out to [r2], and increments or decrements it +@@ Keeps track of round number mod 4 in r8 +@@ Preserves q0 +@@ Clobbers q1-q5 +@@ +.type _vpaes_schedule_mangle,%function +.align 4 +_vpaes_schedule_mangle: + tst r3, r3 + vmov q4, q0 @ vmovdqa %xmm0, %xmm4 # save xmm0 for later + adr r11, .Lk_mc_forward @ Must be aligned to 8 mod 16. + vld1.64 {q5}, [r11] @ vmovdqa .Lk_mc_forward(%rip),%xmm5 + bne .Lschedule_mangle_dec + + @ encrypting + @ Write to q2 so we do not overlap table and destination below. + veor q2, q0, q12 @ vpxor .Lk_s63(%rip), %xmm0, %xmm4 + add r2, r2, #16 @ add $16, %rdx + vtbl.8 d8, {q2}, d10 @ vpshufb %xmm5, %xmm4, %xmm4 + vtbl.8 d9, {q2}, d11 + vtbl.8 d2, {q4}, d10 @ vpshufb %xmm5, %xmm4, %xmm1 + vtbl.8 d3, {q4}, d11 + vtbl.8 d6, {q1}, d10 @ vpshufb %xmm5, %xmm1, %xmm3 + vtbl.8 d7, {q1}, d11 + veor q4, q4, q1 @ vpxor %xmm1, %xmm4, %xmm4 + vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1 + veor q3, q3, q4 @ vpxor %xmm4, %xmm3, %xmm3 + + b .Lschedule_mangle_both +.align 4 +.Lschedule_mangle_dec: + @ inverse mix columns + adr r11, .Lk_dksd @ lea .Lk_dksd(%rip),%r11 + vshr.u8 q1, q4, #4 @ vpsrlb $4, %xmm4, %xmm1 # 1 = hi + vand q4, q4, q9 @ vpand %xmm9, %xmm4, %xmm4 # 4 = lo + + vld1.64 {q14,q15}, [r11]! @ vmovdqa 0x00(%r11), %xmm2 + @ vmovdqa 0x10(%r11), %xmm3 + vtbl.8 d4, {q14}, d8 @ vpshufb %xmm4, %xmm2, %xmm2 + vtbl.8 d5, {q14}, d9 + vtbl.8 d6, {q15}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 + vtbl.8 d7, {q15}, d3 + @ Load .Lk_dksb ahead of time. + vld1.64 {q14,q15}, [r11]! @ vmovdqa 0x20(%r11), %xmm2 + @ vmovdqa 0x30(%r11), %xmm3 + @ Write to q13 so we do not overlap table and destination. + veor q13, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 + vtbl.8 d6, {q13}, d10 @ vpshufb %xmm5, %xmm3, %xmm3 + vtbl.8 d7, {q13}, d11 + + vtbl.8 d4, {q14}, d8 @ vpshufb %xmm4, %xmm2, %xmm2 + vtbl.8 d5, {q14}, d9 + veor q2, q2, q3 @ vpxor %xmm3, %xmm2, %xmm2 + vtbl.8 d6, {q15}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 + vtbl.8 d7, {q15}, d3 + @ Load .Lk_dkse ahead of time. + vld1.64 {q14,q15}, [r11]! @ vmovdqa 0x40(%r11), %xmm2 + @ vmovdqa 0x50(%r11), %xmm3 + @ Write to q13 so we do not overlap table and destination. + veor q13, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 + vtbl.8 d6, {q13}, d10 @ vpshufb %xmm5, %xmm3, %xmm3 + vtbl.8 d7, {q13}, d11 + + vtbl.8 d4, {q14}, d8 @ vpshufb %xmm4, %xmm2, %xmm2 + vtbl.8 d5, {q14}, d9 + veor q2, q2, q3 @ vpxor %xmm3, %xmm2, %xmm2 + vtbl.8 d6, {q15}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 + vtbl.8 d7, {q15}, d3 + @ Load .Lk_dkse ahead of time. + vld1.64 {q14,q15}, [r11]! @ vmovdqa 0x60(%r11), %xmm2 + @ vmovdqa 0x70(%r11), %xmm4 + @ Write to q13 so we do not overlap table and destination. + veor q13, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 + + vtbl.8 d4, {q14}, d8 @ vpshufb %xmm4, %xmm2, %xmm2 + vtbl.8 d5, {q14}, d9 + vtbl.8 d6, {q13}, d10 @ vpshufb %xmm5, %xmm3, %xmm3 + vtbl.8 d7, {q13}, d11 + vtbl.8 d8, {q15}, d2 @ vpshufb %xmm1, %xmm4, %xmm4 + vtbl.8 d9, {q15}, d3 + vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1 + veor q2, q2, q3 @ vpxor %xmm3, %xmm2, %xmm2 + veor q3, q4, q2 @ vpxor %xmm2, %xmm4, %xmm3 + + sub r2, r2, #16 @ add $-16, %rdx + +.Lschedule_mangle_both: + @ Write to q2 so table and destination do not overlap. + vtbl.8 d4, {q3}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 + vtbl.8 d5, {q3}, d3 + add r8, r8, #64-16 @ add $-16, %r8 + and r8, r8, #~(1<<6) @ and $0x30, %r8 + vst1.64 {q2}, [r2] @ vmovdqu %xmm3, (%rdx) + bx lr +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + +.globl vpaes_set_encrypt_key +.hidden vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,%function +.align 4 +vpaes_set_encrypt_key: + stmdb sp!, {r7,r8,r9,r10,r11, lr} + vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} + + lsr r9, r1, #5 @ shr $5,%eax + add r9, r9, #5 @ $5,%eax + str r9, [r2,#240] @ mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + + mov r3, #0 @ mov $0,%ecx + mov r8, #0x30 @ mov $0x30,%r8d + bl _vpaes_schedule_core + eor r0, r0, r0 + + vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.hidden vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,%function +.align 4 +vpaes_set_decrypt_key: + stmdb sp!, {r7,r8,r9,r10,r11, lr} + vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} + + lsr r9, r1, #5 @ shr $5,%eax + add r9, r9, #5 @ $5,%eax + str r9, [r2,#240] @ mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + lsl r9, r9, #4 @ shl $4,%eax + add r2, r2, #16 @ lea 16(%rdx,%rax),%rdx + add r2, r2, r9 + + mov r3, #1 @ mov $1,%ecx + lsr r8, r1, #1 @ shr $1,%r8d + and r8, r8, #32 @ and $32,%r8d + eor r8, r8, #32 @ xor $32,%r8d # nbits==192?0:32 + bl _vpaes_schedule_core + + vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key + +@ Additional constants for converting to bsaes. +.type _vpaes_convert_consts,%object +.align 4 +_vpaes_convert_consts: +@ .Lk_opt_then_skew applies skew(opt(x)) XOR 0x63, where skew is the linear +@ transform in the AES S-box. 0x63 is incorporated into the low half of the +@ table. This was computed with the following script: +@ +@ def u64s_to_u128(x, y): +@ return x | (y << 64) +@ def u128_to_u64s(w): +@ return w & ((1<<64)-1), w >> 64 +@ def get_byte(w, i): +@ return (w >> (i*8)) & 0xff +@ def apply_table(table, b): +@ lo = b & 0xf +@ hi = b >> 4 +@ return get_byte(table[0], lo) ^ get_byte(table[1], hi) +@ def opt(b): +@ table = [ +@ u64s_to_u128(0xFF9F4929D6B66000, 0xF7974121DEBE6808), +@ u64s_to_u128(0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0), +@ ] +@ return apply_table(table, b) +@ def rot_byte(b, n): +@ return 0xff & ((b << n) | (b >> (8-n))) +@ def skew(x): +@ return (x ^ rot_byte(x, 1) ^ rot_byte(x, 2) ^ rot_byte(x, 3) ^ +@ rot_byte(x, 4)) +@ table = [0, 0] +@ for i in range(16): +@ table[0] |= (skew(opt(i)) ^ 0x63) << (i*8) +@ table[1] |= skew(opt(i<<4)) << (i*8) +@ print(" .quad 0x%016x, 0x%016x" % u128_to_u64s(table[0])) +@ print(" .quad 0x%016x, 0x%016x" % u128_to_u64s(table[1])) +.Lk_opt_then_skew: +.quad 0x9cb8436798bc4763, 0x6440bb9f6044bf9b +.quad 0x1f30062936192f00, 0xb49bad829db284ab + +@ .Lk_decrypt_transform is a permutation which performs an 8-bit left-rotation +@ followed by a byte-swap on each 32-bit word of a vector. E.g., 0x11223344 +@ becomes 0x22334411 and then 0x11443322. +.Lk_decrypt_transform: +.quad 0x0704050603000102, 0x0f0c0d0e0b08090a +.size _vpaes_convert_consts,.-_vpaes_convert_consts + +@ void vpaes_encrypt_key_to_bsaes(AES_KEY *bsaes, const AES_KEY *vpaes); +.globl vpaes_encrypt_key_to_bsaes +.hidden vpaes_encrypt_key_to_bsaes +.type vpaes_encrypt_key_to_bsaes,%function +.align 4 +vpaes_encrypt_key_to_bsaes: + stmdb sp!, {r11, lr} + + @ See _vpaes_schedule_core for the key schedule logic. In particular, + @ _vpaes_schedule_transform(.Lk_ipt) (section 2.2 of the paper), + @ _vpaes_schedule_mangle (section 4.3), and .Lschedule_mangle_last + @ contain the transformations not in the bsaes representation. This + @ function inverts those transforms. + @ + @ Note also that bsaes-armv7.pl expects aes-armv4.pl's key + @ representation, which does not match the other aes_nohw_* + @ implementations. The ARM aes_nohw_* stores each 32-bit word + @ byteswapped, as a convenience for (unsupported) big-endian ARM, at the + @ cost of extra REV and VREV32 operations in little-endian ARM. + + vmov.i8 q9, #0x0f @ Required by _vpaes_schedule_transform + adr r2, .Lk_mc_forward @ Must be aligned to 8 mod 16. + add r3, r2, 0x90 @ .Lk_sr+0x10-.Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression) + + vld1.64 {q12}, [r2] + vmov.i8 q10, #0x5b @ .Lk_s63 from vpaes-x86_64 + adr r11, .Lk_opt @ Must be aligned to 8 mod 16. + vmov.i8 q11, #0x63 @ .LK_s63 without .Lk_ipt applied + + @ vpaes stores one fewer round count than bsaes, but the number of keys + @ is the same. + ldr r2, [r1,#240] + add r2, r2, #1 + str r2, [r0,#240] + + @ The first key is transformed with _vpaes_schedule_transform(.Lk_ipt). + @ Invert this with .Lk_opt. + vld1.64 {q0}, [r1]! + bl _vpaes_schedule_transform + vrev32.8 q0, q0 + vst1.64 {q0}, [r0]! + + @ The middle keys have _vpaes_schedule_transform(.Lk_ipt) applied, + @ followed by _vpaes_schedule_mangle. _vpaes_schedule_mangle XORs 0x63, + @ multiplies by the circulant 0,1,1,1, then applies ShiftRows. +.Loop_enc_key_to_bsaes: + vld1.64 {q0}, [r1]! + + @ Invert the ShiftRows step (see .Lschedule_mangle_both). Note we cycle + @ r3 in the opposite direction and start at .Lk_sr+0x10 instead of 0x30. + @ We use r3 rather than r8 to avoid a callee-saved register. + vld1.64 {q1}, [r3] + vtbl.8 d4, {q0}, d2 + vtbl.8 d5, {q0}, d3 + add r3, r3, #16 + and r3, r3, #~(1<<6) + vmov q0, q2 + + @ Handle the last key differently. + subs r2, r2, #1 + beq .Loop_enc_key_to_bsaes_last + + @ Multiply by the circulant. This is its own inverse. + vtbl.8 d2, {q0}, d24 + vtbl.8 d3, {q0}, d25 + vmov q0, q1 + vtbl.8 d4, {q1}, d24 + vtbl.8 d5, {q1}, d25 + veor q0, q0, q2 + vtbl.8 d2, {q2}, d24 + vtbl.8 d3, {q2}, d25 + veor q0, q0, q1 + + @ XOR and finish. + veor q0, q0, q10 + bl _vpaes_schedule_transform + vrev32.8 q0, q0 + vst1.64 {q0}, [r0]! + b .Loop_enc_key_to_bsaes + +.Loop_enc_key_to_bsaes_last: + @ The final key does not have a basis transform (note + @ .Lschedule_mangle_last inverts the original transform). It only XORs + @ 0x63 and applies ShiftRows. The latter was already inverted in the + @ loop. Note that, because we act on the original representation, we use + @ q11, not q10. + veor q0, q0, q11 + vrev32.8 q0, q0 + vst1.64 {q0}, [r0] + + @ Wipe registers which contained key material. + veor q0, q0, q0 + veor q1, q1, q1 + veor q2, q2, q2 + + ldmia sp!, {r11, pc} @ return +.size vpaes_encrypt_key_to_bsaes,.-vpaes_encrypt_key_to_bsaes + +@ void vpaes_decrypt_key_to_bsaes(AES_KEY *vpaes, const AES_KEY *bsaes); +.globl vpaes_decrypt_key_to_bsaes +.hidden vpaes_decrypt_key_to_bsaes +.type vpaes_decrypt_key_to_bsaes,%function +.align 4 +vpaes_decrypt_key_to_bsaes: + stmdb sp!, {r11, lr} + + @ See _vpaes_schedule_core for the key schedule logic. Note vpaes + @ computes the decryption key schedule in reverse. Additionally, + @ aes-x86_64.pl shares some transformations, so we must only partially + @ invert vpaes's transformations. In general, vpaes computes in a + @ different basis (.Lk_ipt and .Lk_opt) and applies the inverses of + @ MixColumns, ShiftRows, and the affine part of the AES S-box (which is + @ split into a linear skew and XOR of 0x63). We undo all but MixColumns. + @ + @ Note also that bsaes-armv7.pl expects aes-armv4.pl's key + @ representation, which does not match the other aes_nohw_* + @ implementations. The ARM aes_nohw_* stores each 32-bit word + @ byteswapped, as a convenience for (unsupported) big-endian ARM, at the + @ cost of extra REV and VREV32 operations in little-endian ARM. + + adr r2, .Lk_decrypt_transform + adr r3, .Lk_sr+0x30 + adr r11, .Lk_opt_then_skew @ Input to _vpaes_schedule_transform. + vld1.64 {q12}, [r2] @ Reuse q12 from encryption. + vmov.i8 q9, #0x0f @ Required by _vpaes_schedule_transform + + @ vpaes stores one fewer round count than bsaes, but the number of keys + @ is the same. + ldr r2, [r1,#240] + add r2, r2, #1 + str r2, [r0,#240] + + @ Undo the basis change and reapply the S-box affine transform. See + @ .Lschedule_mangle_last. + vld1.64 {q0}, [r1]! + bl _vpaes_schedule_transform + vrev32.8 q0, q0 + vst1.64 {q0}, [r0]! + + @ See _vpaes_schedule_mangle for the transform on the middle keys. Note + @ it simultaneously inverts MixColumns and the S-box affine transform. + @ See .Lk_dksd through .Lk_dks9. +.Loop_dec_key_to_bsaes: + vld1.64 {q0}, [r1]! + + @ Invert the ShiftRows step (see .Lschedule_mangle_both). Note going + @ forwards cancels inverting for which direction we cycle r3. We use r3 + @ rather than r8 to avoid a callee-saved register. + vld1.64 {q1}, [r3] + vtbl.8 d4, {q0}, d2 + vtbl.8 d5, {q0}, d3 + add r3, r3, #64-16 + and r3, r3, #~(1<<6) + vmov q0, q2 + + @ Handle the last key differently. + subs r2, r2, #1 + beq .Loop_dec_key_to_bsaes_last + + @ Undo the basis change and reapply the S-box affine transform. + bl _vpaes_schedule_transform + + @ Rotate each word by 8 bytes (cycle the rows) and then byte-swap. We + @ combine the two operations in .Lk_decrypt_transform. + @ + @ TODO(davidben): Where does the rotation come from? + vtbl.8 d2, {q0}, d24 + vtbl.8 d3, {q0}, d25 + + vst1.64 {q1}, [r0]! + b .Loop_dec_key_to_bsaes + +.Loop_dec_key_to_bsaes_last: + @ The final key only inverts ShiftRows (already done in the loop). See + @ .Lschedule_am_decrypting. Its basis is not transformed. + vrev32.8 q0, q0 + vst1.64 {q0}, [r0]! + + @ Wipe registers which contained key material. + veor q0, q0, q0 + veor q1, q1, q1 + veor q2, q2, q2 + + ldmia sp!, {r11, pc} @ return +.size vpaes_decrypt_key_to_bsaes,.-vpaes_decrypt_key_to_bsaes +.globl vpaes_ctr32_encrypt_blocks +.hidden vpaes_ctr32_encrypt_blocks +.type vpaes_ctr32_encrypt_blocks,%function +.align 4 +vpaes_ctr32_encrypt_blocks: + mov ip, sp + stmdb sp!, {r7,r8,r9,r10,r11, lr} + @ This function uses q4-q7 (d8-d15), which are callee-saved. + vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} + + cmp r2, #0 + @ r8 is passed on the stack. + ldr r8, [ip] + beq .Lctr32_done + + @ _vpaes_encrypt_core expects the key in r2, so swap r2 and r3. + mov r9, r3 + mov r3, r2 + mov r2, r9 + + @ Load the IV and counter portion. + ldr r7, [r8, #12] + vld1.8 {q7}, [r8] + + bl _vpaes_preheat + rev r7, r7 @ The counter is big-endian. + +.Lctr32_loop: + vmov q0, q7 + vld1.8 {q6}, [r0]! @ .Load input ahead of time + bl _vpaes_encrypt_core + veor q0, q0, q6 @ XOR input and result + vst1.8 {q0}, [r1]! + subs r3, r3, #1 + @ Update the counter. + add r7, r7, #1 + rev r9, r7 + vmov.32 d15[1], r9 + bne .Lctr32_loop + +.Lctr32_done: + vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return +.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-apple.S b/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-apple.S new file mode 100644 index 0000000000..a108a96882 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-apple.S @@ -0,0 +1,1224 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +.section __TEXT,__const + + +.align 7 // totally strategic alignment +_vpaes_consts: +Lk_mc_forward: // mc_forward +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 +Lk_mc_backward: // mc_backward +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F +Lk_sr: // sr +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +// +// "Hot" constants +// +Lk_inv: // inv, inva +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 +Lk_ipt: // input transform (lo, hi) +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 +Lk_sbo: // sbou, sbot +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA +Lk_sb1: // sb1u, sb1t +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +Lk_sb2: // sb2u, sb2t +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD + +// +// Decryption stuff +// +Lk_dipt: // decryption input transform +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 +Lk_dsbo: // decryption sbox final output +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +Lk_dsb9: // decryption sbox output *9*u, *9*t +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +Lk_dsbd: // decryption sbox output *D*u, *D*t +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +Lk_dsbb: // decryption sbox output *B*u, *B*t +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +Lk_dsbe: // decryption sbox output *E*u, *E*t +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 + +// +// Key schedule constants +// +Lk_dksd: // decryption key schedule: invskew x*D +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +Lk_dksb: // decryption key schedule: invskew x*B +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +Lk_dkse: // decryption key schedule: invskew x*E + 0x63 +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +Lk_dks9: // decryption key schedule: invskew x*9 +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + +Lk_rcon: // rcon +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +Lk_opt: // output transform +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 +Lk_deskew: // deskew tables: inverts the sbox's "skew" +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,56,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 2 + +.align 6 + +.text +## +## _aes_preheat +## +## Fills register %r10 -> .aes_consts (so you can -fPIC) +## and %xmm9-%xmm15 as specified below. +## + +.align 4 +_vpaes_encrypt_preheat: + adrp x10, Lk_inv@PAGE + add x10, x10, Lk_inv@PAGEOFF + movi v17.16b, #0x0f + ld1 {v18.2d,v19.2d}, [x10],#32 // Lk_inv + ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64 // Lk_ipt, Lk_sbo + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10] // Lk_sb1, Lk_sb2 + ret + + +## +## _aes_encrypt_core +## +## AES-encrypt %xmm0. +## +## Inputs: +## %xmm0 = input +## %xmm9-%xmm15 as in _vpaes_preheat +## (%rdx) = scheduled keys +## +## Output in %xmm0 +## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax +## Preserves %xmm6 - %xmm8 so you get some local vectors +## +## + +.align 4 +_vpaes_encrypt_core: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + adrp x11, Lk_mc_forward@PAGE+16 + add x11, x11, Lk_mc_forward@PAGEOFF+16 + // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key + and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 + // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 + eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + b Lenc_entry + +.align 4 +Lenc_loop: + // middle of middle round + add x10, x11, #0x40 + tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[] + tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[] + tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + and x11, x11, #~(1<<6) // and $0x30, %r11 # ... mod 4 + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + sub w8, w8, #1 // nr-- + +Lenc_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + tbl v5.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 + cbnz w8, Lenc_loop + + // middle of last round + add x10, x11, #0x80 + // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[] + tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 + ret + + +.globl _vpaes_encrypt +.private_extern _vpaes_encrypt + +.align 4 +_vpaes_encrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v7.16b}, [x0] + bl _vpaes_encrypt_preheat + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1] + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + + +.align 4 +_vpaes_encrypt_2x: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + adrp x11, Lk_mc_forward@PAGE+16 + add x11, x11, Lk_mc_forward@PAGEOFF+16 + // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key + and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + and v9.16b, v15.16b, v17.16b + ushr v8.16b, v15.16b, #4 + tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 + tbl v9.16b, {v20.16b}, v9.16b + // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 + tbl v10.16b, {v21.16b}, v8.16b + eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 + eor v8.16b, v9.16b, v16.16b + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + eor v8.16b, v8.16b, v10.16b + b Lenc_2x_entry + +.align 4 +Lenc_2x_loop: + // middle of middle round + add x10, x11, #0x40 + tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + tbl v12.16b, {v25.16b}, v10.16b + ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[] + tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + tbl v8.16b, {v24.16b}, v11.16b + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + tbl v13.16b, {v27.16b}, v10.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + eor v8.16b, v8.16b, v12.16b + tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + tbl v10.16b, {v26.16b}, v11.16b + ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[] + tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + tbl v11.16b, {v8.16b}, v1.16b + eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + eor v10.16b, v10.16b, v13.16b + tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + tbl v8.16b, {v8.16b}, v4.16b + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + eor v11.16b, v11.16b, v10.16b + tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + tbl v12.16b, {v11.16b},v1.16b + eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + eor v8.16b, v8.16b, v11.16b + and x11, x11, #~(1<<6) // and $0x30, %r11 # ... mod 4 + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + eor v8.16b, v8.16b, v12.16b + sub w8, w8, #1 // nr-- + +Lenc_2x_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + and v9.16b, v8.16b, v17.16b + ushr v8.16b, v8.16b, #4 + tbl v5.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + tbl v13.16b, {v19.16b},v9.16b + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + eor v9.16b, v9.16b, v8.16b + tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v11.16b, {v18.16b},v8.16b + tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + tbl v12.16b, {v18.16b},v9.16b + eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v11.16b, v11.16b, v13.16b + eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + eor v12.16b, v12.16b, v13.16b + tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v10.16b, {v18.16b},v11.16b + tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + tbl v11.16b, {v18.16b},v12.16b + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v10.16b, v10.16b, v9.16b + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + eor v11.16b, v11.16b, v8.16b + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 + cbnz w8, Lenc_2x_loop + + // middle of last round + add x10, x11, #0x80 + // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + tbl v12.16b, {v22.16b}, v10.16b + ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[] + tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + tbl v8.16b, {v23.16b}, v11.16b + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + eor v8.16b, v8.16b, v12.16b + tbl v0.16b, {v0.16b},v1.16b // vpshufb %xmm1, %xmm0, %xmm0 + tbl v1.16b, {v8.16b},v1.16b + ret + + + +.align 4 +_vpaes_decrypt_preheat: + adrp x10, Lk_inv@PAGE + add x10, x10, Lk_inv@PAGEOFF + movi v17.16b, #0x0f + adrp x11, Lk_dipt@PAGE + add x11, x11, Lk_dipt@PAGEOFF + ld1 {v18.2d,v19.2d}, [x10],#32 // Lk_inv + ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64 // Lk_dipt, Lk_dsbo + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64 // Lk_dsb9, Lk_dsbd + ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x11] // Lk_dsbb, Lk_dsbe + ret + + +## +## Decryption core +## +## Same API as encryption core. +## + +.align 4 +_vpaes_decrypt_core: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + + // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 + eor x11, x11, #0x30 // xor $0x30, %r11 + adrp x10, Lk_sr@PAGE + add x10, x10, Lk_sr@PAGEOFF + and x11, x11, #0x30 // and $0x30, %r11 + add x11, x11, x10 + adrp x10, Lk_mc_forward@PAGE+48 + add x10, x10, Lk_mc_forward@PAGEOFF+48 + + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key + and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + ld1 {v5.2d}, [x10] // vmovdqa Lk_mc_forward+48(%rip), %xmm5 + // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + b Ldec_entry + +.align 4 +Ldec_loop: +// +// Inverse mix columns +// + // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 + // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + sub w8, w8, #1 // sub $1,%rax # nr-- + +Ldec_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 + cbnz w8, Ldec_loop + + // middle of last round + // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160 + tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A + tbl v0.16b, {v0.16b}, v2.16b // vpshufb %xmm2, %xmm0, %xmm0 + ret + + +.globl _vpaes_decrypt +.private_extern _vpaes_decrypt + +.align 4 +_vpaes_decrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v7.16b}, [x0] + bl _vpaes_decrypt_preheat + bl _vpaes_decrypt_core + st1 {v0.16b}, [x1] + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// v14-v15 input, v0-v1 output + +.align 4 +_vpaes_decrypt_2x: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + + // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 + eor x11, x11, #0x30 // xor $0x30, %r11 + adrp x10, Lk_sr@PAGE + add x10, x10, Lk_sr@PAGEOFF + and x11, x11, #0x30 // and $0x30, %r11 + add x11, x11, x10 + adrp x10, Lk_mc_forward@PAGE+48 + add x10, x10, Lk_mc_forward@PAGEOFF+48 + + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key + and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + and v9.16b, v15.16b, v17.16b + ushr v8.16b, v15.16b, #4 + tbl v2.16b, {v20.16b},v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + tbl v10.16b, {v20.16b},v9.16b + ld1 {v5.2d}, [x10] // vmovdqa Lk_mc_forward+48(%rip), %xmm5 + // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + tbl v0.16b, {v21.16b},v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + tbl v8.16b, {v21.16b},v8.16b + eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 + eor v10.16b, v10.16b, v16.16b + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + eor v8.16b, v8.16b, v10.16b + b Ldec_2x_entry + +.align 4 +Ldec_2x_loop: +// +// Inverse mix columns +// + // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + tbl v12.16b, {v24.16b}, v10.16b + tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + tbl v9.16b, {v25.16b}, v11.16b + eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 + eor v8.16b, v12.16b, v16.16b + // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + tbl v12.16b, {v26.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + tbl v9.16b, {v27.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + tbl v12.16b, {v28.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + tbl v9.16b, {v29.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + tbl v12.16b, {v30.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + tbl v9.16b, {v31.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + sub w8, w8, #1 // sub $1,%rax # nr-- + +Ldec_2x_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + and v9.16b, v8.16b, v17.16b + ushr v8.16b, v8.16b, #4 + tbl v2.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + tbl v10.16b, {v19.16b},v9.16b + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + eor v9.16b, v9.16b, v8.16b + tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v11.16b, {v18.16b},v8.16b + tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + tbl v12.16b, {v18.16b},v9.16b + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v11.16b, v11.16b, v10.16b + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + eor v12.16b, v12.16b, v10.16b + tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v10.16b, {v18.16b},v11.16b + tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + tbl v11.16b, {v18.16b},v12.16b + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v10.16b, v10.16b, v9.16b + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + eor v11.16b, v11.16b, v8.16b + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 + cbnz w8, Ldec_2x_loop + + // middle of last round + // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + tbl v12.16b, {v22.16b}, v10.16b + // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + tbl v9.16b, {v23.16b}, v11.16b + ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160 + eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A + eor v8.16b, v9.16b, v12.16b + tbl v0.16b, {v0.16b},v2.16b // vpshufb %xmm2, %xmm0, %xmm0 + tbl v1.16b, {v8.16b},v2.16b + ret + +######################################################## +## ## +## AES key schedule ## +## ## +######################################################## + +.align 4 +_vpaes_key_preheat: + adrp x10, Lk_inv@PAGE + add x10, x10, Lk_inv@PAGEOFF + movi v16.16b, #0x5b // Lk_s63 + adrp x11, Lk_sb1@PAGE + add x11, x11, Lk_sb1@PAGEOFF + movi v17.16b, #0x0f // Lk_s0F + ld1 {v18.2d,v19.2d,v20.2d,v21.2d}, [x10] // Lk_inv, Lk_ipt + adrp x10, Lk_dksd@PAGE + add x10, x10, Lk_dksd@PAGEOFF + ld1 {v22.2d,v23.2d}, [x11] // Lk_sb1 + adrp x11, Lk_mc_forward@PAGE + add x11, x11, Lk_mc_forward@PAGEOFF + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64 // Lk_dksd, Lk_dksb + ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64 // Lk_dkse, Lk_dks9 + ld1 {v8.2d}, [x10] // Lk_rcon + ld1 {v9.2d}, [x11] // Lk_mc_forward[0] + ret + + + +.align 4 +_vpaes_schedule_core: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp,#-16]! + add x29,sp,#0 + + bl _vpaes_key_preheat // load the tables + + ld1 {v0.16b}, [x0],#16 // vmovdqu (%rdi), %xmm0 # load key (unaligned) + + // input transform + mov v3.16b, v0.16b // vmovdqa %xmm0, %xmm3 + bl _vpaes_schedule_transform + mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7 + + adrp x10, Lk_sr@PAGE // lea Lk_sr(%rip),%r10 + add x10, x10, Lk_sr@PAGEOFF + + add x8, x8, x10 + cbnz w3, Lschedule_am_decrypting + + // encrypting, output zeroth round key after transform + st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) + b Lschedule_go + +Lschedule_am_decrypting: + // decrypting, output zeroth round key after shiftrows + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) + eor x8, x8, #0x30 // xor $0x30, %r8 + +Lschedule_go: + cmp w1, #192 // cmp $192, %esi + b.hi Lschedule_256 + b.eq Lschedule_192 + // 128: fall though + +## +## .schedule_128 +## +## 128-bit specific part of key schedule. +## +## This schedule is really simple, because all its parts +## are accomplished by the subroutines. +## +Lschedule_128: + mov x0, #10 // mov $10, %esi + +Loop_schedule_128: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_round + cbz x0, Lschedule_mangle_last + bl _vpaes_schedule_mangle // write output + b Loop_schedule_128 + +## +## .aes_schedule_192 +## +## 192-bit specific part of key schedule. +## +## The main body of this schedule is the same as the 128-bit +## schedule, but with more smearing. The long, high side is +## stored in %xmm7 as before, and the short, low side is in +## the high bits of %xmm6. +## +## This schedule is somewhat nastier, however, because each +## round produces 192 bits of key material, or 1.5 round keys. +## Therefore, on each cycle we do 2 rounds and produce 3 round +## keys. +## +.align 4 +Lschedule_192: + sub x0, x0, #8 + ld1 {v0.16b}, [x0] // vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) + bl _vpaes_schedule_transform // input transform + mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save short part + eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 # clear 4 + ins v6.d[0], v4.d[0] // vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros + mov x0, #4 // mov $4, %esi + +Loop_schedule_192: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_round + ext v0.16b, v6.16b, v0.16b, #8 // vpalignr $8,%xmm6,%xmm0,%xmm0 + bl _vpaes_schedule_mangle // save key n + bl _vpaes_schedule_192_smear + bl _vpaes_schedule_mangle // save key n+1 + bl _vpaes_schedule_round + cbz x0, Lschedule_mangle_last + bl _vpaes_schedule_mangle // save key n+2 + bl _vpaes_schedule_192_smear + b Loop_schedule_192 + +## +## .aes_schedule_256 +## +## 256-bit specific part of key schedule. +## +## The structure here is very similar to the 128-bit +## schedule, but with an additional "low side" in +## %xmm6. The low side's rounds are the same as the +## high side's, except no rcon and no rotation. +## +.align 4 +Lschedule_256: + ld1 {v0.16b}, [x0] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) + bl _vpaes_schedule_transform // input transform + mov x0, #7 // mov $7, %esi + +Loop_schedule_256: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_mangle // output low result + mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 + + // high round + bl _vpaes_schedule_round + cbz x0, Lschedule_mangle_last + bl _vpaes_schedule_mangle + + // low round. swap xmm7 and xmm6 + dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 + movi v4.16b, #0 + mov v5.16b, v7.16b // vmovdqa %xmm7, %xmm5 + mov v7.16b, v6.16b // vmovdqa %xmm6, %xmm7 + bl _vpaes_schedule_low_round + mov v7.16b, v5.16b // vmovdqa %xmm5, %xmm7 + + b Loop_schedule_256 + +## +## .aes_schedule_mangle_last +## +## Mangler for last round of key schedule +## Mangles %xmm0 +## when encrypting, outputs out(%xmm0) ^ 63 +## when decrypting, outputs unskew(%xmm0) +## +## Always called right before return... jumps to cleanup and exits +## +.align 4 +Lschedule_mangle_last: + // schedule last round key from xmm0 + adrp x11, Lk_deskew@PAGE // lea Lk_deskew(%rip),%r11 # prepare to deskew + add x11, x11, Lk_deskew@PAGEOFF + + cbnz w3, Lschedule_mangle_last_dec + + // encrypting + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1 + adrp x11, Lk_opt@PAGE // lea Lk_opt(%rip), %r11 # prepare to output transform + add x11, x11, Lk_opt@PAGEOFF + add x2, x2, #32 // add $32, %rdx + tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute + +Lschedule_mangle_last_dec: + ld1 {v20.2d,v21.2d}, [x11] // reload constants + sub x2, x2, #16 // add $-16, %rdx + eor v0.16b, v0.16b, v16.16b // vpxor Lk_s63(%rip), %xmm0, %xmm0 + bl _vpaes_schedule_transform // output transform + st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) # save last key + + // cleanup + eor v0.16b, v0.16b, v0.16b // vpxor %xmm0, %xmm0, %xmm0 + eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 + eor v2.16b, v2.16b, v2.16b // vpxor %xmm2, %xmm2, %xmm2 + eor v3.16b, v3.16b, v3.16b // vpxor %xmm3, %xmm3, %xmm3 + eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 + eor v5.16b, v5.16b, v5.16b // vpxor %xmm5, %xmm5, %xmm5 + eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 + eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 + ldp x29, x30, [sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +## +## .aes_schedule_192_smear +## +## Smear the short, low side in the 192-bit key schedule. +## +## Inputs: +## %xmm7: high side, b a x y +## %xmm6: low side, d c 0 0 +## %xmm13: 0 +## +## Outputs: +## %xmm6: b+c+d b+c 0 0 +## %xmm0: b+c+d b+c b a +## + +.align 4 +_vpaes_schedule_192_smear: + movi v1.16b, #0 + dup v0.4s, v7.s[3] + ins v1.s[3], v6.s[2] // vpshufd $0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 + ins v0.s[0], v7.s[2] // vpshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + eor v6.16b, v6.16b, v1.16b // vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 + eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 + eor v6.16b, v6.16b, v0.16b // vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a + mov v0.16b, v6.16b // vmovdqa %xmm6, %xmm0 + ins v6.d[0], v1.d[0] // vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros + ret + + +## +## .aes_schedule_round +## +## Runs one main round of the key schedule on %xmm0, %xmm7 +## +## Specifically, runs subbytes on the high dword of %xmm0 +## then rotates it by one byte and xors into the low dword of +## %xmm7. +## +## Adds rcon from low byte of %xmm8, then rotates %xmm8 for +## next rcon. +## +## Smears the dwords of %xmm7 by xoring the low into the +## second low, result into third, result into highest. +## +## Returns results in %xmm7 = %xmm0. +## Clobbers %xmm1-%xmm4, %r11. +## + +.align 4 +_vpaes_schedule_round: + // extract rcon from xmm8 + movi v4.16b, #0 // vpxor %xmm4, %xmm4, %xmm4 + ext v1.16b, v8.16b, v4.16b, #15 // vpalignr $15, %xmm8, %xmm4, %xmm1 + ext v8.16b, v8.16b, v8.16b, #15 // vpalignr $15, %xmm8, %xmm8, %xmm8 + eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 + + // rotate + dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 + ext v0.16b, v0.16b, v0.16b, #1 // vpalignr $1, %xmm0, %xmm0, %xmm0 + + // fall through... + + // low round: same as high round, but no rotation and no rcon. +_vpaes_schedule_low_round: + // smear xmm7 + ext v1.16b, v4.16b, v7.16b, #12 // vpslldq $4, %xmm7, %xmm1 + eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 + ext v4.16b, v4.16b, v7.16b, #8 // vpslldq $8, %xmm7, %xmm4 + + // subbytes + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + eor v7.16b, v7.16b, v4.16b // vpxor %xmm4, %xmm7, %xmm7 + tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v7.16b, v7.16b, v16.16b // vpxor Lk_s63(%rip), %xmm7, %xmm7 + tbl v3.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak + eor v3.16b, v3.16b, v1.16b // vpxor %xmm1, %xmm3, %xmm3 # 2 = io + eor v2.16b, v2.16b, v0.16b // vpxor %xmm0, %xmm2, %xmm2 # 3 = jo + tbl v4.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou + tbl v1.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t + eor v1.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output + + // add in smeared stuff + eor v0.16b, v1.16b, v7.16b // vpxor %xmm7, %xmm1, %xmm0 + eor v7.16b, v1.16b, v7.16b // vmovdqa %xmm0, %xmm7 + ret + + +## +## .aes_schedule_transform +## +## Linear-transform %xmm0 according to tables at (%r11) +## +## Requires that %xmm9 = 0x0F0F... as in preheat +## Output in %xmm0 +## Clobbers %xmm1, %xmm2 +## + +.align 4 +_vpaes_schedule_transform: + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + // vmovdqa (%r11), %xmm2 # lo + tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + // vmovdqa 16(%r11), %xmm1 # hi + tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + ret + + +## +## .aes_schedule_mangle +## +## Mangle xmm0 from (basis-transformed) standard version +## to our version. +## +## On encrypt, +## xor with 0x63 +## multiply by circulant 0,1,1,1 +## apply shiftrows transform +## +## On decrypt, +## xor with 0x63 +## multiply by "inverse mixcolumns" circulant E,B,D,9 +## deskew +## apply shiftrows transform +## +## +## Writes out to (%rdx), and increments or decrements it +## Keeps track of round number mod 4 in %r8 +## Preserves xmm0 +## Clobbers xmm1-xmm5 +## + +.align 4 +_vpaes_schedule_mangle: + mov v4.16b, v0.16b // vmovdqa %xmm0, %xmm4 # save xmm0 for later + // vmovdqa .Lk_mc_forward(%rip),%xmm5 + cbnz w3, Lschedule_mangle_dec + + // encrypting + eor v4.16b, v0.16b, v16.16b // vpxor Lk_s63(%rip), %xmm0, %xmm4 + add x2, x2, #16 // add $16, %rdx + tbl v4.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm4 + tbl v1.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm1 + tbl v3.16b, {v1.16b}, v9.16b // vpshufb %xmm5, %xmm1, %xmm3 + eor v4.16b, v4.16b, v1.16b // vpxor %xmm1, %xmm4, %xmm4 + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + eor v3.16b, v3.16b, v4.16b // vpxor %xmm4, %xmm3, %xmm3 + + b Lschedule_mangle_both +.align 4 +Lschedule_mangle_dec: + // inverse mix columns + // lea .Lk_dksd(%rip),%r11 + ushr v1.16b, v4.16b, #4 // vpsrlb $4, %xmm4, %xmm1 # 1 = hi + and v4.16b, v4.16b, v17.16b // vpand %xmm9, %xmm4, %xmm4 # 4 = lo + + // vmovdqa 0x00(%r11), %xmm2 + tbl v2.16b, {v24.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + // vmovdqa 0x10(%r11), %xmm3 + tbl v3.16b, {v25.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + + // vmovdqa 0x20(%r11), %xmm2 + tbl v2.16b, {v26.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + // vmovdqa 0x30(%r11), %xmm3 + tbl v3.16b, {v27.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + + // vmovdqa 0x40(%r11), %xmm2 + tbl v2.16b, {v28.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + // vmovdqa 0x50(%r11), %xmm3 + tbl v3.16b, {v29.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + + // vmovdqa 0x60(%r11), %xmm2 + tbl v2.16b, {v30.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + // vmovdqa 0x70(%r11), %xmm4 + tbl v4.16b, {v31.16b}, v1.16b // vpshufb %xmm1, %xmm4, %xmm4 + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + eor v3.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm3 + + sub x2, x2, #16 // add $-16, %rdx + +Lschedule_mangle_both: + tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + add x8, x8, #48 // add $-16, %r8 + and x8, x8, #~(1<<6) // and $0x30, %r8 + st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) + ret + + +.globl _vpaes_set_encrypt_key +.private_extern _vpaes_set_encrypt_key + +.align 4 +_vpaes_set_encrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + + lsr w9, w1, #5 // shr $5,%eax + add w9, w9, #5 // $5,%eax + str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + + mov w3, #0 // mov $0,%ecx + mov x8, #0x30 // mov $0x30,%r8d + bl _vpaes_schedule_core + eor x0, x0, x0 + + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.globl _vpaes_set_decrypt_key +.private_extern _vpaes_set_decrypt_key + +.align 4 +_vpaes_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + + lsr w9, w1, #5 // shr $5,%eax + add w9, w9, #5 // $5,%eax + str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + lsl w9, w9, #4 // shl $4,%eax + add x2, x2, #16 // lea 16(%rdx,%rax),%rdx + add x2, x2, x9 + + mov w3, #1 // mov $1,%ecx + lsr w8, w1, #1 // shr $1,%r8d + and x8, x8, #32 // and $32,%r8d + eor x8, x8, #32 // xor $32,%r8d # nbits==192?0:32 + bl _vpaes_schedule_core + + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl _vpaes_cbc_encrypt +.private_extern _vpaes_cbc_encrypt + +.align 4 +_vpaes_cbc_encrypt: + AARCH64_SIGN_LINK_REGISTER + cbz x2, Lcbc_abort + cmp w5, #0 // check direction + b.eq vpaes_cbc_decrypt + + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x17, x2 // reassign + mov x2, x3 // reassign + + ld1 {v0.16b}, [x4] // load ivec + bl _vpaes_encrypt_preheat + b Lcbc_enc_loop + +.align 4 +Lcbc_enc_loop: + ld1 {v7.16b}, [x0],#16 // load input + eor v7.16b, v7.16b, v0.16b // xor with ivec + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1],#16 // save output + subs x17, x17, #16 + b.hi Lcbc_enc_loop + + st1 {v0.16b}, [x4] // write ivec + + ldp x29,x30,[sp],#16 +Lcbc_abort: + AARCH64_VALIDATE_LINK_REGISTER + ret + + + +.align 4 +vpaes_cbc_decrypt: + // Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to + // only from vpaes_cbc_encrypt which has already signed the return address. + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! + + mov x17, x2 // reassign + mov x2, x3 // reassign + ld1 {v6.16b}, [x4] // load ivec + bl _vpaes_decrypt_preheat + tst x17, #16 + b.eq Lcbc_dec_loop2x + + ld1 {v7.16b}, [x0], #16 // load input + bl _vpaes_decrypt_core + eor v0.16b, v0.16b, v6.16b // xor with ivec + orr v6.16b, v7.16b, v7.16b // next ivec value + st1 {v0.16b}, [x1], #16 + subs x17, x17, #16 + b.ls Lcbc_dec_done + +.align 4 +Lcbc_dec_loop2x: + ld1 {v14.16b,v15.16b}, [x0], #32 + bl _vpaes_decrypt_2x + eor v0.16b, v0.16b, v6.16b // xor with ivec + eor v1.16b, v1.16b, v14.16b + orr v6.16b, v15.16b, v15.16b + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #32 + b.hi Lcbc_dec_loop2x + +Lcbc_dec_done: + st1 {v6.16b}, [x4] + + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl _vpaes_ctr32_encrypt_blocks +.private_extern _vpaes_ctr32_encrypt_blocks + +.align 4 +_vpaes_ctr32_encrypt_blocks: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! + + cbz x2, Lctr32_done + + // Note, unlike the other functions, x2 here is measured in blocks, + // not bytes. + mov x17, x2 + mov x2, x3 + + // Load the IV and counter portion. + ldr w6, [x4, #12] + ld1 {v7.16b}, [x4] + + bl _vpaes_encrypt_preheat + tst x17, #1 + rev w6, w6 // The counter is big-endian. + b.eq Lctr32_prep_loop + + // Handle one block so the remaining block count is even for + // _vpaes_encrypt_2x. + ld1 {v6.16b}, [x0], #16 // Load input ahead of time + bl _vpaes_encrypt_core + eor v0.16b, v0.16b, v6.16b // XOR input and result + st1 {v0.16b}, [x1], #16 + subs x17, x17, #1 + // Update the counter. + add w6, w6, #1 + rev w7, w6 + mov v7.s[3], w7 + b.ls Lctr32_done + +Lctr32_prep_loop: + // _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x + // uses v14 and v15. + mov v15.16b, v7.16b + mov v14.16b, v7.16b + add w6, w6, #1 + rev w7, w6 + mov v15.s[3], w7 + +Lctr32_loop: + ld1 {v6.16b,v7.16b}, [x0], #32 // Load input ahead of time + bl _vpaes_encrypt_2x + eor v0.16b, v0.16b, v6.16b // XOR input and result + eor v1.16b, v1.16b, v7.16b // XOR input and result (#2) + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #2 + // Update the counter. + add w7, w6, #1 + add w6, w6, #2 + rev w7, w7 + mov v14.s[3], w7 + rev w7, w6 + mov v15.s[3], w7 + b.hi Lctr32_loop + +Lctr32_done: + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-linux.S b/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-linux.S new file mode 100644 index 0000000000..c343f0050f --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-linux.S @@ -0,0 +1,1224 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +.section .rodata + +.type _vpaes_consts,%object +.align 7 // totally strategic alignment +_vpaes_consts: +.Lk_mc_forward: // mc_forward +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 +.Lk_mc_backward: // mc_backward +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F +.Lk_sr: // sr +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +// +// "Hot" constants +// +.Lk_inv: // inv, inva +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 +.Lk_ipt: // input transform (lo, hi) +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 +.Lk_sbo: // sbou, sbot +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA +.Lk_sb1: // sb1u, sb1t +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.Lk_sb2: // sb2u, sb2t +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD + +// +// Decryption stuff +// +.Lk_dipt: // decryption input transform +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 +.Lk_dsbo: // decryption sbox final output +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.Lk_dsb9: // decryption sbox output *9*u, *9*t +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: // decryption sbox output *D*u, *D*t +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: // decryption sbox output *B*u, *B*t +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: // decryption sbox output *E*u, *E*t +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 + +// +// Key schedule constants +// +.Lk_dksd: // decryption key schedule: invskew x*D +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: // decryption key schedule: invskew x*B +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: // decryption key schedule: invskew x*E + 0x63 +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: // decryption key schedule: invskew x*9 +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + +.Lk_rcon: // rcon +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_opt: // output transform +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 +.Lk_deskew: // deskew tables: inverts the sbox's "skew" +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,56,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 2 +.size _vpaes_consts,.-_vpaes_consts +.align 6 + +.text +## +## _aes_preheat +## +## Fills register %r10 -> .aes_consts (so you can -fPIC) +## and %xmm9-%xmm15 as specified below. +## +.type _vpaes_encrypt_preheat,%function +.align 4 +_vpaes_encrypt_preheat: + adrp x10, .Lk_inv + add x10, x10, :lo12:.Lk_inv + movi v17.16b, #0x0f + ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv + ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10] // .Lk_sb1, .Lk_sb2 + ret +.size _vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat + +## +## _aes_encrypt_core +## +## AES-encrypt %xmm0. +## +## Inputs: +## %xmm0 = input +## %xmm9-%xmm15 as in _vpaes_preheat +## (%rdx) = scheduled keys +## +## Output in %xmm0 +## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax +## Preserves %xmm6 - %xmm8 so you get some local vectors +## +## +.type _vpaes_encrypt_core,%function +.align 4 +_vpaes_encrypt_core: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + adrp x11, .Lk_mc_forward+16 + add x11, x11, :lo12:.Lk_mc_forward+16 + // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key + and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 + // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 + eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + b .Lenc_entry + +.align 4 +.Lenc_loop: + // middle of middle round + add x10, x11, #0x40 + tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] + tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] + tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + and x11, x11, #~(1<<6) // and $0x30, %r11 # ... mod 4 + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + sub w8, w8, #1 // nr-- + +.Lenc_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + tbl v5.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 + cbnz w8, .Lenc_loop + + // middle of last round + add x10, x11, #0x80 + // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] + tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 + ret +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + +.globl vpaes_encrypt +.hidden vpaes_encrypt +.type vpaes_encrypt,%function +.align 4 +vpaes_encrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v7.16b}, [x0] + bl _vpaes_encrypt_preheat + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1] + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_encrypt,.-vpaes_encrypt + +.type _vpaes_encrypt_2x,%function +.align 4 +_vpaes_encrypt_2x: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + adrp x11, .Lk_mc_forward+16 + add x11, x11, :lo12:.Lk_mc_forward+16 + // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key + and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + and v9.16b, v15.16b, v17.16b + ushr v8.16b, v15.16b, #4 + tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 + tbl v9.16b, {v20.16b}, v9.16b + // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 + tbl v10.16b, {v21.16b}, v8.16b + eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 + eor v8.16b, v9.16b, v16.16b + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + eor v8.16b, v8.16b, v10.16b + b .Lenc_2x_entry + +.align 4 +.Lenc_2x_loop: + // middle of middle round + add x10, x11, #0x40 + tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + tbl v12.16b, {v25.16b}, v10.16b + ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] + tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + tbl v8.16b, {v24.16b}, v11.16b + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + tbl v13.16b, {v27.16b}, v10.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + eor v8.16b, v8.16b, v12.16b + tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + tbl v10.16b, {v26.16b}, v11.16b + ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] + tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + tbl v11.16b, {v8.16b}, v1.16b + eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + eor v10.16b, v10.16b, v13.16b + tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + tbl v8.16b, {v8.16b}, v4.16b + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + eor v11.16b, v11.16b, v10.16b + tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + tbl v12.16b, {v11.16b},v1.16b + eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + eor v8.16b, v8.16b, v11.16b + and x11, x11, #~(1<<6) // and $0x30, %r11 # ... mod 4 + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + eor v8.16b, v8.16b, v12.16b + sub w8, w8, #1 // nr-- + +.Lenc_2x_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + and v9.16b, v8.16b, v17.16b + ushr v8.16b, v8.16b, #4 + tbl v5.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + tbl v13.16b, {v19.16b},v9.16b + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + eor v9.16b, v9.16b, v8.16b + tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v11.16b, {v18.16b},v8.16b + tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + tbl v12.16b, {v18.16b},v9.16b + eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v11.16b, v11.16b, v13.16b + eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + eor v12.16b, v12.16b, v13.16b + tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v10.16b, {v18.16b},v11.16b + tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + tbl v11.16b, {v18.16b},v12.16b + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v10.16b, v10.16b, v9.16b + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + eor v11.16b, v11.16b, v8.16b + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 + cbnz w8, .Lenc_2x_loop + + // middle of last round + add x10, x11, #0x80 + // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + tbl v12.16b, {v22.16b}, v10.16b + ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] + tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + tbl v8.16b, {v23.16b}, v11.16b + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + eor v8.16b, v8.16b, v12.16b + tbl v0.16b, {v0.16b},v1.16b // vpshufb %xmm1, %xmm0, %xmm0 + tbl v1.16b, {v8.16b},v1.16b + ret +.size _vpaes_encrypt_2x,.-_vpaes_encrypt_2x + +.type _vpaes_decrypt_preheat,%function +.align 4 +_vpaes_decrypt_preheat: + adrp x10, .Lk_inv + add x10, x10, :lo12:.Lk_inv + movi v17.16b, #0x0f + adrp x11, .Lk_dipt + add x11, x11, :lo12:.Lk_dipt + ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv + ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd + ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x11] // .Lk_dsbb, .Lk_dsbe + ret +.size _vpaes_decrypt_preheat,.-_vpaes_decrypt_preheat + +## +## Decryption core +## +## Same API as encryption core. +## +.type _vpaes_decrypt_core,%function +.align 4 +_vpaes_decrypt_core: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + + // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 + eor x11, x11, #0x30 // xor $0x30, %r11 + adrp x10, .Lk_sr + add x10, x10, :lo12:.Lk_sr + and x11, x11, #0x30 // and $0x30, %r11 + add x11, x11, x10 + adrp x10, .Lk_mc_forward+48 + add x10, x10, :lo12:.Lk_mc_forward+48 + + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key + and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + ld1 {v5.2d}, [x10] // vmovdqa .Lk_mc_forward+48(%rip), %xmm5 + // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + b .Ldec_entry + +.align 4 +.Ldec_loop: +// +// Inverse mix columns +// + // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 + // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + sub w8, w8, #1 // sub $1,%rax # nr-- + +.Ldec_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 + cbnz w8, .Ldec_loop + + // middle of last round + // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 + tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A + tbl v0.16b, {v0.16b}, v2.16b // vpshufb %xmm2, %xmm0, %xmm0 + ret +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + +.globl vpaes_decrypt +.hidden vpaes_decrypt +.type vpaes_decrypt,%function +.align 4 +vpaes_decrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v7.16b}, [x0] + bl _vpaes_decrypt_preheat + bl _vpaes_decrypt_core + st1 {v0.16b}, [x1] + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_decrypt,.-vpaes_decrypt + +// v14-v15 input, v0-v1 output +.type _vpaes_decrypt_2x,%function +.align 4 +_vpaes_decrypt_2x: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + + // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 + eor x11, x11, #0x30 // xor $0x30, %r11 + adrp x10, .Lk_sr + add x10, x10, :lo12:.Lk_sr + and x11, x11, #0x30 // and $0x30, %r11 + add x11, x11, x10 + adrp x10, .Lk_mc_forward+48 + add x10, x10, :lo12:.Lk_mc_forward+48 + + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key + and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + and v9.16b, v15.16b, v17.16b + ushr v8.16b, v15.16b, #4 + tbl v2.16b, {v20.16b},v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + tbl v10.16b, {v20.16b},v9.16b + ld1 {v5.2d}, [x10] // vmovdqa .Lk_mc_forward+48(%rip), %xmm5 + // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + tbl v0.16b, {v21.16b},v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + tbl v8.16b, {v21.16b},v8.16b + eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 + eor v10.16b, v10.16b, v16.16b + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + eor v8.16b, v8.16b, v10.16b + b .Ldec_2x_entry + +.align 4 +.Ldec_2x_loop: +// +// Inverse mix columns +// + // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + tbl v12.16b, {v24.16b}, v10.16b + tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + tbl v9.16b, {v25.16b}, v11.16b + eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 + eor v8.16b, v12.16b, v16.16b + // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + tbl v12.16b, {v26.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + tbl v9.16b, {v27.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + tbl v12.16b, {v28.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + tbl v9.16b, {v29.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + tbl v12.16b, {v30.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + tbl v9.16b, {v31.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + sub w8, w8, #1 // sub $1,%rax # nr-- + +.Ldec_2x_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + and v9.16b, v8.16b, v17.16b + ushr v8.16b, v8.16b, #4 + tbl v2.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + tbl v10.16b, {v19.16b},v9.16b + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + eor v9.16b, v9.16b, v8.16b + tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v11.16b, {v18.16b},v8.16b + tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + tbl v12.16b, {v18.16b},v9.16b + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v11.16b, v11.16b, v10.16b + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + eor v12.16b, v12.16b, v10.16b + tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v10.16b, {v18.16b},v11.16b + tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + tbl v11.16b, {v18.16b},v12.16b + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v10.16b, v10.16b, v9.16b + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + eor v11.16b, v11.16b, v8.16b + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 + cbnz w8, .Ldec_2x_loop + + // middle of last round + // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + tbl v12.16b, {v22.16b}, v10.16b + // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + tbl v9.16b, {v23.16b}, v11.16b + ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 + eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A + eor v8.16b, v9.16b, v12.16b + tbl v0.16b, {v0.16b},v2.16b // vpshufb %xmm2, %xmm0, %xmm0 + tbl v1.16b, {v8.16b},v2.16b + ret +.size _vpaes_decrypt_2x,.-_vpaes_decrypt_2x +######################################################## +## ## +## AES key schedule ## +## ## +######################################################## +.type _vpaes_key_preheat,%function +.align 4 +_vpaes_key_preheat: + adrp x10, .Lk_inv + add x10, x10, :lo12:.Lk_inv + movi v16.16b, #0x5b // .Lk_s63 + adrp x11, .Lk_sb1 + add x11, x11, :lo12:.Lk_sb1 + movi v17.16b, #0x0f // .Lk_s0F + ld1 {v18.2d,v19.2d,v20.2d,v21.2d}, [x10] // .Lk_inv, .Lk_ipt + adrp x10, .Lk_dksd + add x10, x10, :lo12:.Lk_dksd + ld1 {v22.2d,v23.2d}, [x11] // .Lk_sb1 + adrp x11, .Lk_mc_forward + add x11, x11, :lo12:.Lk_mc_forward + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb + ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9 + ld1 {v8.2d}, [x10] // .Lk_rcon + ld1 {v9.2d}, [x11] // .Lk_mc_forward[0] + ret +.size _vpaes_key_preheat,.-_vpaes_key_preheat + +.type _vpaes_schedule_core,%function +.align 4 +_vpaes_schedule_core: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp,#-16]! + add x29,sp,#0 + + bl _vpaes_key_preheat // load the tables + + ld1 {v0.16b}, [x0],#16 // vmovdqu (%rdi), %xmm0 # load key (unaligned) + + // input transform + mov v3.16b, v0.16b // vmovdqa %xmm0, %xmm3 + bl _vpaes_schedule_transform + mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7 + + adrp x10, .Lk_sr // lea .Lk_sr(%rip),%r10 + add x10, x10, :lo12:.Lk_sr + + add x8, x8, x10 + cbnz w3, .Lschedule_am_decrypting + + // encrypting, output zeroth round key after transform + st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) + b .Lschedule_go + +.Lschedule_am_decrypting: + // decrypting, output zeroth round key after shiftrows + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) + eor x8, x8, #0x30 // xor $0x30, %r8 + +.Lschedule_go: + cmp w1, #192 // cmp $192, %esi + b.hi .Lschedule_256 + b.eq .Lschedule_192 + // 128: fall though + +## +## .schedule_128 +## +## 128-bit specific part of key schedule. +## +## This schedule is really simple, because all its parts +## are accomplished by the subroutines. +## +.Lschedule_128: + mov x0, #10 // mov $10, %esi + +.Loop_schedule_128: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_round + cbz x0, .Lschedule_mangle_last + bl _vpaes_schedule_mangle // write output + b .Loop_schedule_128 + +## +## .aes_schedule_192 +## +## 192-bit specific part of key schedule. +## +## The main body of this schedule is the same as the 128-bit +## schedule, but with more smearing. The long, high side is +## stored in %xmm7 as before, and the short, low side is in +## the high bits of %xmm6. +## +## This schedule is somewhat nastier, however, because each +## round produces 192 bits of key material, or 1.5 round keys. +## Therefore, on each cycle we do 2 rounds and produce 3 round +## keys. +## +.align 4 +.Lschedule_192: + sub x0, x0, #8 + ld1 {v0.16b}, [x0] // vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) + bl _vpaes_schedule_transform // input transform + mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save short part + eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 # clear 4 + ins v6.d[0], v4.d[0] // vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros + mov x0, #4 // mov $4, %esi + +.Loop_schedule_192: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_round + ext v0.16b, v6.16b, v0.16b, #8 // vpalignr $8,%xmm6,%xmm0,%xmm0 + bl _vpaes_schedule_mangle // save key n + bl _vpaes_schedule_192_smear + bl _vpaes_schedule_mangle // save key n+1 + bl _vpaes_schedule_round + cbz x0, .Lschedule_mangle_last + bl _vpaes_schedule_mangle // save key n+2 + bl _vpaes_schedule_192_smear + b .Loop_schedule_192 + +## +## .aes_schedule_256 +## +## 256-bit specific part of key schedule. +## +## The structure here is very similar to the 128-bit +## schedule, but with an additional "low side" in +## %xmm6. The low side's rounds are the same as the +## high side's, except no rcon and no rotation. +## +.align 4 +.Lschedule_256: + ld1 {v0.16b}, [x0] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) + bl _vpaes_schedule_transform // input transform + mov x0, #7 // mov $7, %esi + +.Loop_schedule_256: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_mangle // output low result + mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 + + // high round + bl _vpaes_schedule_round + cbz x0, .Lschedule_mangle_last + bl _vpaes_schedule_mangle + + // low round. swap xmm7 and xmm6 + dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 + movi v4.16b, #0 + mov v5.16b, v7.16b // vmovdqa %xmm7, %xmm5 + mov v7.16b, v6.16b // vmovdqa %xmm6, %xmm7 + bl _vpaes_schedule_low_round + mov v7.16b, v5.16b // vmovdqa %xmm5, %xmm7 + + b .Loop_schedule_256 + +## +## .aes_schedule_mangle_last +## +## Mangler for last round of key schedule +## Mangles %xmm0 +## when encrypting, outputs out(%xmm0) ^ 63 +## when decrypting, outputs unskew(%xmm0) +## +## Always called right before return... jumps to cleanup and exits +## +.align 4 +.Lschedule_mangle_last: + // schedule last round key from xmm0 + adrp x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew + add x11, x11, :lo12:.Lk_deskew + + cbnz w3, .Lschedule_mangle_last_dec + + // encrypting + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1 + adrp x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform + add x11, x11, :lo12:.Lk_opt + add x2, x2, #32 // add $32, %rdx + tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute + +.Lschedule_mangle_last_dec: + ld1 {v20.2d,v21.2d}, [x11] // reload constants + sub x2, x2, #16 // add $-16, %rdx + eor v0.16b, v0.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm0, %xmm0 + bl _vpaes_schedule_transform // output transform + st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) # save last key + + // cleanup + eor v0.16b, v0.16b, v0.16b // vpxor %xmm0, %xmm0, %xmm0 + eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 + eor v2.16b, v2.16b, v2.16b // vpxor %xmm2, %xmm2, %xmm2 + eor v3.16b, v3.16b, v3.16b // vpxor %xmm3, %xmm3, %xmm3 + eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 + eor v5.16b, v5.16b, v5.16b // vpxor %xmm5, %xmm5, %xmm5 + eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 + eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 + ldp x29, x30, [sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size _vpaes_schedule_core,.-_vpaes_schedule_core + +## +## .aes_schedule_192_smear +## +## Smear the short, low side in the 192-bit key schedule. +## +## Inputs: +## %xmm7: high side, b a x y +## %xmm6: low side, d c 0 0 +## %xmm13: 0 +## +## Outputs: +## %xmm6: b+c+d b+c 0 0 +## %xmm0: b+c+d b+c b a +## +.type _vpaes_schedule_192_smear,%function +.align 4 +_vpaes_schedule_192_smear: + movi v1.16b, #0 + dup v0.4s, v7.s[3] + ins v1.s[3], v6.s[2] // vpshufd $0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 + ins v0.s[0], v7.s[2] // vpshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + eor v6.16b, v6.16b, v1.16b // vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 + eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 + eor v6.16b, v6.16b, v0.16b // vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a + mov v0.16b, v6.16b // vmovdqa %xmm6, %xmm0 + ins v6.d[0], v1.d[0] // vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros + ret +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + +## +## .aes_schedule_round +## +## Runs one main round of the key schedule on %xmm0, %xmm7 +## +## Specifically, runs subbytes on the high dword of %xmm0 +## then rotates it by one byte and xors into the low dword of +## %xmm7. +## +## Adds rcon from low byte of %xmm8, then rotates %xmm8 for +## next rcon. +## +## Smears the dwords of %xmm7 by xoring the low into the +## second low, result into third, result into highest. +## +## Returns results in %xmm7 = %xmm0. +## Clobbers %xmm1-%xmm4, %r11. +## +.type _vpaes_schedule_round,%function +.align 4 +_vpaes_schedule_round: + // extract rcon from xmm8 + movi v4.16b, #0 // vpxor %xmm4, %xmm4, %xmm4 + ext v1.16b, v8.16b, v4.16b, #15 // vpalignr $15, %xmm8, %xmm4, %xmm1 + ext v8.16b, v8.16b, v8.16b, #15 // vpalignr $15, %xmm8, %xmm8, %xmm8 + eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 + + // rotate + dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 + ext v0.16b, v0.16b, v0.16b, #1 // vpalignr $1, %xmm0, %xmm0, %xmm0 + + // fall through... + + // low round: same as high round, but no rotation and no rcon. +_vpaes_schedule_low_round: + // smear xmm7 + ext v1.16b, v4.16b, v7.16b, #12 // vpslldq $4, %xmm7, %xmm1 + eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 + ext v4.16b, v4.16b, v7.16b, #8 // vpslldq $8, %xmm7, %xmm4 + + // subbytes + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + eor v7.16b, v7.16b, v4.16b // vpxor %xmm4, %xmm7, %xmm7 + tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v7.16b, v7.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm7, %xmm7 + tbl v3.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak + eor v3.16b, v3.16b, v1.16b // vpxor %xmm1, %xmm3, %xmm3 # 2 = io + eor v2.16b, v2.16b, v0.16b // vpxor %xmm0, %xmm2, %xmm2 # 3 = jo + tbl v4.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou + tbl v1.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t + eor v1.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output + + // add in smeared stuff + eor v0.16b, v1.16b, v7.16b // vpxor %xmm7, %xmm1, %xmm0 + eor v7.16b, v1.16b, v7.16b // vmovdqa %xmm0, %xmm7 + ret +.size _vpaes_schedule_round,.-_vpaes_schedule_round + +## +## .aes_schedule_transform +## +## Linear-transform %xmm0 according to tables at (%r11) +## +## Requires that %xmm9 = 0x0F0F... as in preheat +## Output in %xmm0 +## Clobbers %xmm1, %xmm2 +## +.type _vpaes_schedule_transform,%function +.align 4 +_vpaes_schedule_transform: + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + // vmovdqa (%r11), %xmm2 # lo + tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + // vmovdqa 16(%r11), %xmm1 # hi + tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + ret +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + +## +## .aes_schedule_mangle +## +## Mangle xmm0 from (basis-transformed) standard version +## to our version. +## +## On encrypt, +## xor with 0x63 +## multiply by circulant 0,1,1,1 +## apply shiftrows transform +## +## On decrypt, +## xor with 0x63 +## multiply by "inverse mixcolumns" circulant E,B,D,9 +## deskew +## apply shiftrows transform +## +## +## Writes out to (%rdx), and increments or decrements it +## Keeps track of round number mod 4 in %r8 +## Preserves xmm0 +## Clobbers xmm1-xmm5 +## +.type _vpaes_schedule_mangle,%function +.align 4 +_vpaes_schedule_mangle: + mov v4.16b, v0.16b // vmovdqa %xmm0, %xmm4 # save xmm0 for later + // vmovdqa .Lk_mc_forward(%rip),%xmm5 + cbnz w3, .Lschedule_mangle_dec + + // encrypting + eor v4.16b, v0.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm0, %xmm4 + add x2, x2, #16 // add $16, %rdx + tbl v4.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm4 + tbl v1.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm1 + tbl v3.16b, {v1.16b}, v9.16b // vpshufb %xmm5, %xmm1, %xmm3 + eor v4.16b, v4.16b, v1.16b // vpxor %xmm1, %xmm4, %xmm4 + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + eor v3.16b, v3.16b, v4.16b // vpxor %xmm4, %xmm3, %xmm3 + + b .Lschedule_mangle_both +.align 4 +.Lschedule_mangle_dec: + // inverse mix columns + // lea .Lk_dksd(%rip),%r11 + ushr v1.16b, v4.16b, #4 // vpsrlb $4, %xmm4, %xmm1 # 1 = hi + and v4.16b, v4.16b, v17.16b // vpand %xmm9, %xmm4, %xmm4 # 4 = lo + + // vmovdqa 0x00(%r11), %xmm2 + tbl v2.16b, {v24.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + // vmovdqa 0x10(%r11), %xmm3 + tbl v3.16b, {v25.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + + // vmovdqa 0x20(%r11), %xmm2 + tbl v2.16b, {v26.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + // vmovdqa 0x30(%r11), %xmm3 + tbl v3.16b, {v27.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + + // vmovdqa 0x40(%r11), %xmm2 + tbl v2.16b, {v28.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + // vmovdqa 0x50(%r11), %xmm3 + tbl v3.16b, {v29.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + + // vmovdqa 0x60(%r11), %xmm2 + tbl v2.16b, {v30.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + // vmovdqa 0x70(%r11), %xmm4 + tbl v4.16b, {v31.16b}, v1.16b // vpshufb %xmm1, %xmm4, %xmm4 + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + eor v3.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm3 + + sub x2, x2, #16 // add $-16, %rdx + +.Lschedule_mangle_both: + tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + add x8, x8, #48 // add $-16, %r8 + and x8, x8, #~(1<<6) // and $0x30, %r8 + st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) + ret +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + +.globl vpaes_set_encrypt_key +.hidden vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,%function +.align 4 +vpaes_set_encrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + + lsr w9, w1, #5 // shr $5,%eax + add w9, w9, #5 // $5,%eax + str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + + mov w3, #0 // mov $0,%ecx + mov x8, #0x30 // mov $0x30,%r8d + bl _vpaes_schedule_core + eor x0, x0, x0 + + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.hidden vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,%function +.align 4 +vpaes_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + + lsr w9, w1, #5 // shr $5,%eax + add w9, w9, #5 // $5,%eax + str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + lsl w9, w9, #4 // shl $4,%eax + add x2, x2, #16 // lea 16(%rdx,%rax),%rdx + add x2, x2, x9 + + mov w3, #1 // mov $1,%ecx + lsr w8, w1, #1 // shr $1,%r8d + and x8, x8, #32 // and $32,%r8d + eor x8, x8, #32 // xor $32,%r8d # nbits==192?0:32 + bl _vpaes_schedule_core + + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key +.globl vpaes_cbc_encrypt +.hidden vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,%function +.align 4 +vpaes_cbc_encrypt: + AARCH64_SIGN_LINK_REGISTER + cbz x2, .Lcbc_abort + cmp w5, #0 // check direction + b.eq vpaes_cbc_decrypt + + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x17, x2 // reassign + mov x2, x3 // reassign + + ld1 {v0.16b}, [x4] // load ivec + bl _vpaes_encrypt_preheat + b .Lcbc_enc_loop + +.align 4 +.Lcbc_enc_loop: + ld1 {v7.16b}, [x0],#16 // load input + eor v7.16b, v7.16b, v0.16b // xor with ivec + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1],#16 // save output + subs x17, x17, #16 + b.hi .Lcbc_enc_loop + + st1 {v0.16b}, [x4] // write ivec + + ldp x29,x30,[sp],#16 +.Lcbc_abort: + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt + +.type vpaes_cbc_decrypt,%function +.align 4 +vpaes_cbc_decrypt: + // Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to + // only from vpaes_cbc_encrypt which has already signed the return address. + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! + + mov x17, x2 // reassign + mov x2, x3 // reassign + ld1 {v6.16b}, [x4] // load ivec + bl _vpaes_decrypt_preheat + tst x17, #16 + b.eq .Lcbc_dec_loop2x + + ld1 {v7.16b}, [x0], #16 // load input + bl _vpaes_decrypt_core + eor v0.16b, v0.16b, v6.16b // xor with ivec + orr v6.16b, v7.16b, v7.16b // next ivec value + st1 {v0.16b}, [x1], #16 + subs x17, x17, #16 + b.ls .Lcbc_dec_done + +.align 4 +.Lcbc_dec_loop2x: + ld1 {v14.16b,v15.16b}, [x0], #32 + bl _vpaes_decrypt_2x + eor v0.16b, v0.16b, v6.16b // xor with ivec + eor v1.16b, v1.16b, v14.16b + orr v6.16b, v15.16b, v15.16b + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #32 + b.hi .Lcbc_dec_loop2x + +.Lcbc_dec_done: + st1 {v6.16b}, [x4] + + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt +.globl vpaes_ctr32_encrypt_blocks +.hidden vpaes_ctr32_encrypt_blocks +.type vpaes_ctr32_encrypt_blocks,%function +.align 4 +vpaes_ctr32_encrypt_blocks: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! + + cbz x2, .Lctr32_done + + // Note, unlike the other functions, x2 here is measured in blocks, + // not bytes. + mov x17, x2 + mov x2, x3 + + // Load the IV and counter portion. + ldr w6, [x4, #12] + ld1 {v7.16b}, [x4] + + bl _vpaes_encrypt_preheat + tst x17, #1 + rev w6, w6 // The counter is big-endian. + b.eq .Lctr32_prep_loop + + // Handle one block so the remaining block count is even for + // _vpaes_encrypt_2x. + ld1 {v6.16b}, [x0], #16 // .Load input ahead of time + bl _vpaes_encrypt_core + eor v0.16b, v0.16b, v6.16b // XOR input and result + st1 {v0.16b}, [x1], #16 + subs x17, x17, #1 + // Update the counter. + add w6, w6, #1 + rev w7, w6 + mov v7.s[3], w7 + b.ls .Lctr32_done + +.Lctr32_prep_loop: + // _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x + // uses v14 and v15. + mov v15.16b, v7.16b + mov v14.16b, v7.16b + add w6, w6, #1 + rev w7, w6 + mov v15.s[3], w7 + +.Lctr32_loop: + ld1 {v6.16b,v7.16b}, [x0], #32 // .Load input ahead of time + bl _vpaes_encrypt_2x + eor v0.16b, v0.16b, v6.16b // XOR input and result + eor v1.16b, v1.16b, v7.16b // XOR input and result (#2) + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #2 + // Update the counter. + add w7, w6, #1 + add w6, w6, #2 + rev w7, w7 + mov v14.s[3], w7 + rev w7, w6 + mov v15.s[3], w7 + b.hi .Lctr32_loop + +.Lctr32_done: + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-win.S b/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-win.S new file mode 100644 index 0000000000..d399d22942 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-armv8-win.S @@ -0,0 +1,1262 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +.section .rodata + + +.align 7 // totally strategic alignment +_vpaes_consts: +Lk_mc_forward: // mc_forward +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 +Lk_mc_backward: // mc_backward +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F +Lk_sr: // sr +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +// +// "Hot" constants +// +Lk_inv: // inv, inva +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 +Lk_ipt: // input transform (lo, hi) +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 +Lk_sbo: // sbou, sbot +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA +Lk_sb1: // sb1u, sb1t +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +Lk_sb2: // sb2u, sb2t +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD + +// +// Decryption stuff +// +Lk_dipt: // decryption input transform +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 +Lk_dsbo: // decryption sbox final output +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +Lk_dsb9: // decryption sbox output *9*u, *9*t +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +Lk_dsbd: // decryption sbox output *D*u, *D*t +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +Lk_dsbb: // decryption sbox output *B*u, *B*t +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +Lk_dsbe: // decryption sbox output *E*u, *E*t +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 + +// +// Key schedule constants +// +Lk_dksd: // decryption key schedule: invskew x*D +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +Lk_dksb: // decryption key schedule: invskew x*B +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +Lk_dkse: // decryption key schedule: invskew x*E + 0x63 +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +Lk_dks9: // decryption key schedule: invskew x*9 +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + +Lk_rcon: // rcon +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +Lk_opt: // output transform +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 +Lk_deskew: // deskew tables: inverts the sbox's "skew" +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,56,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 2 + +.align 6 + +.text +## +## _aes_preheat +## +## Fills register %r10 -> .aes_consts (so you can -fPIC) +## and %xmm9-%xmm15 as specified below. +## +.def _vpaes_encrypt_preheat + .type 32 +.endef +.align 4 +_vpaes_encrypt_preheat: + adrp x10, Lk_inv + add x10, x10, :lo12:Lk_inv + movi v17.16b, #0x0f + ld1 {v18.2d,v19.2d}, [x10],#32 // Lk_inv + ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64 // Lk_ipt, Lk_sbo + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10] // Lk_sb1, Lk_sb2 + ret + + +## +## _aes_encrypt_core +## +## AES-encrypt %xmm0. +## +## Inputs: +## %xmm0 = input +## %xmm9-%xmm15 as in _vpaes_preheat +## (%rdx) = scheduled keys +## +## Output in %xmm0 +## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax +## Preserves %xmm6 - %xmm8 so you get some local vectors +## +## +.def _vpaes_encrypt_core + .type 32 +.endef +.align 4 +_vpaes_encrypt_core: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + adrp x11, Lk_mc_forward+16 + add x11, x11, :lo12:Lk_mc_forward+16 + // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key + and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 + // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 + eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + b Lenc_entry + +.align 4 +Lenc_loop: + // middle of middle round + add x10, x11, #0x40 + tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[] + tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[] + tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + and x11, x11, #~(1<<6) // and $0x30, %r11 # ... mod 4 + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + sub w8, w8, #1 // nr-- + +Lenc_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + tbl v5.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 + cbnz w8, Lenc_loop + + // middle of last round + add x10, x11, #0x80 + // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[] + tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 + ret + + +.globl vpaes_encrypt + +.def vpaes_encrypt + .type 32 +.endef +.align 4 +vpaes_encrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v7.16b}, [x0] + bl _vpaes_encrypt_preheat + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1] + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.def _vpaes_encrypt_2x + .type 32 +.endef +.align 4 +_vpaes_encrypt_2x: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + adrp x11, Lk_mc_forward+16 + add x11, x11, :lo12:Lk_mc_forward+16 + // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key + and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + and v9.16b, v15.16b, v17.16b + ushr v8.16b, v15.16b, #4 + tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 + tbl v9.16b, {v20.16b}, v9.16b + // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 + tbl v10.16b, {v21.16b}, v8.16b + eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 + eor v8.16b, v9.16b, v16.16b + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + eor v8.16b, v8.16b, v10.16b + b Lenc_2x_entry + +.align 4 +Lenc_2x_loop: + // middle of middle round + add x10, x11, #0x40 + tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + tbl v12.16b, {v25.16b}, v10.16b + ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[] + tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + tbl v8.16b, {v24.16b}, v11.16b + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + tbl v13.16b, {v27.16b}, v10.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + eor v8.16b, v8.16b, v12.16b + tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + tbl v10.16b, {v26.16b}, v11.16b + ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[] + tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + tbl v11.16b, {v8.16b}, v1.16b + eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + eor v10.16b, v10.16b, v13.16b + tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + tbl v8.16b, {v8.16b}, v4.16b + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + eor v11.16b, v11.16b, v10.16b + tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + tbl v12.16b, {v11.16b},v1.16b + eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + eor v8.16b, v8.16b, v11.16b + and x11, x11, #~(1<<6) // and $0x30, %r11 # ... mod 4 + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + eor v8.16b, v8.16b, v12.16b + sub w8, w8, #1 // nr-- + +Lenc_2x_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + and v9.16b, v8.16b, v17.16b + ushr v8.16b, v8.16b, #4 + tbl v5.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + tbl v13.16b, {v19.16b},v9.16b + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + eor v9.16b, v9.16b, v8.16b + tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v11.16b, {v18.16b},v8.16b + tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + tbl v12.16b, {v18.16b},v9.16b + eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v11.16b, v11.16b, v13.16b + eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + eor v12.16b, v12.16b, v13.16b + tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v10.16b, {v18.16b},v11.16b + tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + tbl v11.16b, {v18.16b},v12.16b + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v10.16b, v10.16b, v9.16b + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + eor v11.16b, v11.16b, v8.16b + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 + cbnz w8, Lenc_2x_loop + + // middle of last round + add x10, x11, #0x80 + // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + tbl v12.16b, {v22.16b}, v10.16b + ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[] + tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + tbl v8.16b, {v23.16b}, v11.16b + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + eor v8.16b, v8.16b, v12.16b + tbl v0.16b, {v0.16b},v1.16b // vpshufb %xmm1, %xmm0, %xmm0 + tbl v1.16b, {v8.16b},v1.16b + ret + + +.def _vpaes_decrypt_preheat + .type 32 +.endef +.align 4 +_vpaes_decrypt_preheat: + adrp x10, Lk_inv + add x10, x10, :lo12:Lk_inv + movi v17.16b, #0x0f + adrp x11, Lk_dipt + add x11, x11, :lo12:Lk_dipt + ld1 {v18.2d,v19.2d}, [x10],#32 // Lk_inv + ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64 // Lk_dipt, Lk_dsbo + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64 // Lk_dsb9, Lk_dsbd + ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x11] // Lk_dsbb, Lk_dsbe + ret + + +## +## Decryption core +## +## Same API as encryption core. +## +.def _vpaes_decrypt_core + .type 32 +.endef +.align 4 +_vpaes_decrypt_core: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + + // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 + eor x11, x11, #0x30 // xor $0x30, %r11 + adrp x10, Lk_sr + add x10, x10, :lo12:Lk_sr + and x11, x11, #0x30 // and $0x30, %r11 + add x11, x11, x10 + adrp x10, Lk_mc_forward+48 + add x10, x10, :lo12:Lk_mc_forward+48 + + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key + and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + ld1 {v5.2d}, [x10] // vmovdqa Lk_mc_forward+48(%rip), %xmm5 + // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + b Ldec_entry + +.align 4 +Ldec_loop: +// +// Inverse mix columns +// + // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 + // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + sub w8, w8, #1 // sub $1,%rax # nr-- + +Ldec_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 + cbnz w8, Ldec_loop + + // middle of last round + // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160 + tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A + tbl v0.16b, {v0.16b}, v2.16b // vpshufb %xmm2, %xmm0, %xmm0 + ret + + +.globl vpaes_decrypt + +.def vpaes_decrypt + .type 32 +.endef +.align 4 +vpaes_decrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v7.16b}, [x0] + bl _vpaes_decrypt_preheat + bl _vpaes_decrypt_core + st1 {v0.16b}, [x1] + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +// v14-v15 input, v0-v1 output +.def _vpaes_decrypt_2x + .type 32 +.endef +.align 4 +_vpaes_decrypt_2x: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + + // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 + eor x11, x11, #0x30 // xor $0x30, %r11 + adrp x10, Lk_sr + add x10, x10, :lo12:Lk_sr + and x11, x11, #0x30 // and $0x30, %r11 + add x11, x11, x10 + adrp x10, Lk_mc_forward+48 + add x10, x10, :lo12:Lk_mc_forward+48 + + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key + and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + and v9.16b, v15.16b, v17.16b + ushr v8.16b, v15.16b, #4 + tbl v2.16b, {v20.16b},v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + tbl v10.16b, {v20.16b},v9.16b + ld1 {v5.2d}, [x10] // vmovdqa Lk_mc_forward+48(%rip), %xmm5 + // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + tbl v0.16b, {v21.16b},v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + tbl v8.16b, {v21.16b},v8.16b + eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 + eor v10.16b, v10.16b, v16.16b + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + eor v8.16b, v8.16b, v10.16b + b Ldec_2x_entry + +.align 4 +Ldec_2x_loop: +// +// Inverse mix columns +// + // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + tbl v12.16b, {v24.16b}, v10.16b + tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + tbl v9.16b, {v25.16b}, v11.16b + eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 + eor v8.16b, v12.16b, v16.16b + // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + tbl v12.16b, {v26.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + tbl v9.16b, {v27.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + tbl v12.16b, {v28.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + tbl v9.16b, {v29.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + tbl v12.16b, {v30.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + tbl v9.16b, {v31.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + sub w8, w8, #1 // sub $1,%rax # nr-- + +Ldec_2x_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + and v9.16b, v8.16b, v17.16b + ushr v8.16b, v8.16b, #4 + tbl v2.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + tbl v10.16b, {v19.16b},v9.16b + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + eor v9.16b, v9.16b, v8.16b + tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v11.16b, {v18.16b},v8.16b + tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + tbl v12.16b, {v18.16b},v9.16b + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v11.16b, v11.16b, v10.16b + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + eor v12.16b, v12.16b, v10.16b + tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v10.16b, {v18.16b},v11.16b + tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + tbl v11.16b, {v18.16b},v12.16b + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v10.16b, v10.16b, v9.16b + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + eor v11.16b, v11.16b, v8.16b + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 + cbnz w8, Ldec_2x_loop + + // middle of last round + // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + tbl v12.16b, {v22.16b}, v10.16b + // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + tbl v9.16b, {v23.16b}, v11.16b + ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160 + eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A + eor v8.16b, v9.16b, v12.16b + tbl v0.16b, {v0.16b},v2.16b // vpshufb %xmm2, %xmm0, %xmm0 + tbl v1.16b, {v8.16b},v2.16b + ret + +######################################################## +## ## +## AES key schedule ## +## ## +######################################################## +.def _vpaes_key_preheat + .type 32 +.endef +.align 4 +_vpaes_key_preheat: + adrp x10, Lk_inv + add x10, x10, :lo12:Lk_inv + movi v16.16b, #0x5b // Lk_s63 + adrp x11, Lk_sb1 + add x11, x11, :lo12:Lk_sb1 + movi v17.16b, #0x0f // Lk_s0F + ld1 {v18.2d,v19.2d,v20.2d,v21.2d}, [x10] // Lk_inv, Lk_ipt + adrp x10, Lk_dksd + add x10, x10, :lo12:Lk_dksd + ld1 {v22.2d,v23.2d}, [x11] // Lk_sb1 + adrp x11, Lk_mc_forward + add x11, x11, :lo12:Lk_mc_forward + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64 // Lk_dksd, Lk_dksb + ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64 // Lk_dkse, Lk_dks9 + ld1 {v8.2d}, [x10] // Lk_rcon + ld1 {v9.2d}, [x11] // Lk_mc_forward[0] + ret + + +.def _vpaes_schedule_core + .type 32 +.endef +.align 4 +_vpaes_schedule_core: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp,#-16]! + add x29,sp,#0 + + bl _vpaes_key_preheat // load the tables + + ld1 {v0.16b}, [x0],#16 // vmovdqu (%rdi), %xmm0 # load key (unaligned) + + // input transform + mov v3.16b, v0.16b // vmovdqa %xmm0, %xmm3 + bl _vpaes_schedule_transform + mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7 + + adrp x10, Lk_sr // lea Lk_sr(%rip),%r10 + add x10, x10, :lo12:Lk_sr + + add x8, x8, x10 + cbnz w3, Lschedule_am_decrypting + + // encrypting, output zeroth round key after transform + st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) + b Lschedule_go + +Lschedule_am_decrypting: + // decrypting, output zeroth round key after shiftrows + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) + eor x8, x8, #0x30 // xor $0x30, %r8 + +Lschedule_go: + cmp w1, #192 // cmp $192, %esi + b.hi Lschedule_256 + b.eq Lschedule_192 + // 128: fall though + +## +## .schedule_128 +## +## 128-bit specific part of key schedule. +## +## This schedule is really simple, because all its parts +## are accomplished by the subroutines. +## +Lschedule_128: + mov x0, #10 // mov $10, %esi + +Loop_schedule_128: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_round + cbz x0, Lschedule_mangle_last + bl _vpaes_schedule_mangle // write output + b Loop_schedule_128 + +## +## .aes_schedule_192 +## +## 192-bit specific part of key schedule. +## +## The main body of this schedule is the same as the 128-bit +## schedule, but with more smearing. The long, high side is +## stored in %xmm7 as before, and the short, low side is in +## the high bits of %xmm6. +## +## This schedule is somewhat nastier, however, because each +## round produces 192 bits of key material, or 1.5 round keys. +## Therefore, on each cycle we do 2 rounds and produce 3 round +## keys. +## +.align 4 +Lschedule_192: + sub x0, x0, #8 + ld1 {v0.16b}, [x0] // vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) + bl _vpaes_schedule_transform // input transform + mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save short part + eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 # clear 4 + ins v6.d[0], v4.d[0] // vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros + mov x0, #4 // mov $4, %esi + +Loop_schedule_192: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_round + ext v0.16b, v6.16b, v0.16b, #8 // vpalignr $8,%xmm6,%xmm0,%xmm0 + bl _vpaes_schedule_mangle // save key n + bl _vpaes_schedule_192_smear + bl _vpaes_schedule_mangle // save key n+1 + bl _vpaes_schedule_round + cbz x0, Lschedule_mangle_last + bl _vpaes_schedule_mangle // save key n+2 + bl _vpaes_schedule_192_smear + b Loop_schedule_192 + +## +## .aes_schedule_256 +## +## 256-bit specific part of key schedule. +## +## The structure here is very similar to the 128-bit +## schedule, but with an additional "low side" in +## %xmm6. The low side's rounds are the same as the +## high side's, except no rcon and no rotation. +## +.align 4 +Lschedule_256: + ld1 {v0.16b}, [x0] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) + bl _vpaes_schedule_transform // input transform + mov x0, #7 // mov $7, %esi + +Loop_schedule_256: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_mangle // output low result + mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 + + // high round + bl _vpaes_schedule_round + cbz x0, Lschedule_mangle_last + bl _vpaes_schedule_mangle + + // low round. swap xmm7 and xmm6 + dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 + movi v4.16b, #0 + mov v5.16b, v7.16b // vmovdqa %xmm7, %xmm5 + mov v7.16b, v6.16b // vmovdqa %xmm6, %xmm7 + bl _vpaes_schedule_low_round + mov v7.16b, v5.16b // vmovdqa %xmm5, %xmm7 + + b Loop_schedule_256 + +## +## .aes_schedule_mangle_last +## +## Mangler for last round of key schedule +## Mangles %xmm0 +## when encrypting, outputs out(%xmm0) ^ 63 +## when decrypting, outputs unskew(%xmm0) +## +## Always called right before return... jumps to cleanup and exits +## +.align 4 +Lschedule_mangle_last: + // schedule last round key from xmm0 + adrp x11, Lk_deskew // lea Lk_deskew(%rip),%r11 # prepare to deskew + add x11, x11, :lo12:Lk_deskew + + cbnz w3, Lschedule_mangle_last_dec + + // encrypting + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1 + adrp x11, Lk_opt // lea Lk_opt(%rip), %r11 # prepare to output transform + add x11, x11, :lo12:Lk_opt + add x2, x2, #32 // add $32, %rdx + tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute + +Lschedule_mangle_last_dec: + ld1 {v20.2d,v21.2d}, [x11] // reload constants + sub x2, x2, #16 // add $-16, %rdx + eor v0.16b, v0.16b, v16.16b // vpxor Lk_s63(%rip), %xmm0, %xmm0 + bl _vpaes_schedule_transform // output transform + st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) # save last key + + // cleanup + eor v0.16b, v0.16b, v0.16b // vpxor %xmm0, %xmm0, %xmm0 + eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 + eor v2.16b, v2.16b, v2.16b // vpxor %xmm2, %xmm2, %xmm2 + eor v3.16b, v3.16b, v3.16b // vpxor %xmm3, %xmm3, %xmm3 + eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 + eor v5.16b, v5.16b, v5.16b // vpxor %xmm5, %xmm5, %xmm5 + eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 + eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 + ldp x29, x30, [sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +## +## .aes_schedule_192_smear +## +## Smear the short, low side in the 192-bit key schedule. +## +## Inputs: +## %xmm7: high side, b a x y +## %xmm6: low side, d c 0 0 +## %xmm13: 0 +## +## Outputs: +## %xmm6: b+c+d b+c 0 0 +## %xmm0: b+c+d b+c b a +## +.def _vpaes_schedule_192_smear + .type 32 +.endef +.align 4 +_vpaes_schedule_192_smear: + movi v1.16b, #0 + dup v0.4s, v7.s[3] + ins v1.s[3], v6.s[2] // vpshufd $0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 + ins v0.s[0], v7.s[2] // vpshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + eor v6.16b, v6.16b, v1.16b // vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 + eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 + eor v6.16b, v6.16b, v0.16b // vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a + mov v0.16b, v6.16b // vmovdqa %xmm6, %xmm0 + ins v6.d[0], v1.d[0] // vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros + ret + + +## +## .aes_schedule_round +## +## Runs one main round of the key schedule on %xmm0, %xmm7 +## +## Specifically, runs subbytes on the high dword of %xmm0 +## then rotates it by one byte and xors into the low dword of +## %xmm7. +## +## Adds rcon from low byte of %xmm8, then rotates %xmm8 for +## next rcon. +## +## Smears the dwords of %xmm7 by xoring the low into the +## second low, result into third, result into highest. +## +## Returns results in %xmm7 = %xmm0. +## Clobbers %xmm1-%xmm4, %r11. +## +.def _vpaes_schedule_round + .type 32 +.endef +.align 4 +_vpaes_schedule_round: + // extract rcon from xmm8 + movi v4.16b, #0 // vpxor %xmm4, %xmm4, %xmm4 + ext v1.16b, v8.16b, v4.16b, #15 // vpalignr $15, %xmm8, %xmm4, %xmm1 + ext v8.16b, v8.16b, v8.16b, #15 // vpalignr $15, %xmm8, %xmm8, %xmm8 + eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 + + // rotate + dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 + ext v0.16b, v0.16b, v0.16b, #1 // vpalignr $1, %xmm0, %xmm0, %xmm0 + + // fall through... + + // low round: same as high round, but no rotation and no rcon. +_vpaes_schedule_low_round: + // smear xmm7 + ext v1.16b, v4.16b, v7.16b, #12 // vpslldq $4, %xmm7, %xmm1 + eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 + ext v4.16b, v4.16b, v7.16b, #8 // vpslldq $8, %xmm7, %xmm4 + + // subbytes + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + eor v7.16b, v7.16b, v4.16b // vpxor %xmm4, %xmm7, %xmm7 + tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v7.16b, v7.16b, v16.16b // vpxor Lk_s63(%rip), %xmm7, %xmm7 + tbl v3.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak + eor v3.16b, v3.16b, v1.16b // vpxor %xmm1, %xmm3, %xmm3 # 2 = io + eor v2.16b, v2.16b, v0.16b // vpxor %xmm0, %xmm2, %xmm2 # 3 = jo + tbl v4.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou + tbl v1.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t + eor v1.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output + + // add in smeared stuff + eor v0.16b, v1.16b, v7.16b // vpxor %xmm7, %xmm1, %xmm0 + eor v7.16b, v1.16b, v7.16b // vmovdqa %xmm0, %xmm7 + ret + + +## +## .aes_schedule_transform +## +## Linear-transform %xmm0 according to tables at (%r11) +## +## Requires that %xmm9 = 0x0F0F... as in preheat +## Output in %xmm0 +## Clobbers %xmm1, %xmm2 +## +.def _vpaes_schedule_transform + .type 32 +.endef +.align 4 +_vpaes_schedule_transform: + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + // vmovdqa (%r11), %xmm2 # lo + tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + // vmovdqa 16(%r11), %xmm1 # hi + tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + ret + + +## +## .aes_schedule_mangle +## +## Mangle xmm0 from (basis-transformed) standard version +## to our version. +## +## On encrypt, +## xor with 0x63 +## multiply by circulant 0,1,1,1 +## apply shiftrows transform +## +## On decrypt, +## xor with 0x63 +## multiply by "inverse mixcolumns" circulant E,B,D,9 +## deskew +## apply shiftrows transform +## +## +## Writes out to (%rdx), and increments or decrements it +## Keeps track of round number mod 4 in %r8 +## Preserves xmm0 +## Clobbers xmm1-xmm5 +## +.def _vpaes_schedule_mangle + .type 32 +.endef +.align 4 +_vpaes_schedule_mangle: + mov v4.16b, v0.16b // vmovdqa %xmm0, %xmm4 # save xmm0 for later + // vmovdqa .Lk_mc_forward(%rip),%xmm5 + cbnz w3, Lschedule_mangle_dec + + // encrypting + eor v4.16b, v0.16b, v16.16b // vpxor Lk_s63(%rip), %xmm0, %xmm4 + add x2, x2, #16 // add $16, %rdx + tbl v4.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm4 + tbl v1.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm1 + tbl v3.16b, {v1.16b}, v9.16b // vpshufb %xmm5, %xmm1, %xmm3 + eor v4.16b, v4.16b, v1.16b // vpxor %xmm1, %xmm4, %xmm4 + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + eor v3.16b, v3.16b, v4.16b // vpxor %xmm4, %xmm3, %xmm3 + + b Lschedule_mangle_both +.align 4 +Lschedule_mangle_dec: + // inverse mix columns + // lea .Lk_dksd(%rip),%r11 + ushr v1.16b, v4.16b, #4 // vpsrlb $4, %xmm4, %xmm1 # 1 = hi + and v4.16b, v4.16b, v17.16b // vpand %xmm9, %xmm4, %xmm4 # 4 = lo + + // vmovdqa 0x00(%r11), %xmm2 + tbl v2.16b, {v24.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + // vmovdqa 0x10(%r11), %xmm3 + tbl v3.16b, {v25.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + + // vmovdqa 0x20(%r11), %xmm2 + tbl v2.16b, {v26.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + // vmovdqa 0x30(%r11), %xmm3 + tbl v3.16b, {v27.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + + // vmovdqa 0x40(%r11), %xmm2 + tbl v2.16b, {v28.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + // vmovdqa 0x50(%r11), %xmm3 + tbl v3.16b, {v29.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + + // vmovdqa 0x60(%r11), %xmm2 + tbl v2.16b, {v30.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + // vmovdqa 0x70(%r11), %xmm4 + tbl v4.16b, {v31.16b}, v1.16b // vpshufb %xmm1, %xmm4, %xmm4 + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + eor v3.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm3 + + sub x2, x2, #16 // add $-16, %rdx + +Lschedule_mangle_both: + tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + add x8, x8, #48 // add $-16, %r8 + and x8, x8, #~(1<<6) // and $0x30, %r8 + st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) + ret + + +.globl vpaes_set_encrypt_key + +.def vpaes_set_encrypt_key + .type 32 +.endef +.align 4 +vpaes_set_encrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + + lsr w9, w1, #5 // shr $5,%eax + add w9, w9, #5 // $5,%eax + str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + + mov w3, #0 // mov $0,%ecx + mov x8, #0x30 // mov $0x30,%r8d + bl _vpaes_schedule_core + eor x0, x0, x0 + + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.globl vpaes_set_decrypt_key + +.def vpaes_set_decrypt_key + .type 32 +.endef +.align 4 +vpaes_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + + lsr w9, w1, #5 // shr $5,%eax + add w9, w9, #5 // $5,%eax + str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + lsl w9, w9, #4 // shl $4,%eax + add x2, x2, #16 // lea 16(%rdx,%rax),%rdx + add x2, x2, x9 + + mov w3, #1 // mov $1,%ecx + lsr w8, w1, #1 // shr $1,%r8d + and x8, x8, #32 // and $32,%r8d + eor x8, x8, #32 // xor $32,%r8d # nbits==192?0:32 + bl _vpaes_schedule_core + + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl vpaes_cbc_encrypt + +.def vpaes_cbc_encrypt + .type 32 +.endef +.align 4 +vpaes_cbc_encrypt: + AARCH64_SIGN_LINK_REGISTER + cbz x2, Lcbc_abort + cmp w5, #0 // check direction + b.eq vpaes_cbc_decrypt + + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x17, x2 // reassign + mov x2, x3 // reassign + + ld1 {v0.16b}, [x4] // load ivec + bl _vpaes_encrypt_preheat + b Lcbc_enc_loop + +.align 4 +Lcbc_enc_loop: + ld1 {v7.16b}, [x0],#16 // load input + eor v7.16b, v7.16b, v0.16b // xor with ivec + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1],#16 // save output + subs x17, x17, #16 + b.hi Lcbc_enc_loop + + st1 {v0.16b}, [x4] // write ivec + + ldp x29,x30,[sp],#16 +Lcbc_abort: + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.def vpaes_cbc_decrypt + .type 32 +.endef +.align 4 +vpaes_cbc_decrypt: + // Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to + // only from vpaes_cbc_encrypt which has already signed the return address. + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! + + mov x17, x2 // reassign + mov x2, x3 // reassign + ld1 {v6.16b}, [x4] // load ivec + bl _vpaes_decrypt_preheat + tst x17, #16 + b.eq Lcbc_dec_loop2x + + ld1 {v7.16b}, [x0], #16 // load input + bl _vpaes_decrypt_core + eor v0.16b, v0.16b, v6.16b // xor with ivec + orr v6.16b, v7.16b, v7.16b // next ivec value + st1 {v0.16b}, [x1], #16 + subs x17, x17, #16 + b.ls Lcbc_dec_done + +.align 4 +Lcbc_dec_loop2x: + ld1 {v14.16b,v15.16b}, [x0], #32 + bl _vpaes_decrypt_2x + eor v0.16b, v0.16b, v6.16b // xor with ivec + eor v1.16b, v1.16b, v14.16b + orr v6.16b, v15.16b, v15.16b + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #32 + b.hi Lcbc_dec_loop2x + +Lcbc_dec_done: + st1 {v6.16b}, [x4] + + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.globl vpaes_ctr32_encrypt_blocks + +.def vpaes_ctr32_encrypt_blocks + .type 32 +.endef +.align 4 +vpaes_ctr32_encrypt_blocks: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! + + cbz x2, Lctr32_done + + // Note, unlike the other functions, x2 here is measured in blocks, + // not bytes. + mov x17, x2 + mov x2, x3 + + // Load the IV and counter portion. + ldr w6, [x4, #12] + ld1 {v7.16b}, [x4] + + bl _vpaes_encrypt_preheat + tst x17, #1 + rev w6, w6 // The counter is big-endian. + b.eq Lctr32_prep_loop + + // Handle one block so the remaining block count is even for + // _vpaes_encrypt_2x. + ld1 {v6.16b}, [x0], #16 // Load input ahead of time + bl _vpaes_encrypt_core + eor v0.16b, v0.16b, v6.16b // XOR input and result + st1 {v0.16b}, [x1], #16 + subs x17, x17, #1 + // Update the counter. + add w6, w6, #1 + rev w7, w6 + mov v7.s[3], w7 + b.ls Lctr32_done + +Lctr32_prep_loop: + // _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x + // uses v14 and v15. + mov v15.16b, v7.16b + mov v14.16b, v7.16b + add w6, w6, #1 + rev w7, w6 + mov v15.s[3], w7 + +Lctr32_loop: + ld1 {v6.16b,v7.16b}, [x0], #32 // Load input ahead of time + bl _vpaes_encrypt_2x + eor v0.16b, v0.16b, v6.16b // XOR input and result + eor v1.16b, v1.16b, v7.16b // XOR input and result (#2) + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #2 + // Update the counter. + add w7, w6, #1 + add w6, w6, #2 + rev w7, w7 + mov v14.s[3], w7 + rev w7, w6 + mov v15.s[3], w7 + b.hi Lctr32_loop + +Lctr32_done: + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-apple.S b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-apple.S new file mode 100644 index 0000000000..4d2c4856d1 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-apple.S @@ -0,0 +1,680 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +#ifdef BORINGSSL_DISPATCH_TEST +#endif +.align 6,0x90 +L_vpaes_consts: +.long 218628480,235210255,168496130,67568393 +.long 252381056,17041926,33884169,51187212 +.long 252645135,252645135,252645135,252645135 +.long 1512730624,3266504856,1377990664,3401244816 +.long 830229760,1275146365,2969422977,3447763452 +.long 3411033600,2979783055,338359620,2782886510 +.long 4209124096,907596821,221174255,1006095553 +.long 191964160,3799684038,3164090317,1589111125 +.long 182528256,1777043520,2877432650,3265356744 +.long 1874708224,3503451415,3305285752,363511674 +.long 1606117888,3487855781,1093350906,2384367825 +.long 197121,67569157,134941193,202313229 +.long 67569157,134941193,202313229,197121 +.long 134941193,202313229,197121,67569157 +.long 202313229,197121,67569157,134941193 +.long 33619971,100992007,168364043,235736079 +.long 235736079,33619971,100992007,168364043 +.long 168364043,235736079,33619971,100992007 +.long 100992007,168364043,235736079,33619971 +.long 50462976,117835012,185207048,252579084 +.long 252314880,51251460,117574920,184942860 +.long 184682752,252054788,50987272,118359308 +.long 118099200,185467140,251790600,50727180 +.long 2946363062,528716217,1300004225,1881839624 +.long 1532713819,1532713819,1532713819,1532713819 +.long 3602276352,4288629033,3737020424,4153884961 +.long 1354558464,32357713,2958822624,3775749553 +.long 1201988352,132424512,1572796698,503232858 +.long 2213177600,1597421020,4103937655,675398315 +.long 2749646592,4273543773,1511898873,121693092 +.long 3040248576,1103263732,2871565598,1608280554 +.long 2236667136,2588920351,482954393,64377734 +.long 3069987328,291237287,2117370568,3650299247 +.long 533321216,3573750986,2572112006,1401264716 +.long 1339849704,2721158661,548607111,3445553514 +.long 2128193280,3054596040,2183486460,1257083700 +.long 655635200,1165381986,3923443150,2344132524 +.long 190078720,256924420,290342170,357187870 +.long 1610966272,2263057382,4103205268,309794674 +.long 2592527872,2233205587,1335446729,3402964816 +.long 3973531904,3225098121,3002836325,1918774430 +.long 3870401024,2102906079,2284471353,4117666579 +.long 617007872,1021508343,366931923,691083277 +.long 2528395776,3491914898,2968704004,1613121270 +.long 3445188352,3247741094,844474987,4093578302 +.long 651481088,1190302358,1689581232,574775300 +.long 4289380608,206939853,2555985458,2489840491 +.long 2130264064,327674451,3566485037,3349835193 +.long 2470714624,316102159,3636825756,3393945945 +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +.byte 118,101,114,115,105,116,121,41,0 +.align 6,0x90 +.private_extern __vpaes_preheat +.align 4 +__vpaes_preheat: + addl (%esp),%ebp + movdqa -48(%ebp),%xmm7 + movdqa -16(%ebp),%xmm6 + ret +.private_extern __vpaes_encrypt_core +.align 4 +__vpaes_encrypt_core: + movl $16,%ecx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa (%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 +.byte 102,15,56,0,208 + movdqa 16(%ebp),%xmm0 + pxor %xmm5,%xmm2 + psrld $4,%xmm1 + addl $16,%edx +.byte 102,15,56,0,193 + leal 192(%ebp),%ebx + pxor %xmm2,%xmm0 + jmp L000enc_entry +.align 4,0x90 +L001enc_loop: + movdqa 32(%ebp),%xmm4 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa 64(%ebp),%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 + movdqa 80(%ebp),%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addl $16,%edx + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addl $16,%ecx + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andl $48,%ecx + subl $1,%eax + pxor %xmm3,%xmm0 +L000enc_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm6,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm5 + pxor %xmm1,%xmm3 + jnz L001enc_loop + movdqa 96(%ebp),%xmm4 + movdqa 112(%ebp),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%ebx,%ecx,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret +.private_extern __vpaes_decrypt_core +.align 4 +__vpaes_decrypt_core: + leal 608(%ebp),%ebx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa -64(%ebx),%xmm2 + pandn %xmm0,%xmm1 + movl %eax,%ecx + psrld $4,%xmm1 + movdqu (%edx),%xmm5 + shll $4,%ecx + pand %xmm6,%xmm0 +.byte 102,15,56,0,208 + movdqa -48(%ebx),%xmm0 + xorl $48,%ecx +.byte 102,15,56,0,193 + andl $48,%ecx + pxor %xmm5,%xmm2 + movdqa 176(%ebp),%xmm5 + pxor %xmm2,%xmm0 + addl $16,%edx + leal -352(%ebx,%ecx,1),%ecx + jmp L002dec_entry +.align 4,0x90 +L003dec_loop: + movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax +L002dec_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + psrld $4,%xmm1 +.byte 102,15,56,0,208 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm7,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 + jnz L003dec_loop + movdqa 96(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%ebx),%xmm0 + movdqa (%ecx),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret +.private_extern __vpaes_schedule_core +.align 4 +__vpaes_schedule_core: + addl (%esp),%ebp + movdqu (%esi),%xmm0 + movdqa 320(%ebp),%xmm2 + movdqa %xmm0,%xmm3 + leal (%ebp),%ebx + movdqa %xmm2,4(%esp) + call __vpaes_schedule_transform + movdqa %xmm0,%xmm7 + testl %edi,%edi + jnz L004schedule_am_decrypting + movdqu %xmm0,(%edx) + jmp L005schedule_go +L004schedule_am_decrypting: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%edx) + xorl $48,%ecx +L005schedule_go: + cmpl $192,%eax + ja L006schedule_256 + je L007schedule_192 +L008schedule_128: + movl $10,%eax +L009loop_schedule_128: + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + jmp L009loop_schedule_128 +.align 4,0x90 +L007schedule_192: + movdqu 8(%esi),%xmm0 + call __vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%eax +L011loop_schedule_192: + call __vpaes_schedule_round +.byte 102,15,58,15,198,8 + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + call __vpaes_schedule_mangle + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + jmp L011loop_schedule_192 +.align 4,0x90 +L006schedule_256: + movdqu 16(%esi),%xmm0 + call __vpaes_schedule_transform + movl $7,%eax +L012loop_schedule_256: + call __vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,20(%esp) + movdqa %xmm6,%xmm7 + call L_vpaes_schedule_low_round + movdqa 20(%esp),%xmm7 + jmp L012loop_schedule_256 +.align 4,0x90 +L010schedule_mangle_last: + leal 384(%ebp),%ebx + testl %edi,%edi + jnz L013schedule_mangle_last_dec + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,193 + leal 352(%ebp),%ebx + addl $32,%edx +L013schedule_mangle_last_dec: + addl $-16,%edx + pxor 336(%ebp),%xmm0 + call __vpaes_schedule_transform + movdqu %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret +.private_extern __vpaes_schedule_192_smear +.align 4 +__vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + ret +.private_extern __vpaes_schedule_round +.align 4 +__vpaes_schedule_round: + movdqa 8(%esp),%xmm2 + pxor %xmm1,%xmm1 +.byte 102,15,58,15,202,15 +.byte 102,15,58,15,210,15 + pxor %xmm1,%xmm7 + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + movdqa %xmm2,8(%esp) +L_vpaes_schedule_low_round: + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor 336(%ebp),%xmm7 + movdqa -16(%ebp),%xmm4 + movdqa -48(%ebp),%xmm5 + movdqa %xmm4,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm4,%xmm0 + movdqa -32(%ebp),%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm5,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm5,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa 32(%ebp),%xmm4 +.byte 102,15,56,0,226 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret +.private_extern __vpaes_schedule_transform +.align 4 +__vpaes_schedule_transform: + movdqa -16(%ebp),%xmm2 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + movdqa (%ebx),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret +.private_extern __vpaes_schedule_mangle +.align 4 +__vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa 128(%ebp),%xmm5 + testl %edi,%edi + jnz L014schedule_mangle_dec + addl $16,%edx + pxor 336(%ebp),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + jmp L015schedule_mangle_both +.align 4,0x90 +L014schedule_mangle_dec: + movdqa -16(%ebp),%xmm2 + leal 416(%ebp),%esi + movdqa %xmm2,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm4 + movdqa (%esi),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 32(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 64(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 96(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + addl $-16,%edx +L015schedule_mangle_both: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + addl $-16,%ecx + andl $48,%ecx + movdqu %xmm3,(%edx) + ret +.globl _vpaes_set_encrypt_key +.private_extern _vpaes_set_encrypt_key +.align 4 +_vpaes_set_encrypt_key: +L_vpaes_set_encrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call L016pic +L016pic: + popl %ebx + leal _BORINGSSL_function_hit+5-L016pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + movl $48,%ecx + movl $0,%edi + leal L_vpaes_consts+0x30-L017pic_point,%ebp + call __vpaes_schedule_core +L017pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_set_decrypt_key +.private_extern _vpaes_set_decrypt_key +.align 4 +_vpaes_set_decrypt_key: +L_vpaes_set_decrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + shll $4,%ebx + leal 16(%edx,%ebx,1),%edx + movl $1,%edi + movl %eax,%ecx + shrl $1,%ecx + andl $32,%ecx + xorl $32,%ecx + leal L_vpaes_consts+0x30-L018pic_point,%ebp + call __vpaes_schedule_core +L018pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_encrypt +.private_extern _vpaes_encrypt +.align 4 +_vpaes_encrypt: +L_vpaes_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call L019pic +L019pic: + popl %ebx + leal _BORINGSSL_function_hit+4-L019pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + leal L_vpaes_consts+0x30-L020pic_point,%ebp + call __vpaes_preheat +L020pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call __vpaes_encrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_decrypt +.private_extern _vpaes_decrypt +.align 4 +_vpaes_decrypt: +L_vpaes_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal L_vpaes_consts+0x30-L021pic_point,%ebp + call __vpaes_preheat +L021pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call __vpaes_decrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_cbc_encrypt +.private_extern _vpaes_cbc_encrypt +.align 4 +_vpaes_cbc_encrypt: +L_vpaes_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + subl $16,%eax + jc L022cbc_abort + leal -56(%esp),%ebx + movl 36(%esp),%ebp + andl $-16,%ebx + movl 40(%esp),%ecx + xchgl %esp,%ebx + movdqu (%ebp),%xmm1 + subl %esi,%edi + movl %ebx,48(%esp) + movl %edi,(%esp) + movl %edx,4(%esp) + movl %ebp,8(%esp) + movl %eax,%edi + leal L_vpaes_consts+0x30-L023pic_point,%ebp + call __vpaes_preheat +L023pic_point: + cmpl $0,%ecx + je L024cbc_dec_loop + jmp L025cbc_enc_loop +.align 4,0x90 +L025cbc_enc_loop: + movdqu (%esi),%xmm0 + pxor %xmm1,%xmm0 + call __vpaes_encrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + movdqa %xmm0,%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc L025cbc_enc_loop + jmp L026cbc_done +.align 4,0x90 +L024cbc_dec_loop: + movdqu (%esi),%xmm0 + movdqa %xmm1,16(%esp) + movdqa %xmm0,32(%esp) + call __vpaes_decrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + pxor 16(%esp),%xmm0 + movdqa 32(%esp),%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc L024cbc_dec_loop +L026cbc_done: + movl 8(%esp),%ebx + movl 48(%esp),%esp + movdqu %xmm1,(%ebx) +L022cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-linux.S b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-linux.S new file mode 100644 index 0000000000..02786a7615 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-linux.S @@ -0,0 +1,706 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +#ifdef BORINGSSL_DISPATCH_TEST +#endif +.align 64 +.L_vpaes_consts: +.long 218628480,235210255,168496130,67568393 +.long 252381056,17041926,33884169,51187212 +.long 252645135,252645135,252645135,252645135 +.long 1512730624,3266504856,1377990664,3401244816 +.long 830229760,1275146365,2969422977,3447763452 +.long 3411033600,2979783055,338359620,2782886510 +.long 4209124096,907596821,221174255,1006095553 +.long 191964160,3799684038,3164090317,1589111125 +.long 182528256,1777043520,2877432650,3265356744 +.long 1874708224,3503451415,3305285752,363511674 +.long 1606117888,3487855781,1093350906,2384367825 +.long 197121,67569157,134941193,202313229 +.long 67569157,134941193,202313229,197121 +.long 134941193,202313229,197121,67569157 +.long 202313229,197121,67569157,134941193 +.long 33619971,100992007,168364043,235736079 +.long 235736079,33619971,100992007,168364043 +.long 168364043,235736079,33619971,100992007 +.long 100992007,168364043,235736079,33619971 +.long 50462976,117835012,185207048,252579084 +.long 252314880,51251460,117574920,184942860 +.long 184682752,252054788,50987272,118359308 +.long 118099200,185467140,251790600,50727180 +.long 2946363062,528716217,1300004225,1881839624 +.long 1532713819,1532713819,1532713819,1532713819 +.long 3602276352,4288629033,3737020424,4153884961 +.long 1354558464,32357713,2958822624,3775749553 +.long 1201988352,132424512,1572796698,503232858 +.long 2213177600,1597421020,4103937655,675398315 +.long 2749646592,4273543773,1511898873,121693092 +.long 3040248576,1103263732,2871565598,1608280554 +.long 2236667136,2588920351,482954393,64377734 +.long 3069987328,291237287,2117370568,3650299247 +.long 533321216,3573750986,2572112006,1401264716 +.long 1339849704,2721158661,548607111,3445553514 +.long 2128193280,3054596040,2183486460,1257083700 +.long 655635200,1165381986,3923443150,2344132524 +.long 190078720,256924420,290342170,357187870 +.long 1610966272,2263057382,4103205268,309794674 +.long 2592527872,2233205587,1335446729,3402964816 +.long 3973531904,3225098121,3002836325,1918774430 +.long 3870401024,2102906079,2284471353,4117666579 +.long 617007872,1021508343,366931923,691083277 +.long 2528395776,3491914898,2968704004,1613121270 +.long 3445188352,3247741094,844474987,4093578302 +.long 651481088,1190302358,1689581232,574775300 +.long 4289380608,206939853,2555985458,2489840491 +.long 2130264064,327674451,3566485037,3349835193 +.long 2470714624,316102159,3636825756,3393945945 +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +.byte 118,101,114,115,105,116,121,41,0 +.align 64 +.hidden _vpaes_preheat +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: + addl (%esp),%ebp + movdqa -48(%ebp),%xmm7 + movdqa -16(%ebp),%xmm6 + ret +.size _vpaes_preheat,.-_vpaes_preheat +.hidden _vpaes_encrypt_core +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: + movl $16,%ecx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa (%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 +.byte 102,15,56,0,208 + movdqa 16(%ebp),%xmm0 + pxor %xmm5,%xmm2 + psrld $4,%xmm1 + addl $16,%edx +.byte 102,15,56,0,193 + leal 192(%ebp),%ebx + pxor %xmm2,%xmm0 + jmp .L000enc_entry +.align 16 +.L001enc_loop: + movdqa 32(%ebp),%xmm4 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa 64(%ebp),%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 + movdqa 80(%ebp),%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addl $16,%edx + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addl $16,%ecx + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andl $48,%ecx + subl $1,%eax + pxor %xmm3,%xmm0 +.L000enc_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm6,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm5 + pxor %xmm1,%xmm3 + jnz .L001enc_loop + movdqa 96(%ebp),%xmm4 + movdqa 112(%ebp),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%ebx,%ecx,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core +.hidden _vpaes_decrypt_core +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: + leal 608(%ebp),%ebx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa -64(%ebx),%xmm2 + pandn %xmm0,%xmm1 + movl %eax,%ecx + psrld $4,%xmm1 + movdqu (%edx),%xmm5 + shll $4,%ecx + pand %xmm6,%xmm0 +.byte 102,15,56,0,208 + movdqa -48(%ebx),%xmm0 + xorl $48,%ecx +.byte 102,15,56,0,193 + andl $48,%ecx + pxor %xmm5,%xmm2 + movdqa 176(%ebp),%xmm5 + pxor %xmm2,%xmm0 + addl $16,%edx + leal -352(%ebx,%ecx,1),%ecx + jmp .L002dec_entry +.align 16 +.L003dec_loop: + movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax +.L002dec_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + psrld $4,%xmm1 +.byte 102,15,56,0,208 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm7,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 + jnz .L003dec_loop + movdqa 96(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%ebx),%xmm0 + movdqa (%ecx),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core +.hidden _vpaes_schedule_core +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: + addl (%esp),%ebp + movdqu (%esi),%xmm0 + movdqa 320(%ebp),%xmm2 + movdqa %xmm0,%xmm3 + leal (%ebp),%ebx + movdqa %xmm2,4(%esp) + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + testl %edi,%edi + jnz .L004schedule_am_decrypting + movdqu %xmm0,(%edx) + jmp .L005schedule_go +.L004schedule_am_decrypting: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%edx) + xorl $48,%ecx +.L005schedule_go: + cmpl $192,%eax + ja .L006schedule_256 + je .L007schedule_192 +.L008schedule_128: + movl $10,%eax +.L009loop_schedule_128: + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + jmp .L009loop_schedule_128 +.align 16 +.L007schedule_192: + movdqu 8(%esi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%eax +.L011loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .L011loop_schedule_192 +.align 16 +.L006schedule_256: + movdqu 16(%esi),%xmm0 + call _vpaes_schedule_transform + movl $7,%eax +.L012loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,20(%esp) + movdqa %xmm6,%xmm7 + call .L_vpaes_schedule_low_round + movdqa 20(%esp),%xmm7 + jmp .L012loop_schedule_256 +.align 16 +.L010schedule_mangle_last: + leal 384(%ebp),%ebx + testl %edi,%edi + jnz .L013schedule_mangle_last_dec + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,193 + leal 352(%ebp),%ebx + addl $32,%edx +.L013schedule_mangle_last_dec: + addl $-16,%edx + pxor 336(%ebp),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret +.size _vpaes_schedule_core,.-_vpaes_schedule_core +.hidden _vpaes_schedule_192_smear +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + ret +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear +.hidden _vpaes_schedule_round +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: + movdqa 8(%esp),%xmm2 + pxor %xmm1,%xmm1 +.byte 102,15,58,15,202,15 +.byte 102,15,58,15,210,15 + pxor %xmm1,%xmm7 + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + movdqa %xmm2,8(%esp) +.L_vpaes_schedule_low_round: + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor 336(%ebp),%xmm7 + movdqa -16(%ebp),%xmm4 + movdqa -48(%ebp),%xmm5 + movdqa %xmm4,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm4,%xmm0 + movdqa -32(%ebp),%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm5,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm5,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa 32(%ebp),%xmm4 +.byte 102,15,56,0,226 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret +.size _vpaes_schedule_round,.-_vpaes_schedule_round +.hidden _vpaes_schedule_transform +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: + movdqa -16(%ebp),%xmm2 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + movdqa (%ebx),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform +.hidden _vpaes_schedule_mangle +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa 128(%ebp),%xmm5 + testl %edi,%edi + jnz .L014schedule_mangle_dec + addl $16,%edx + pxor 336(%ebp),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + jmp .L015schedule_mangle_both +.align 16 +.L014schedule_mangle_dec: + movdqa -16(%ebp),%xmm2 + leal 416(%ebp),%esi + movdqa %xmm2,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm4 + movdqa (%esi),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 32(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 64(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 96(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + addl $-16,%edx +.L015schedule_mangle_both: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + addl $-16,%ecx + andl $48,%ecx + movdqu %xmm3,(%edx) + ret +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle +.globl vpaes_set_encrypt_key +.hidden vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: +.L_vpaes_set_encrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call .L016pic +.L016pic: + popl %ebx + leal BORINGSSL_function_hit+5-.L016pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + movl $48,%ecx + movl $0,%edi + leal .L_vpaes_consts+0x30-.L017pic_point,%ebp + call _vpaes_schedule_core +.L017pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin +.globl vpaes_set_decrypt_key +.hidden vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: +.L_vpaes_set_decrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + shll $4,%ebx + leal 16(%edx,%ebx,1),%edx + movl $1,%edi + movl %eax,%ecx + shrl $1,%ecx + andl $32,%ecx + xorl $32,%ecx + leal .L_vpaes_consts+0x30-.L018pic_point,%ebp + call _vpaes_schedule_core +.L018pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin +.globl vpaes_encrypt +.hidden vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: +.L_vpaes_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi +#ifdef BORINGSSL_DISPATCH_TEST + pushl %ebx + pushl %edx + call .L019pic +.L019pic: + popl %ebx + leal BORINGSSL_function_hit+4-.L019pic(%ebx),%ebx + movl $1,%edx + movb %dl,(%ebx) + popl %edx + popl %ebx +#endif + leal .L_vpaes_consts+0x30-.L020pic_point,%ebp + call _vpaes_preheat +.L020pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call _vpaes_encrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_encrypt,.-.L_vpaes_encrypt_begin +.globl vpaes_decrypt +.hidden vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: +.L_vpaes_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal .L_vpaes_consts+0x30-.L021pic_point,%ebp + call _vpaes_preheat +.L021pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call _vpaes_decrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_decrypt,.-.L_vpaes_decrypt_begin +.globl vpaes_cbc_encrypt +.hidden vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: +.L_vpaes_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + subl $16,%eax + jc .L022cbc_abort + leal -56(%esp),%ebx + movl 36(%esp),%ebp + andl $-16,%ebx + movl 40(%esp),%ecx + xchgl %esp,%ebx + movdqu (%ebp),%xmm1 + subl %esi,%edi + movl %ebx,48(%esp) + movl %edi,(%esp) + movl %edx,4(%esp) + movl %ebp,8(%esp) + movl %eax,%edi + leal .L_vpaes_consts+0x30-.L023pic_point,%ebp + call _vpaes_preheat +.L023pic_point: + cmpl $0,%ecx + je .L024cbc_dec_loop + jmp .L025cbc_enc_loop +.align 16 +.L025cbc_enc_loop: + movdqu (%esi),%xmm0 + pxor %xmm1,%xmm0 + call _vpaes_encrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + movdqa %xmm0,%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc .L025cbc_enc_loop + jmp .L026cbc_done +.align 16 +.L024cbc_dec_loop: + movdqu (%esi),%xmm0 + movdqa %xmm1,16(%esp) + movdqa %xmm0,32(%esp) + call _vpaes_decrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + pxor 16(%esp),%xmm0 + movdqa 32(%esp),%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc .L024cbc_dec_loop +.L026cbc_done: + movl 8(%esp),%ebx + movl 48(%esp),%esp + movdqu %xmm1,(%ebx) +.L022cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-win.asm b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-win.asm new file mode 100644 index 0000000000..661496e363 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86-win.asm @@ -0,0 +1,679 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +%ifdef BORINGSSL_DISPATCH_TEST +extern _BORINGSSL_function_hit +%endif +align 64 +L$_vpaes_consts: +dd 218628480,235210255,168496130,67568393 +dd 252381056,17041926,33884169,51187212 +dd 252645135,252645135,252645135,252645135 +dd 1512730624,3266504856,1377990664,3401244816 +dd 830229760,1275146365,2969422977,3447763452 +dd 3411033600,2979783055,338359620,2782886510 +dd 4209124096,907596821,221174255,1006095553 +dd 191964160,3799684038,3164090317,1589111125 +dd 182528256,1777043520,2877432650,3265356744 +dd 1874708224,3503451415,3305285752,363511674 +dd 1606117888,3487855781,1093350906,2384367825 +dd 197121,67569157,134941193,202313229 +dd 67569157,134941193,202313229,197121 +dd 134941193,202313229,197121,67569157 +dd 202313229,197121,67569157,134941193 +dd 33619971,100992007,168364043,235736079 +dd 235736079,33619971,100992007,168364043 +dd 168364043,235736079,33619971,100992007 +dd 100992007,168364043,235736079,33619971 +dd 50462976,117835012,185207048,252579084 +dd 252314880,51251460,117574920,184942860 +dd 184682752,252054788,50987272,118359308 +dd 118099200,185467140,251790600,50727180 +dd 2946363062,528716217,1300004225,1881839624 +dd 1532713819,1532713819,1532713819,1532713819 +dd 3602276352,4288629033,3737020424,4153884961 +dd 1354558464,32357713,2958822624,3775749553 +dd 1201988352,132424512,1572796698,503232858 +dd 2213177600,1597421020,4103937655,675398315 +dd 2749646592,4273543773,1511898873,121693092 +dd 3040248576,1103263732,2871565598,1608280554 +dd 2236667136,2588920351,482954393,64377734 +dd 3069987328,291237287,2117370568,3650299247 +dd 533321216,3573750986,2572112006,1401264716 +dd 1339849704,2721158661,548607111,3445553514 +dd 2128193280,3054596040,2183486460,1257083700 +dd 655635200,1165381986,3923443150,2344132524 +dd 190078720,256924420,290342170,357187870 +dd 1610966272,2263057382,4103205268,309794674 +dd 2592527872,2233205587,1335446729,3402964816 +dd 3973531904,3225098121,3002836325,1918774430 +dd 3870401024,2102906079,2284471353,4117666579 +dd 617007872,1021508343,366931923,691083277 +dd 2528395776,3491914898,2968704004,1613121270 +dd 3445188352,3247741094,844474987,4093578302 +dd 651481088,1190302358,1689581232,574775300 +dd 4289380608,206939853,2555985458,2489840491 +dd 2130264064,327674451,3566485037,3349835193 +dd 2470714624,316102159,3636825756,3393945945 +db 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +db 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +db 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +db 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +db 118,101,114,115,105,116,121,41,0 +align 64 +align 16 +__vpaes_preheat: + add ebp,DWORD [esp] + movdqa xmm7,[ebp-48] + movdqa xmm6,[ebp-16] + ret +align 16 +__vpaes_encrypt_core: + mov ecx,16 + mov eax,DWORD [240+edx] + movdqa xmm1,xmm6 + movdqa xmm2,[ebp] + pandn xmm1,xmm0 + pand xmm0,xmm6 + movdqu xmm5,[edx] +db 102,15,56,0,208 + movdqa xmm0,[16+ebp] + pxor xmm2,xmm5 + psrld xmm1,4 + add edx,16 +db 102,15,56,0,193 + lea ebx,[192+ebp] + pxor xmm0,xmm2 + jmp NEAR L$000enc_entry +align 16 +L$001enc_loop: + movdqa xmm4,[32+ebp] + movdqa xmm0,[48+ebp] +db 102,15,56,0,226 +db 102,15,56,0,195 + pxor xmm4,xmm5 + movdqa xmm5,[64+ebp] + pxor xmm0,xmm4 + movdqa xmm1,[ecx*1+ebx-64] +db 102,15,56,0,234 + movdqa xmm2,[80+ebp] + movdqa xmm4,[ecx*1+ebx] +db 102,15,56,0,211 + movdqa xmm3,xmm0 + pxor xmm2,xmm5 +db 102,15,56,0,193 + add edx,16 + pxor xmm0,xmm2 +db 102,15,56,0,220 + add ecx,16 + pxor xmm3,xmm0 +db 102,15,56,0,193 + and ecx,48 + sub eax,1 + pxor xmm0,xmm3 +L$000enc_entry: + movdqa xmm1,xmm6 + movdqa xmm5,[ebp-32] + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm6 +db 102,15,56,0,232 + movdqa xmm3,xmm7 + pxor xmm0,xmm1 +db 102,15,56,0,217 + movdqa xmm4,xmm7 + pxor xmm3,xmm5 +db 102,15,56,0,224 + movdqa xmm2,xmm7 + pxor xmm4,xmm5 +db 102,15,56,0,211 + movdqa xmm3,xmm7 + pxor xmm2,xmm0 +db 102,15,56,0,220 + movdqu xmm5,[edx] + pxor xmm3,xmm1 + jnz NEAR L$001enc_loop + movdqa xmm4,[96+ebp] + movdqa xmm0,[112+ebp] +db 102,15,56,0,226 + pxor xmm4,xmm5 +db 102,15,56,0,195 + movdqa xmm1,[64+ecx*1+ebx] + pxor xmm0,xmm4 +db 102,15,56,0,193 + ret +align 16 +__vpaes_decrypt_core: + lea ebx,[608+ebp] + mov eax,DWORD [240+edx] + movdqa xmm1,xmm6 + movdqa xmm2,[ebx-64] + pandn xmm1,xmm0 + mov ecx,eax + psrld xmm1,4 + movdqu xmm5,[edx] + shl ecx,4 + pand xmm0,xmm6 +db 102,15,56,0,208 + movdqa xmm0,[ebx-48] + xor ecx,48 +db 102,15,56,0,193 + and ecx,48 + pxor xmm2,xmm5 + movdqa xmm5,[176+ebp] + pxor xmm0,xmm2 + add edx,16 + lea ecx,[ecx*1+ebx-352] + jmp NEAR L$002dec_entry +align 16 +L$003dec_loop: + movdqa xmm4,[ebx-32] + movdqa xmm1,[ebx-16] +db 102,15,56,0,226 +db 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,[ebx] + pxor xmm0,xmm1 + movdqa xmm1,[16+ebx] +db 102,15,56,0,226 +db 102,15,56,0,197 +db 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,[32+ebx] + pxor xmm0,xmm1 + movdqa xmm1,[48+ebx] +db 102,15,56,0,226 +db 102,15,56,0,197 +db 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,[64+ebx] + pxor xmm0,xmm1 + movdqa xmm1,[80+ebx] +db 102,15,56,0,226 +db 102,15,56,0,197 +db 102,15,56,0,203 + pxor xmm0,xmm4 + add edx,16 +db 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub eax,1 +L$002dec_entry: + movdqa xmm1,xmm6 + movdqa xmm2,[ebp-32] + pandn xmm1,xmm0 + pand xmm0,xmm6 + psrld xmm1,4 +db 102,15,56,0,208 + movdqa xmm3,xmm7 + pxor xmm0,xmm1 +db 102,15,56,0,217 + movdqa xmm4,xmm7 + pxor xmm3,xmm2 +db 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm7 +db 102,15,56,0,211 + movdqa xmm3,xmm7 + pxor xmm2,xmm0 +db 102,15,56,0,220 + movdqu xmm0,[edx] + pxor xmm3,xmm1 + jnz NEAR L$003dec_loop + movdqa xmm4,[96+ebx] +db 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,[112+ebx] + movdqa xmm2,[ecx] +db 102,15,56,0,195 + pxor xmm0,xmm4 +db 102,15,56,0,194 + ret +align 16 +__vpaes_schedule_core: + add ebp,DWORD [esp] + movdqu xmm0,[esi] + movdqa xmm2,[320+ebp] + movdqa xmm3,xmm0 + lea ebx,[ebp] + movdqa [4+esp],xmm2 + call __vpaes_schedule_transform + movdqa xmm7,xmm0 + test edi,edi + jnz NEAR L$004schedule_am_decrypting + movdqu [edx],xmm0 + jmp NEAR L$005schedule_go +L$004schedule_am_decrypting: + movdqa xmm1,[256+ecx*1+ebp] +db 102,15,56,0,217 + movdqu [edx],xmm3 + xor ecx,48 +L$005schedule_go: + cmp eax,192 + ja NEAR L$006schedule_256 + je NEAR L$007schedule_192 +L$008schedule_128: + mov eax,10 +L$009loop_schedule_128: + call __vpaes_schedule_round + dec eax + jz NEAR L$010schedule_mangle_last + call __vpaes_schedule_mangle + jmp NEAR L$009loop_schedule_128 +align 16 +L$007schedule_192: + movdqu xmm0,[8+esi] + call __vpaes_schedule_transform + movdqa xmm6,xmm0 + pxor xmm4,xmm4 + movhlps xmm6,xmm4 + mov eax,4 +L$011loop_schedule_192: + call __vpaes_schedule_round +db 102,15,58,15,198,8 + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + call __vpaes_schedule_mangle + call __vpaes_schedule_round + dec eax + jz NEAR L$010schedule_mangle_last + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + jmp NEAR L$011loop_schedule_192 +align 16 +L$006schedule_256: + movdqu xmm0,[16+esi] + call __vpaes_schedule_transform + mov eax,7 +L$012loop_schedule_256: + call __vpaes_schedule_mangle + movdqa xmm6,xmm0 + call __vpaes_schedule_round + dec eax + jz NEAR L$010schedule_mangle_last + call __vpaes_schedule_mangle + pshufd xmm0,xmm0,255 + movdqa [20+esp],xmm7 + movdqa xmm7,xmm6 + call L$_vpaes_schedule_low_round + movdqa xmm7,[20+esp] + jmp NEAR L$012loop_schedule_256 +align 16 +L$010schedule_mangle_last: + lea ebx,[384+ebp] + test edi,edi + jnz NEAR L$013schedule_mangle_last_dec + movdqa xmm1,[256+ecx*1+ebp] +db 102,15,56,0,193 + lea ebx,[352+ebp] + add edx,32 +L$013schedule_mangle_last_dec: + add edx,-16 + pxor xmm0,[336+ebp] + call __vpaes_schedule_transform + movdqu [edx],xmm0 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + ret +align 16 +__vpaes_schedule_192_smear: + pshufd xmm1,xmm6,128 + pshufd xmm0,xmm7,254 + pxor xmm6,xmm1 + pxor xmm1,xmm1 + pxor xmm6,xmm0 + movdqa xmm0,xmm6 + movhlps xmm6,xmm1 + ret +align 16 +__vpaes_schedule_round: + movdqa xmm2,[8+esp] + pxor xmm1,xmm1 +db 102,15,58,15,202,15 +db 102,15,58,15,210,15 + pxor xmm7,xmm1 + pshufd xmm0,xmm0,255 +db 102,15,58,15,192,1 + movdqa [8+esp],xmm2 +L$_vpaes_schedule_low_round: + movdqa xmm1,xmm7 + pslldq xmm7,4 + pxor xmm7,xmm1 + movdqa xmm1,xmm7 + pslldq xmm7,8 + pxor xmm7,xmm1 + pxor xmm7,[336+ebp] + movdqa xmm4,[ebp-16] + movdqa xmm5,[ebp-48] + movdqa xmm1,xmm4 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm4 + movdqa xmm2,[ebp-32] +db 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm5 +db 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm5 +db 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm5 +db 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm5 +db 102,15,56,0,220 + pxor xmm3,xmm1 + movdqa xmm4,[32+ebp] +db 102,15,56,0,226 + movdqa xmm0,[48+ebp] +db 102,15,56,0,195 + pxor xmm0,xmm4 + pxor xmm0,xmm7 + movdqa xmm7,xmm0 + ret +align 16 +__vpaes_schedule_transform: + movdqa xmm2,[ebp-16] + movdqa xmm1,xmm2 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm2 + movdqa xmm2,[ebx] +db 102,15,56,0,208 + movdqa xmm0,[16+ebx] +db 102,15,56,0,193 + pxor xmm0,xmm2 + ret +align 16 +__vpaes_schedule_mangle: + movdqa xmm4,xmm0 + movdqa xmm5,[128+ebp] + test edi,edi + jnz NEAR L$014schedule_mangle_dec + add edx,16 + pxor xmm4,[336+ebp] +db 102,15,56,0,229 + movdqa xmm3,xmm4 +db 102,15,56,0,229 + pxor xmm3,xmm4 +db 102,15,56,0,229 + pxor xmm3,xmm4 + jmp NEAR L$015schedule_mangle_both +align 16 +L$014schedule_mangle_dec: + movdqa xmm2,[ebp-16] + lea esi,[416+ebp] + movdqa xmm1,xmm2 + pandn xmm1,xmm4 + psrld xmm1,4 + pand xmm4,xmm2 + movdqa xmm2,[esi] +db 102,15,56,0,212 + movdqa xmm3,[16+esi] +db 102,15,56,0,217 + pxor xmm3,xmm2 +db 102,15,56,0,221 + movdqa xmm2,[32+esi] +db 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,[48+esi] +db 102,15,56,0,217 + pxor xmm3,xmm2 +db 102,15,56,0,221 + movdqa xmm2,[64+esi] +db 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,[80+esi] +db 102,15,56,0,217 + pxor xmm3,xmm2 +db 102,15,56,0,221 + movdqa xmm2,[96+esi] +db 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,[112+esi] +db 102,15,56,0,217 + pxor xmm3,xmm2 + add edx,-16 +L$015schedule_mangle_both: + movdqa xmm1,[256+ecx*1+ebp] +db 102,15,56,0,217 + add ecx,-16 + and ecx,48 + movdqu [edx],xmm3 + ret +global _vpaes_set_encrypt_key +align 16 +_vpaes_set_encrypt_key: +L$_vpaes_set_encrypt_key_begin: + push ebp + push ebx + push esi + push edi +%ifdef BORINGSSL_DISPATCH_TEST + push ebx + push edx + call L$016pic +L$016pic: + pop ebx + lea ebx,[(_BORINGSSL_function_hit+5-L$016pic)+ebx] + mov edx,1 + mov BYTE [ebx],dl + pop edx + pop ebx +%endif + mov esi,DWORD [20+esp] + lea ebx,[esp-56] + mov eax,DWORD [24+esp] + and ebx,-16 + mov edx,DWORD [28+esp] + xchg ebx,esp + mov DWORD [48+esp],ebx + mov ebx,eax + shr ebx,5 + add ebx,5 + mov DWORD [240+edx],ebx + mov ecx,48 + mov edi,0 + lea ebp,[(L$_vpaes_consts+0x30-L$017pic_point)] + call __vpaes_schedule_core +L$017pic_point: + mov esp,DWORD [48+esp] + xor eax,eax + pop edi + pop esi + pop ebx + pop ebp + ret +global _vpaes_set_decrypt_key +align 16 +_vpaes_set_decrypt_key: +L$_vpaes_set_decrypt_key_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + lea ebx,[esp-56] + mov eax,DWORD [24+esp] + and ebx,-16 + mov edx,DWORD [28+esp] + xchg ebx,esp + mov DWORD [48+esp],ebx + mov ebx,eax + shr ebx,5 + add ebx,5 + mov DWORD [240+edx],ebx + shl ebx,4 + lea edx,[16+ebx*1+edx] + mov edi,1 + mov ecx,eax + shr ecx,1 + and ecx,32 + xor ecx,32 + lea ebp,[(L$_vpaes_consts+0x30-L$018pic_point)] + call __vpaes_schedule_core +L$018pic_point: + mov esp,DWORD [48+esp] + xor eax,eax + pop edi + pop esi + pop ebx + pop ebp + ret +global _vpaes_encrypt +align 16 +_vpaes_encrypt: +L$_vpaes_encrypt_begin: + push ebp + push ebx + push esi + push edi +%ifdef BORINGSSL_DISPATCH_TEST + push ebx + push edx + call L$019pic +L$019pic: + pop ebx + lea ebx,[(_BORINGSSL_function_hit+4-L$019pic)+ebx] + mov edx,1 + mov BYTE [ebx],dl + pop edx + pop ebx +%endif + lea ebp,[(L$_vpaes_consts+0x30-L$020pic_point)] + call __vpaes_preheat +L$020pic_point: + mov esi,DWORD [20+esp] + lea ebx,[esp-56] + mov edi,DWORD [24+esp] + and ebx,-16 + mov edx,DWORD [28+esp] + xchg ebx,esp + mov DWORD [48+esp],ebx + movdqu xmm0,[esi] + call __vpaes_encrypt_core + movdqu [edi],xmm0 + mov esp,DWORD [48+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _vpaes_decrypt +align 16 +_vpaes_decrypt: +L$_vpaes_decrypt_begin: + push ebp + push ebx + push esi + push edi + lea ebp,[(L$_vpaes_consts+0x30-L$021pic_point)] + call __vpaes_preheat +L$021pic_point: + mov esi,DWORD [20+esp] + lea ebx,[esp-56] + mov edi,DWORD [24+esp] + and ebx,-16 + mov edx,DWORD [28+esp] + xchg ebx,esp + mov DWORD [48+esp],ebx + movdqu xmm0,[esi] + call __vpaes_decrypt_core + movdqu [edi],xmm0 + mov esp,DWORD [48+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _vpaes_cbc_encrypt +align 16 +_vpaes_cbc_encrypt: +L$_vpaes_cbc_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + sub eax,16 + jc NEAR L$022cbc_abort + lea ebx,[esp-56] + mov ebp,DWORD [36+esp] + and ebx,-16 + mov ecx,DWORD [40+esp] + xchg ebx,esp + movdqu xmm1,[ebp] + sub edi,esi + mov DWORD [48+esp],ebx + mov DWORD [esp],edi + mov DWORD [4+esp],edx + mov DWORD [8+esp],ebp + mov edi,eax + lea ebp,[(L$_vpaes_consts+0x30-L$023pic_point)] + call __vpaes_preheat +L$023pic_point: + cmp ecx,0 + je NEAR L$024cbc_dec_loop + jmp NEAR L$025cbc_enc_loop +align 16 +L$025cbc_enc_loop: + movdqu xmm0,[esi] + pxor xmm0,xmm1 + call __vpaes_encrypt_core + mov ebx,DWORD [esp] + mov edx,DWORD [4+esp] + movdqa xmm1,xmm0 + movdqu [esi*1+ebx],xmm0 + lea esi,[16+esi] + sub edi,16 + jnc NEAR L$025cbc_enc_loop + jmp NEAR L$026cbc_done +align 16 +L$024cbc_dec_loop: + movdqu xmm0,[esi] + movdqa [16+esp],xmm1 + movdqa [32+esp],xmm0 + call __vpaes_decrypt_core + mov ebx,DWORD [esp] + mov edx,DWORD [4+esp] + pxor xmm0,[16+esp] + movdqa xmm1,[32+esp] + movdqu [esi*1+ebx],xmm0 + lea esi,[16+esi] + sub edi,16 + jnc NEAR L$024cbc_dec_loop +L$026cbc_done: + mov ebx,DWORD [8+esp] + mov esp,DWORD [48+esp] + movdqu [ebx],xmm1 +L$022cbc_abort: + pop edi + pop esi + pop ebx + pop ebp + ret +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-apple.S b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-apple.S new file mode 100644 index 0000000000..5aea40f5b4 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-apple.S @@ -0,0 +1,1131 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_encrypt_core: + + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa L$k_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa L$k_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + addq $16,%r9 + pxor %xmm2,%xmm0 + leaq L$k_mc_backward(%rip),%r10 + jmp L$enc_entry + +.p2align 4 +L$enc_loop: + + movdqa %xmm13,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa %xmm15,%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $0x30,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + +L$enc_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm5 + pxor %xmm1,%xmm3 + jnz L$enc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_encrypt_core_2x: + + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa %xmm9,%xmm7 + movdqa L$k_ipt(%rip),%xmm2 + movdqa %xmm2,%xmm8 + pandn %xmm0,%xmm1 + pandn %xmm6,%xmm7 + movdqu (%r9),%xmm5 + + psrld $4,%xmm1 + psrld $4,%xmm7 + pand %xmm9,%xmm0 + pand %xmm9,%xmm6 +.byte 102,15,56,0,208 +.byte 102,68,15,56,0,198 + movdqa L$k_ipt+16(%rip),%xmm0 + movdqa %xmm0,%xmm6 +.byte 102,15,56,0,193 +.byte 102,15,56,0,247 + pxor %xmm5,%xmm2 + pxor %xmm5,%xmm8 + addq $16,%r9 + pxor %xmm2,%xmm0 + pxor %xmm8,%xmm6 + leaq L$k_mc_backward(%rip),%r10 + jmp L$enc2x_entry + +.p2align 4 +L$enc2x_loop: + + movdqa L$k_sb1(%rip),%xmm4 + movdqa L$k_sb1+16(%rip),%xmm0 + movdqa %xmm4,%xmm12 + movdqa %xmm0,%xmm6 +.byte 102,15,56,0,226 +.byte 102,69,15,56,0,224 +.byte 102,15,56,0,195 +.byte 102,65,15,56,0,243 + pxor %xmm5,%xmm4 + pxor %xmm5,%xmm12 + movdqa L$k_sb2(%rip),%xmm5 + movdqa %xmm5,%xmm13 + pxor %xmm4,%xmm0 + pxor %xmm12,%xmm6 + movdqa -64(%r11,%r10,1),%xmm1 + +.byte 102,15,56,0,234 +.byte 102,69,15,56,0,232 + movdqa (%r11,%r10,1),%xmm4 + + movdqa L$k_sb2+16(%rip),%xmm2 + movdqa %xmm2,%xmm8 +.byte 102,15,56,0,211 +.byte 102,69,15,56,0,195 + movdqa %xmm0,%xmm3 + movdqa %xmm6,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm13,%xmm8 +.byte 102,15,56,0,193 +.byte 102,15,56,0,241 + addq $16,%r9 + pxor %xmm2,%xmm0 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,220 +.byte 102,68,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm11 +.byte 102,15,56,0,193 +.byte 102,15,56,0,241 + andq $0x30,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + pxor %xmm11,%xmm6 + +L$enc2x_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm9,%xmm7 + movdqa L$k_inv+16(%rip),%xmm5 + movdqa %xmm5,%xmm13 + pandn %xmm0,%xmm1 + pandn %xmm6,%xmm7 + psrld $4,%xmm1 + psrld $4,%xmm7 + pand %xmm9,%xmm0 + pand %xmm9,%xmm6 +.byte 102,15,56,0,232 +.byte 102,68,15,56,0,238 + movdqa %xmm10,%xmm3 + movdqa %xmm10,%xmm11 + pxor %xmm1,%xmm0 + pxor %xmm7,%xmm6 +.byte 102,15,56,0,217 +.byte 102,68,15,56,0,223 + movdqa %xmm10,%xmm4 + movdqa %xmm10,%xmm12 + pxor %xmm5,%xmm3 + pxor %xmm13,%xmm11 +.byte 102,15,56,0,224 +.byte 102,68,15,56,0,230 + movdqa %xmm10,%xmm2 + movdqa %xmm10,%xmm8 + pxor %xmm5,%xmm4 + pxor %xmm13,%xmm12 +.byte 102,15,56,0,211 +.byte 102,69,15,56,0,195 + movdqa %xmm10,%xmm3 + movdqa %xmm10,%xmm11 + pxor %xmm0,%xmm2 + pxor %xmm6,%xmm8 +.byte 102,15,56,0,220 +.byte 102,69,15,56,0,220 + movdqu (%r9),%xmm5 + + pxor %xmm1,%xmm3 + pxor %xmm7,%xmm11 + jnz L$enc2x_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 + movdqa %xmm4,%xmm12 + movdqa %xmm0,%xmm6 +.byte 102,15,56,0,226 +.byte 102,69,15,56,0,224 + pxor %xmm5,%xmm4 + pxor %xmm5,%xmm12 +.byte 102,15,56,0,195 +.byte 102,65,15,56,0,243 + movdqa 64(%r11,%r10,1),%xmm1 + + pxor %xmm4,%xmm0 + pxor %xmm12,%xmm6 +.byte 102,15,56,0,193 +.byte 102,15,56,0,241 + ret + + + + + + + + + +.p2align 4 +_vpaes_decrypt_core: + + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa L$k_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa L$k_dipt+16(%rip),%xmm0 + xorq $0x30,%r11 + leaq L$k_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $0x30,%r11 + pxor %xmm5,%xmm2 + movdqa L$k_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp L$dec_entry + +.p2align 4 +L$dec_loop: + + + + movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 0(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addq $16,%r9 +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax + +L$dec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 + jnz L$dec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret + + + + + + + + + +.p2align 4 +_vpaes_schedule_core: + + + + + + + call _vpaes_preheat + movdqa L$k_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq L$k_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq L$k_sr(%rip),%r10 + testq %rcx,%rcx + jnz L$schedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp L$schedule_go + +L$schedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $0x30,%r8 + +L$schedule_go: + cmpl $192,%esi + ja L$schedule_256 + je L$schedule_192 + + + + + + + + + + +L$schedule_128: + movl $10,%esi + +L$oop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp L$oop_schedule_128 + + + + + + + + + + + + + + + + +.p2align 4 +L$schedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +L$oop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp L$oop_schedule_192 + + + + + + + + + + + +.p2align 4 +L$schedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +L$oop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $0xFF,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp L$oop_schedule_256 + + + + + + + + + + + + +.p2align 4 +L$schedule_mangle_last: + + leaq L$k_deskew(%rip),%r11 + testq %rcx,%rcx + jnz L$schedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq L$k_opt(%rip),%r11 + addq $32,%rdx + +L$schedule_mangle_last_dec: + addq $-16,%rdx + pxor L$k_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_192_smear: + + pshufd $0x80,%xmm6,%xmm1 + pshufd $0xFE,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + ret + + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_round: + + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $0xFF,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor L$k_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_transform: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret + + + + + + + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_mangle: + + movdqa %xmm0,%xmm4 + movdqa L$k_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz L$schedule_mangle_dec + + + addq $16,%rdx + pxor L$k_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp L$schedule_mangle_both +.p2align 4 +L$schedule_mangle_dec: + + leaq L$k_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +L$schedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $0x30,%r8 + movdqu %xmm3,(%rdx) + ret + + + + + + +.globl _vpaes_set_encrypt_key +.private_extern _vpaes_set_encrypt_key + +.p2align 4 +_vpaes_set_encrypt_key: + +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST + + movb $1,_BORINGSSL_function_hit+5(%rip) +#endif + + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $0x30,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + ret + + + +.globl _vpaes_set_decrypt_key +.private_extern _vpaes_set_decrypt_key + +.p2align 4 +_vpaes_set_decrypt_key: + +_CET_ENDBR + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + ret + + + +.globl _vpaes_encrypt +.private_extern _vpaes_encrypt + +.p2align 4 +_vpaes_encrypt: + +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST + + movb $1,_BORINGSSL_function_hit+4(%rip) +#endif + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + ret + + + +.globl _vpaes_decrypt +.private_extern _vpaes_decrypt + +.p2align 4 +_vpaes_decrypt: + +_CET_ENDBR + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + ret + + +.globl _vpaes_cbc_encrypt +.private_extern _vpaes_cbc_encrypt + +.p2align 4 +_vpaes_cbc_encrypt: + +_CET_ENDBR + xchgq %rcx,%rdx + subq $16,%rcx + jc L$cbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je L$cbc_dec_loop + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc L$cbc_enc_loop + jmp L$cbc_done +.p2align 4 +L$cbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc L$cbc_dec_loop +L$cbc_done: + movdqu %xmm6,(%r8) +L$cbc_abort: + ret + + +.globl _vpaes_ctr32_encrypt_blocks +.private_extern _vpaes_ctr32_encrypt_blocks + +.p2align 4 +_vpaes_ctr32_encrypt_blocks: + +_CET_ENDBR + + xchgq %rcx,%rdx + testq %rcx,%rcx + jz L$ctr32_abort + movdqu (%r8),%xmm0 + movdqa L$ctr_add_one(%rip),%xmm8 + subq %rdi,%rsi + call _vpaes_preheat + movdqa %xmm0,%xmm6 + pshufb L$rev_ctr(%rip),%xmm6 + + testq $1,%rcx + jz L$ctr32_prep_loop + + + + movdqu (%rdi),%xmm7 + call _vpaes_encrypt_core + pxor %xmm7,%xmm0 + paddd %xmm8,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + subq $1,%rcx + leaq 16(%rdi),%rdi + jz L$ctr32_done + +L$ctr32_prep_loop: + + + movdqa %xmm6,%xmm14 + movdqa %xmm6,%xmm15 + paddd %xmm8,%xmm15 + +L$ctr32_loop: + movdqa L$rev_ctr(%rip),%xmm1 + movdqa %xmm14,%xmm0 + movdqa %xmm15,%xmm6 +.byte 102,15,56,0,193 +.byte 102,15,56,0,241 + call _vpaes_encrypt_core_2x + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa L$ctr_add_two(%rip),%xmm3 + pxor %xmm1,%xmm0 + pxor %xmm2,%xmm6 + paddd %xmm3,%xmm14 + paddd %xmm3,%xmm15 + movdqu %xmm0,(%rsi,%rdi,1) + movdqu %xmm6,16(%rsi,%rdi,1) + subq $2,%rcx + leaq 32(%rdi),%rdi + jnz L$ctr32_loop + +L$ctr32_done: +L$ctr32_abort: + ret + + + + + + + + + +.p2align 4 +_vpaes_preheat: + + leaq L$k_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + ret + + + + + + + + +.section __DATA,__const +.p2align 6 +_vpaes_consts: +L$k_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +L$k_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +L$k_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +L$k_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +L$k_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +L$k_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +L$k_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +L$k_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +L$k_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +L$k_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +L$k_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +L$k_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +L$k_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +L$k_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +L$k_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +L$k_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +L$k_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +L$k_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +L$k_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +L$k_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +L$k_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +L$k_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +L$k_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C + + +L$rev_ctr: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 + + +L$ctr_add_one: +.quad 0x0000000000000000, 0x0000000100000000 +L$ctr_add_two: +.quad 0x0000000000000000, 0x0000000200000000 + +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.p2align 6 + +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-linux.S b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-linux.S new file mode 100644 index 0000000000..019c63859b --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-linux.S @@ -0,0 +1,1133 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + + + + + + + + + + + + + + + + +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: +.cfi_startproc + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + addq $16,%r9 + pxor %xmm2,%xmm0 + leaq .Lk_mc_backward(%rip),%r10 + jmp .Lenc_entry + +.align 16 +.Lenc_loop: + + movdqa %xmm13,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa %xmm15,%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $0x30,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + +.Lenc_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm5 + pxor %xmm1,%xmm3 + jnz .Lenc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret +.cfi_endproc +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.type _vpaes_encrypt_core_2x,@function +.align 16 +_vpaes_encrypt_core_2x: +.cfi_startproc + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa %xmm9,%xmm7 + movdqa .Lk_ipt(%rip),%xmm2 + movdqa %xmm2,%xmm8 + pandn %xmm0,%xmm1 + pandn %xmm6,%xmm7 + movdqu (%r9),%xmm5 + + psrld $4,%xmm1 + psrld $4,%xmm7 + pand %xmm9,%xmm0 + pand %xmm9,%xmm6 +.byte 102,15,56,0,208 +.byte 102,68,15,56,0,198 + movdqa .Lk_ipt+16(%rip),%xmm0 + movdqa %xmm0,%xmm6 +.byte 102,15,56,0,193 +.byte 102,15,56,0,247 + pxor %xmm5,%xmm2 + pxor %xmm5,%xmm8 + addq $16,%r9 + pxor %xmm2,%xmm0 + pxor %xmm8,%xmm6 + leaq .Lk_mc_backward(%rip),%r10 + jmp .Lenc2x_entry + +.align 16 +.Lenc2x_loop: + + movdqa .Lk_sb1(%rip),%xmm4 + movdqa .Lk_sb1+16(%rip),%xmm0 + movdqa %xmm4,%xmm12 + movdqa %xmm0,%xmm6 +.byte 102,15,56,0,226 +.byte 102,69,15,56,0,224 +.byte 102,15,56,0,195 +.byte 102,65,15,56,0,243 + pxor %xmm5,%xmm4 + pxor %xmm5,%xmm12 + movdqa .Lk_sb2(%rip),%xmm5 + movdqa %xmm5,%xmm13 + pxor %xmm4,%xmm0 + pxor %xmm12,%xmm6 + movdqa -64(%r11,%r10,1),%xmm1 + +.byte 102,15,56,0,234 +.byte 102,69,15,56,0,232 + movdqa (%r11,%r10,1),%xmm4 + + movdqa .Lk_sb2+16(%rip),%xmm2 + movdqa %xmm2,%xmm8 +.byte 102,15,56,0,211 +.byte 102,69,15,56,0,195 + movdqa %xmm0,%xmm3 + movdqa %xmm6,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm13,%xmm8 +.byte 102,15,56,0,193 +.byte 102,15,56,0,241 + addq $16,%r9 + pxor %xmm2,%xmm0 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,220 +.byte 102,68,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm11 +.byte 102,15,56,0,193 +.byte 102,15,56,0,241 + andq $0x30,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + pxor %xmm11,%xmm6 + +.Lenc2x_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm9,%xmm7 + movdqa .Lk_inv+16(%rip),%xmm5 + movdqa %xmm5,%xmm13 + pandn %xmm0,%xmm1 + pandn %xmm6,%xmm7 + psrld $4,%xmm1 + psrld $4,%xmm7 + pand %xmm9,%xmm0 + pand %xmm9,%xmm6 +.byte 102,15,56,0,232 +.byte 102,68,15,56,0,238 + movdqa %xmm10,%xmm3 + movdqa %xmm10,%xmm11 + pxor %xmm1,%xmm0 + pxor %xmm7,%xmm6 +.byte 102,15,56,0,217 +.byte 102,68,15,56,0,223 + movdqa %xmm10,%xmm4 + movdqa %xmm10,%xmm12 + pxor %xmm5,%xmm3 + pxor %xmm13,%xmm11 +.byte 102,15,56,0,224 +.byte 102,68,15,56,0,230 + movdqa %xmm10,%xmm2 + movdqa %xmm10,%xmm8 + pxor %xmm5,%xmm4 + pxor %xmm13,%xmm12 +.byte 102,15,56,0,211 +.byte 102,69,15,56,0,195 + movdqa %xmm10,%xmm3 + movdqa %xmm10,%xmm11 + pxor %xmm0,%xmm2 + pxor %xmm6,%xmm8 +.byte 102,15,56,0,220 +.byte 102,69,15,56,0,220 + movdqu (%r9),%xmm5 + + pxor %xmm1,%xmm3 + pxor %xmm7,%xmm11 + jnz .Lenc2x_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 + movdqa %xmm4,%xmm12 + movdqa %xmm0,%xmm6 +.byte 102,15,56,0,226 +.byte 102,69,15,56,0,224 + pxor %xmm5,%xmm4 + pxor %xmm5,%xmm12 +.byte 102,15,56,0,195 +.byte 102,65,15,56,0,243 + movdqa 64(%r11,%r10,1),%xmm1 + + pxor %xmm4,%xmm0 + pxor %xmm12,%xmm6 +.byte 102,15,56,0,193 +.byte 102,15,56,0,241 + ret +.cfi_endproc +.size _vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x + + + + + + +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: +.cfi_startproc + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_dipt+16(%rip),%xmm0 + xorq $0x30,%r11 + leaq .Lk_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $0x30,%r11 + pxor %xmm5,%xmm2 + movdqa .Lk_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp .Ldec_entry + +.align 16 +.Ldec_loop: + + + + movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 0(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addq $16,%r9 +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax + +.Ldec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 + jnz .Ldec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret +.cfi_endproc +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + + + + + + +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: +.cfi_startproc + + + + + + call _vpaes_preheat + movdqa .Lk_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq .Lk_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq .Lk_sr(%rip),%r10 + testq %rcx,%rcx + jnz .Lschedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp .Lschedule_go + +.Lschedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $0x30,%r8 + +.Lschedule_go: + cmpl $192,%esi + ja .Lschedule_256 + je .Lschedule_192 + + + + + + + + + + +.Lschedule_128: + movl $10,%esi + +.Loop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + jmp .Loop_schedule_128 + + + + + + + + + + + + + + + + +.align 16 +.Lschedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +.Loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .Loop_schedule_192 + + + + + + + + + + + +.align 16 +.Lschedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +.Loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $0xFF,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp .Loop_schedule_256 + + + + + + + + + + + + +.align 16 +.Lschedule_mangle_last: + + leaq .Lk_deskew(%rip),%r11 + testq %rcx,%rcx + jnz .Lschedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq .Lk_opt(%rip),%r11 + addq $32,%rdx + +.Lschedule_mangle_last_dec: + addq $-16,%rdx + pxor .Lk_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret +.cfi_endproc +.size _vpaes_schedule_core,.-_vpaes_schedule_core + + + + + + + + + + + + + + + +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: +.cfi_startproc + pshufd $0x80,%xmm6,%xmm1 + pshufd $0xFE,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + ret +.cfi_endproc +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: +.cfi_startproc + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $0xFF,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor .Lk_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret +.cfi_endproc +.size _vpaes_schedule_round,.-_vpaes_schedule_round + + + + + + + + + + +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: +.cfi_startproc + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret +.cfi_endproc +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + + + + + + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: +.cfi_startproc + movdqa %xmm0,%xmm4 + movdqa .Lk_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz .Lschedule_mangle_dec + + + addq $16,%rdx + pxor .Lk_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp .Lschedule_mangle_both +.align 16 +.Lschedule_mangle_dec: + + leaq .Lk_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +.Lschedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $0x30,%r8 + movdqu %xmm3,(%rdx) + ret +.cfi_endproc +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + + + + +.globl vpaes_set_encrypt_key +.hidden vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: +.cfi_startproc +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+5(%rip) +#endif + + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $0x30,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + ret +.cfi_endproc +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.hidden vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: +.cfi_startproc +_CET_ENDBR + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + ret +.cfi_endproc +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key + +.globl vpaes_encrypt +.hidden vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: +.cfi_startproc +_CET_ENDBR +#ifdef BORINGSSL_DISPATCH_TEST +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+4(%rip) +#endif + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + ret +.cfi_endproc +.size vpaes_encrypt,.-vpaes_encrypt + +.globl vpaes_decrypt +.hidden vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: +.cfi_startproc +_CET_ENDBR + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + ret +.cfi_endproc +.size vpaes_decrypt,.-vpaes_decrypt +.globl vpaes_cbc_encrypt +.hidden vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: +.cfi_startproc +_CET_ENDBR + xchgq %rcx,%rdx + subq $16,%rcx + jc .Lcbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je .Lcbc_dec_loop + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_enc_loop + jmp .Lcbc_done +.align 16 +.Lcbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_dec_loop +.Lcbc_done: + movdqu %xmm6,(%r8) +.Lcbc_abort: + ret +.cfi_endproc +.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt +.globl vpaes_ctr32_encrypt_blocks +.hidden vpaes_ctr32_encrypt_blocks +.type vpaes_ctr32_encrypt_blocks,@function +.align 16 +vpaes_ctr32_encrypt_blocks: +.cfi_startproc +_CET_ENDBR + + xchgq %rcx,%rdx + testq %rcx,%rcx + jz .Lctr32_abort + movdqu (%r8),%xmm0 + movdqa .Lctr_add_one(%rip),%xmm8 + subq %rdi,%rsi + call _vpaes_preheat + movdqa %xmm0,%xmm6 + pshufb .Lrev_ctr(%rip),%xmm6 + + testq $1,%rcx + jz .Lctr32_prep_loop + + + + movdqu (%rdi),%xmm7 + call _vpaes_encrypt_core + pxor %xmm7,%xmm0 + paddd %xmm8,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + subq $1,%rcx + leaq 16(%rdi),%rdi + jz .Lctr32_done + +.Lctr32_prep_loop: + + + movdqa %xmm6,%xmm14 + movdqa %xmm6,%xmm15 + paddd %xmm8,%xmm15 + +.Lctr32_loop: + movdqa .Lrev_ctr(%rip),%xmm1 + movdqa %xmm14,%xmm0 + movdqa %xmm15,%xmm6 +.byte 102,15,56,0,193 +.byte 102,15,56,0,241 + call _vpaes_encrypt_core_2x + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa .Lctr_add_two(%rip),%xmm3 + pxor %xmm1,%xmm0 + pxor %xmm2,%xmm6 + paddd %xmm3,%xmm14 + paddd %xmm3,%xmm15 + movdqu %xmm0,(%rsi,%rdi,1) + movdqu %xmm6,16(%rsi,%rdi,1) + subq $2,%rcx + leaq 32(%rdi),%rdi + jnz .Lctr32_loop + +.Lctr32_done: +.Lctr32_abort: + ret +.cfi_endproc +.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks + + + + + + +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: +.cfi_startproc + leaq .Lk_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + ret +.cfi_endproc +.size _vpaes_preheat,.-_vpaes_preheat + + + + + +.type _vpaes_consts,@object +.section .rodata +.align 64 +_vpaes_consts: +.Lk_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +.Lk_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +.Lk_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +.Lk_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.Lk_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.Lk_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +.Lk_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +.Lk_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +.Lk_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +.Lk_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +.Lk_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +.Lk_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +.Lk_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +.Lk_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +.Lk_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.Lk_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C + + +.Lrev_ctr: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 + + +.Lctr_add_one: +.quad 0x0000000000000000, 0x0000000100000000 +.Lctr_add_two: +.quad 0x0000000000000000, 0x0000000200000000 + +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 64 +.size _vpaes_consts,.-_vpaes_consts +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-win.asm b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-win.asm new file mode 100644 index 0000000000..ddbfb1212c --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/vpaes-x86_64-win.asm @@ -0,0 +1,1487 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_encrypt_core: + + mov r9,rdx + mov r11,16 + mov eax,DWORD[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD[$L$k_ipt] + pandn xmm1,xmm0 + movdqu xmm5,XMMWORD[r9] + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD[(($L$k_ipt+16))] +DB 102,15,56,0,193 + pxor xmm2,xmm5 + add r9,16 + pxor xmm0,xmm2 + lea r10,[$L$k_mc_backward] + jmp NEAR $L$enc_entry + +ALIGN 16 +$L$enc_loop: + + movdqa xmm4,xmm13 + movdqa xmm0,xmm12 +DB 102,15,56,0,226 +DB 102,15,56,0,195 + pxor xmm4,xmm5 + movdqa xmm5,xmm15 + pxor xmm0,xmm4 + movdqa xmm1,XMMWORD[((-64))+r10*1+r11] +DB 102,15,56,0,234 + movdqa xmm4,XMMWORD[r10*1+r11] + movdqa xmm2,xmm14 +DB 102,15,56,0,211 + movdqa xmm3,xmm0 + pxor xmm2,xmm5 +DB 102,15,56,0,193 + add r9,16 + pxor xmm0,xmm2 +DB 102,15,56,0,220 + add r11,16 + pxor xmm3,xmm0 +DB 102,15,56,0,193 + and r11,0x30 + sub rax,1 + pxor xmm0,xmm3 + +$L$enc_entry: + + movdqa xmm1,xmm9 + movdqa xmm5,xmm11 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,232 + movdqa xmm3,xmm10 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm10 + pxor xmm3,xmm5 +DB 102,15,56,0,224 + movdqa xmm2,xmm10 + pxor xmm4,xmm5 +DB 102,15,56,0,211 + movdqa xmm3,xmm10 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm5,XMMWORD[r9] + pxor xmm3,xmm1 + jnz NEAR $L$enc_loop + + + movdqa xmm4,XMMWORD[((-96))+r10] + movdqa xmm0,XMMWORD[((-80))+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm5 +DB 102,15,56,0,195 + movdqa xmm1,XMMWORD[64+r10*1+r11] + pxor xmm0,xmm4 +DB 102,15,56,0,193 + ret + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_encrypt_core_2x: + + mov r9,rdx + mov r11,16 + mov eax,DWORD[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm7,xmm9 + movdqa xmm2,XMMWORD[$L$k_ipt] + movdqa xmm8,xmm2 + pandn xmm1,xmm0 + pandn xmm7,xmm6 + movdqu xmm5,XMMWORD[r9] + + psrld xmm1,4 + psrld xmm7,4 + pand xmm0,xmm9 + pand xmm6,xmm9 +DB 102,15,56,0,208 +DB 102,68,15,56,0,198 + movdqa xmm0,XMMWORD[(($L$k_ipt+16))] + movdqa xmm6,xmm0 +DB 102,15,56,0,193 +DB 102,15,56,0,247 + pxor xmm2,xmm5 + pxor xmm8,xmm5 + add r9,16 + pxor xmm0,xmm2 + pxor xmm6,xmm8 + lea r10,[$L$k_mc_backward] + jmp NEAR $L$enc2x_entry + +ALIGN 16 +$L$enc2x_loop: + + movdqa xmm4,XMMWORD[$L$k_sb1] + movdqa xmm0,XMMWORD[(($L$k_sb1+16))] + movdqa xmm12,xmm4 + movdqa xmm6,xmm0 +DB 102,15,56,0,226 +DB 102,69,15,56,0,224 +DB 102,15,56,0,195 +DB 102,65,15,56,0,243 + pxor xmm4,xmm5 + pxor xmm12,xmm5 + movdqa xmm5,XMMWORD[$L$k_sb2] + movdqa xmm13,xmm5 + pxor xmm0,xmm4 + pxor xmm6,xmm12 + movdqa xmm1,XMMWORD[((-64))+r10*1+r11] + +DB 102,15,56,0,234 +DB 102,69,15,56,0,232 + movdqa xmm4,XMMWORD[r10*1+r11] + + movdqa xmm2,XMMWORD[(($L$k_sb2+16))] + movdqa xmm8,xmm2 +DB 102,15,56,0,211 +DB 102,69,15,56,0,195 + movdqa xmm3,xmm0 + movdqa xmm11,xmm6 + pxor xmm2,xmm5 + pxor xmm8,xmm13 +DB 102,15,56,0,193 +DB 102,15,56,0,241 + add r9,16 + pxor xmm0,xmm2 + pxor xmm6,xmm8 +DB 102,15,56,0,220 +DB 102,68,15,56,0,220 + add r11,16 + pxor xmm3,xmm0 + pxor xmm11,xmm6 +DB 102,15,56,0,193 +DB 102,15,56,0,241 + and r11,0x30 + sub rax,1 + pxor xmm0,xmm3 + pxor xmm6,xmm11 + +$L$enc2x_entry: + + movdqa xmm1,xmm9 + movdqa xmm7,xmm9 + movdqa xmm5,XMMWORD[(($L$k_inv+16))] + movdqa xmm13,xmm5 + pandn xmm1,xmm0 + pandn xmm7,xmm6 + psrld xmm1,4 + psrld xmm7,4 + pand xmm0,xmm9 + pand xmm6,xmm9 +DB 102,15,56,0,232 +DB 102,68,15,56,0,238 + movdqa xmm3,xmm10 + movdqa xmm11,xmm10 + pxor xmm0,xmm1 + pxor xmm6,xmm7 +DB 102,15,56,0,217 +DB 102,68,15,56,0,223 + movdqa xmm4,xmm10 + movdqa xmm12,xmm10 + pxor xmm3,xmm5 + pxor xmm11,xmm13 +DB 102,15,56,0,224 +DB 102,68,15,56,0,230 + movdqa xmm2,xmm10 + movdqa xmm8,xmm10 + pxor xmm4,xmm5 + pxor xmm12,xmm13 +DB 102,15,56,0,211 +DB 102,69,15,56,0,195 + movdqa xmm3,xmm10 + movdqa xmm11,xmm10 + pxor xmm2,xmm0 + pxor xmm8,xmm6 +DB 102,15,56,0,220 +DB 102,69,15,56,0,220 + movdqu xmm5,XMMWORD[r9] + + pxor xmm3,xmm1 + pxor xmm11,xmm7 + jnz NEAR $L$enc2x_loop + + + movdqa xmm4,XMMWORD[((-96))+r10] + movdqa xmm0,XMMWORD[((-80))+r10] + movdqa xmm12,xmm4 + movdqa xmm6,xmm0 +DB 102,15,56,0,226 +DB 102,69,15,56,0,224 + pxor xmm4,xmm5 + pxor xmm12,xmm5 +DB 102,15,56,0,195 +DB 102,65,15,56,0,243 + movdqa xmm1,XMMWORD[64+r10*1+r11] + + pxor xmm0,xmm4 + pxor xmm6,xmm12 +DB 102,15,56,0,193 +DB 102,15,56,0,241 + ret + + + + + + + + + +ALIGN 16 +_vpaes_decrypt_core: + + mov r9,rdx + mov eax,DWORD[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD[$L$k_dipt] + pandn xmm1,xmm0 + mov r11,rax + psrld xmm1,4 + movdqu xmm5,XMMWORD[r9] + shl r11,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD[(($L$k_dipt+16))] + xor r11,0x30 + lea r10,[$L$k_dsbd] +DB 102,15,56,0,193 + and r11,0x30 + pxor xmm2,xmm5 + movdqa xmm5,XMMWORD[(($L$k_mc_forward+48))] + pxor xmm0,xmm2 + add r9,16 + add r11,r10 + jmp NEAR $L$dec_entry + +ALIGN 16 +$L$dec_loop: + + + + movdqa xmm4,XMMWORD[((-32))+r10] + movdqa xmm1,XMMWORD[((-16))+r10] +DB 102,15,56,0,226 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD[r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD[16+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD[32+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD[48+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD[64+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD[80+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + add r9,16 +DB 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub rax,1 + +$L$dec_entry: + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + movdqa xmm2,xmm11 + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm3,xmm10 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm10 + pxor xmm3,xmm2 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + movdqa xmm3,xmm10 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm0,XMMWORD[r9] + pxor xmm3,xmm1 + jnz NEAR $L$dec_loop + + + movdqa xmm4,XMMWORD[96+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD[112+r10] + movdqa xmm2,XMMWORD[((-352))+r11] +DB 102,15,56,0,195 + pxor xmm0,xmm4 +DB 102,15,56,0,194 + ret + + + + + + + + + +ALIGN 16 +_vpaes_schedule_core: + + + + + + + call _vpaes_preheat + movdqa xmm8,XMMWORD[$L$k_rcon] + movdqu xmm0,XMMWORD[rdi] + + + movdqa xmm3,xmm0 + lea r11,[$L$k_ipt] + call _vpaes_schedule_transform + movdqa xmm7,xmm0 + + lea r10,[$L$k_sr] + test rcx,rcx + jnz NEAR $L$schedule_am_decrypting + + + movdqu XMMWORD[rdx],xmm0 + jmp NEAR $L$schedule_go + +$L$schedule_am_decrypting: + + movdqa xmm1,XMMWORD[r10*1+r8] +DB 102,15,56,0,217 + movdqu XMMWORD[rdx],xmm3 + xor r8,0x30 + +$L$schedule_go: + cmp esi,192 + ja NEAR $L$schedule_256 + je NEAR $L$schedule_192 + + + + + + + + + + +$L$schedule_128: + mov esi,10 + +$L$oop_schedule_128: + call _vpaes_schedule_round + dec rsi + jz NEAR $L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp NEAR $L$oop_schedule_128 + + + + + + + + + + + + + + + + +ALIGN 16 +$L$schedule_192: + movdqu xmm0,XMMWORD[8+rdi] + call _vpaes_schedule_transform + movdqa xmm6,xmm0 + pxor xmm4,xmm4 + movhlps xmm6,xmm4 + mov esi,4 + +$L$oop_schedule_192: + call _vpaes_schedule_round +DB 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + dec rsi + jz NEAR $L$schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp NEAR $L$oop_schedule_192 + + + + + + + + + + + +ALIGN 16 +$L$schedule_256: + movdqu xmm0,XMMWORD[16+rdi] + call _vpaes_schedule_transform + mov esi,7 + +$L$oop_schedule_256: + call _vpaes_schedule_mangle + movdqa xmm6,xmm0 + + + call _vpaes_schedule_round + dec rsi + jz NEAR $L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd xmm0,xmm0,0xFF + movdqa xmm5,xmm7 + movdqa xmm7,xmm6 + call _vpaes_schedule_low_round + movdqa xmm7,xmm5 + + jmp NEAR $L$oop_schedule_256 + + + + + + + + + + + + +ALIGN 16 +$L$schedule_mangle_last: + + lea r11,[$L$k_deskew] + test rcx,rcx + jnz NEAR $L$schedule_mangle_last_dec + + + movdqa xmm1,XMMWORD[r10*1+r8] +DB 102,15,56,0,193 + lea r11,[$L$k_opt] + add rdx,32 + +$L$schedule_mangle_last_dec: + add rdx,-16 + pxor xmm0,XMMWORD[$L$k_s63] + call _vpaes_schedule_transform + movdqu XMMWORD[rdx],xmm0 + + + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + ret + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_192_smear: + + pshufd xmm1,xmm6,0x80 + pshufd xmm0,xmm7,0xFE + pxor xmm6,xmm1 + pxor xmm1,xmm1 + pxor xmm6,xmm0 + movdqa xmm0,xmm6 + movhlps xmm6,xmm1 + ret + + + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_round: + + + pxor xmm1,xmm1 +DB 102,65,15,58,15,200,15 +DB 102,69,15,58,15,192,15 + pxor xmm7,xmm1 + + + pshufd xmm0,xmm0,0xFF +DB 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa xmm1,xmm7 + pslldq xmm7,4 + pxor xmm7,xmm1 + movdqa xmm1,xmm7 + pslldq xmm7,8 + pxor xmm7,xmm1 + pxor xmm7,XMMWORD[$L$k_s63] + + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,xmm11 +DB 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm10 +DB 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm10 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm10 +DB 102,15,56,0,220 + pxor xmm3,xmm1 + movdqa xmm4,xmm13 +DB 102,15,56,0,226 + movdqa xmm0,xmm12 +DB 102,15,56,0,195 + pxor xmm0,xmm4 + + + pxor xmm0,xmm7 + movdqa xmm7,xmm0 + ret + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_transform: + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,XMMWORD[r11] +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD[16+r11] +DB 102,15,56,0,193 + pxor xmm0,xmm2 + ret + + + + + + + + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_mangle: + + movdqa xmm4,xmm0 + movdqa xmm5,XMMWORD[$L$k_mc_forward] + test rcx,rcx + jnz NEAR $L$schedule_mangle_dec + + + add rdx,16 + pxor xmm4,XMMWORD[$L$k_s63] +DB 102,15,56,0,229 + movdqa xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 + + jmp NEAR $L$schedule_mangle_both +ALIGN 16 +$L$schedule_mangle_dec: + + lea r11,[$L$k_dksd] + movdqa xmm1,xmm9 + pandn xmm1,xmm4 + psrld xmm1,4 + pand xmm4,xmm9 + + movdqa xmm2,XMMWORD[r11] +DB 102,15,56,0,212 + movdqa xmm3,XMMWORD[16+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD[32+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD[48+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD[64+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD[80+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD[96+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD[112+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 + + add rdx,-16 + +$L$schedule_mangle_both: + movdqa xmm1,XMMWORD[r10*1+r8] +DB 102,15,56,0,217 + add r8,-16 + and r8,0x30 + movdqu XMMWORD[rdx],xmm3 + ret + + + + + + +global vpaes_set_encrypt_key + +ALIGN 16 +vpaes_set_encrypt_key: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_encrypt_key: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR +%ifdef BORINGSSL_DISPATCH_TEST +EXTERN BORINGSSL_function_hit + mov BYTE[((BORINGSSL_function_hit+5))],1 +%endif + + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$enc_key_body: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD[240+rdx],eax + + mov ecx,0 + mov r8d,0x30 + call _vpaes_schedule_core + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$enc_key_epilogue: + xor eax,eax + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_vpaes_set_encrypt_key: + +global vpaes_set_decrypt_key + +ALIGN 16 +vpaes_set_decrypt_key: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_decrypt_key: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$dec_key_body: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD[240+rdx],eax + shl eax,4 + lea rdx,[16+rax*1+rdx] + + mov ecx,1 + mov r8d,esi + shr r8d,1 + and r8d,32 + xor r8d,32 + call _vpaes_schedule_core + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$dec_key_epilogue: + xor eax,eax + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_vpaes_set_decrypt_key: + +global vpaes_encrypt + +ALIGN 16 +vpaes_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR +%ifdef BORINGSSL_DISPATCH_TEST +EXTERN BORINGSSL_function_hit + mov BYTE[((BORINGSSL_function_hit+4))],1 +%endif + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$enc_body: + movdqu xmm0,XMMWORD[rdi] + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu XMMWORD[rsi],xmm0 + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$enc_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_vpaes_encrypt: + +global vpaes_decrypt + +ALIGN 16 +vpaes_decrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_decrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$dec_body: + movdqu xmm0,XMMWORD[rdi] + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu XMMWORD[rsi],xmm0 + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$dec_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_vpaes_decrypt: +global vpaes_cbc_encrypt + +ALIGN 16 +vpaes_cbc_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_cbc_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + xchg rdx,rcx + sub rcx,16 + jc NEAR $L$cbc_abort + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$cbc_body: + movdqu xmm6,XMMWORD[r8] + sub rsi,rdi + call _vpaes_preheat + cmp r9d,0 + je NEAR $L$cbc_dec_loop + jmp NEAR $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop: + movdqu xmm0,XMMWORD[rdi] + pxor xmm0,xmm6 + call _vpaes_encrypt_core + movdqa xmm6,xmm0 + movdqu XMMWORD[rdi*1+rsi],xmm0 + lea rdi,[16+rdi] + sub rcx,16 + jnc NEAR $L$cbc_enc_loop + jmp NEAR $L$cbc_done +ALIGN 16 +$L$cbc_dec_loop: + movdqu xmm0,XMMWORD[rdi] + movdqa xmm7,xmm0 + call _vpaes_decrypt_core + pxor xmm0,xmm6 + movdqa xmm6,xmm7 + movdqu XMMWORD[rdi*1+rsi],xmm0 + lea rdi,[16+rdi] + sub rcx,16 + jnc NEAR $L$cbc_dec_loop +$L$cbc_done: + movdqu XMMWORD[r8],xmm6 + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$cbc_epilogue: +$L$cbc_abort: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_vpaes_cbc_encrypt: +global vpaes_ctr32_encrypt_blocks + +ALIGN 16 +vpaes_ctr32_encrypt_blocks: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_ctr32_encrypt_blocks: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + + xchg rdx,rcx + test rcx,rcx + jz NEAR $L$ctr32_abort + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$ctr32_body: + movdqu xmm0,XMMWORD[r8] + movdqa xmm8,XMMWORD[$L$ctr_add_one] + sub rsi,rdi + call _vpaes_preheat + movdqa xmm6,xmm0 + pshufb xmm6,XMMWORD[$L$rev_ctr] + + test rcx,1 + jz NEAR $L$ctr32_prep_loop + + + + movdqu xmm7,XMMWORD[rdi] + call _vpaes_encrypt_core + pxor xmm0,xmm7 + paddd xmm6,xmm8 + movdqu XMMWORD[rdi*1+rsi],xmm0 + sub rcx,1 + lea rdi,[16+rdi] + jz NEAR $L$ctr32_done + +$L$ctr32_prep_loop: + + + movdqa xmm14,xmm6 + movdqa xmm15,xmm6 + paddd xmm15,xmm8 + +$L$ctr32_loop: + movdqa xmm1,XMMWORD[$L$rev_ctr] + movdqa xmm0,xmm14 + movdqa xmm6,xmm15 +DB 102,15,56,0,193 +DB 102,15,56,0,241 + call _vpaes_encrypt_core_2x + movdqu xmm1,XMMWORD[rdi] + movdqu xmm2,XMMWORD[16+rdi] + movdqa xmm3,XMMWORD[$L$ctr_add_two] + pxor xmm0,xmm1 + pxor xmm6,xmm2 + paddd xmm14,xmm3 + paddd xmm15,xmm3 + movdqu XMMWORD[rdi*1+rsi],xmm0 + movdqu XMMWORD[16+rdi*1+rsi],xmm6 + sub rcx,2 + lea rdi,[32+rdi] + jnz NEAR $L$ctr32_loop + +$L$ctr32_done: + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$ctr32_epilogue: +$L$ctr32_abort: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_vpaes_ctr32_encrypt_blocks: + + + + + + + +ALIGN 16 +_vpaes_preheat: + + lea r10,[$L$k_s0F] + movdqa xmm10,XMMWORD[((-32))+r10] + movdqa xmm11,XMMWORD[((-16))+r10] + movdqa xmm9,XMMWORD[r10] + movdqa xmm13,XMMWORD[48+r10] + movdqa xmm12,XMMWORD[64+r10] + movdqa xmm15,XMMWORD[80+r10] + movdqa xmm14,XMMWORD[96+r10] + ret + + + + + + + + +section .rdata rdata align=8 +ALIGN 64 +_vpaes_consts: +$L$k_inv: + DQ 0x0E05060F0D080180,0x040703090A0B0C02 + DQ 0x01040A060F0B0780,0x030D0E0C02050809 + +$L$k_s0F: + DQ 0x0F0F0F0F0F0F0F0F,0x0F0F0F0F0F0F0F0F + +$L$k_ipt: + DQ 0xC2B2E8985A2A7000,0xCABAE09052227808 + DQ 0x4C01307D317C4D00,0xCD80B1FCB0FDCC81 + +$L$k_sb1: + DQ 0xB19BE18FCB503E00,0xA5DF7A6E142AF544 + DQ 0x3618D415FAE22300,0x3BF7CCC10D2ED9EF +$L$k_sb2: + DQ 0xE27A93C60B712400,0x5EB7E955BC982FCD + DQ 0x69EB88400AE12900,0xC2A163C8AB82234A +$L$k_sbo: + DQ 0xD0D26D176FBDC700,0x15AABF7AC502A878 + DQ 0xCFE474A55FBB6A00,0x8E1E90D1412B35FA + +$L$k_mc_forward: + DQ 0x0407060500030201,0x0C0F0E0D080B0A09 + DQ 0x080B0A0904070605,0x000302010C0F0E0D + DQ 0x0C0F0E0D080B0A09,0x0407060500030201 + DQ 0x000302010C0F0E0D,0x080B0A0904070605 + +$L$k_mc_backward: + DQ 0x0605040702010003,0x0E0D0C0F0A09080B + DQ 0x020100030E0D0C0F,0x0A09080B06050407 + DQ 0x0E0D0C0F0A09080B,0x0605040702010003 + DQ 0x0A09080B06050407,0x020100030E0D0C0F + +$L$k_sr: + DQ 0x0706050403020100,0x0F0E0D0C0B0A0908 + DQ 0x030E09040F0A0500,0x0B06010C07020D08 + DQ 0x0F060D040B020900,0x070E050C030A0108 + DQ 0x0B0E0104070A0D00,0x0306090C0F020508 + +$L$k_rcon: + DQ 0x1F8391B9AF9DEEB6,0x702A98084D7C7D81 + +$L$k_s63: + DQ 0x5B5B5B5B5B5B5B5B,0x5B5B5B5B5B5B5B5B + +$L$k_opt: + DQ 0xFF9F4929D6B66000,0xF7974121DEBE6808 + DQ 0x01EDBD5150BCEC00,0xE10D5DB1B05C0CE0 + +$L$k_deskew: + DQ 0x07E4A34047A4E300,0x1DFEB95A5DBEF91A + DQ 0x5F36B5DC83EA6900,0x2841C2ABF49D1E77 + + + + + +$L$k_dksd: + DQ 0xFEB91A5DA3E44700,0x0740E3A45A1DBEF9 + DQ 0x41C277F4B5368300,0x5FDC69EAAB289D1E +$L$k_dksb: + DQ 0x9A4FCA1F8550D500,0x03D653861CC94C99 + DQ 0x115BEDA7B6FC4A00,0xD993256F7E3482C8 +$L$k_dkse: + DQ 0xD5031CCA1FC9D600,0x53859A4C994F5086 + DQ 0xA23196054FDC7BE8,0xCD5EF96A20B31487 +$L$k_dks9: + DQ 0xB6116FC87ED9A700,0x4AED933482255BFC + DQ 0x4576516227143300,0x8BB89FACE9DAFDCE + + + + + +$L$k_dipt: + DQ 0x0F505B040B545F00,0x154A411E114E451A + DQ 0x86E383E660056500,0x12771772F491F194 + +$L$k_dsb9: + DQ 0x851C03539A86D600,0xCAD51F504F994CC9 + DQ 0xC03B1789ECD74900,0x725E2C9EB2FBA565 +$L$k_dsbd: + DQ 0x7D57CCDFE6B1A200,0xF56E9B13882A4439 + DQ 0x3CE2FAF724C6CB00,0x2931180D15DEEFD3 +$L$k_dsbb: + DQ 0xD022649296B44200,0x602646F6B0F2D404 + DQ 0xC19498A6CD596700,0xF3FF0C3E3255AA6B +$L$k_dsbe: + DQ 0x46F2929626D4D000,0x2242600464B4F6B0 + DQ 0x0C55A6CDFFAAC100,0x9467F36B98593E32 +$L$k_dsbo: + DQ 0x1387EA537EF94000,0xC7AA6DB9D4943E2D + DQ 0x12D7560F93441D00,0xCA4B8159D8C58E9C + + +$L$rev_ctr: + DQ 0x0706050403020100,0x0c0d0e0f0b0a0908 + + +$L$ctr_add_one: + DQ 0x0000000000000000,0x0000000100000000 +$L$ctr_add_two: + DQ 0x0000000000000000,0x0000000200000000 + + DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 + DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54 + DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97 + DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32 + DB 85,110,105,118,101,114,115,105,116,121,41,0 +ALIGN 64 + +section .text + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rsi,[16+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + lea rax,[184+rax] + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_vpaes_set_encrypt_key wrt ..imagebase + DD $L$SEH_end_vpaes_set_encrypt_key wrt ..imagebase + DD $L$SEH_info_vpaes_set_encrypt_key wrt ..imagebase + + DD $L$SEH_begin_vpaes_set_decrypt_key wrt ..imagebase + DD $L$SEH_end_vpaes_set_decrypt_key wrt ..imagebase + DD $L$SEH_info_vpaes_set_decrypt_key wrt ..imagebase + + DD $L$SEH_begin_vpaes_encrypt wrt ..imagebase + DD $L$SEH_end_vpaes_encrypt wrt ..imagebase + DD $L$SEH_info_vpaes_encrypt wrt ..imagebase + + DD $L$SEH_begin_vpaes_decrypt wrt ..imagebase + DD $L$SEH_end_vpaes_decrypt wrt ..imagebase + DD $L$SEH_info_vpaes_decrypt wrt ..imagebase + + DD $L$SEH_begin_vpaes_cbc_encrypt wrt ..imagebase + DD $L$SEH_end_vpaes_cbc_encrypt wrt ..imagebase + DD $L$SEH_info_vpaes_cbc_encrypt wrt ..imagebase + + DD $L$SEH_begin_vpaes_ctr32_encrypt_blocks wrt ..imagebase + DD $L$SEH_end_vpaes_ctr32_encrypt_blocks wrt ..imagebase + DD $L$SEH_info_vpaes_ctr32_encrypt_blocks wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_vpaes_set_encrypt_key: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$enc_key_body wrt ..imagebase,$L$enc_key_epilogue wrt ..imagebase +$L$SEH_info_vpaes_set_decrypt_key: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$dec_key_body wrt ..imagebase,$L$dec_key_epilogue wrt ..imagebase +$L$SEH_info_vpaes_encrypt: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$enc_body wrt ..imagebase,$L$enc_epilogue wrt ..imagebase +$L$SEH_info_vpaes_decrypt: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$dec_body wrt ..imagebase,$L$dec_epilogue wrt ..imagebase +$L$SEH_info_vpaes_cbc_encrypt: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$cbc_body wrt ..imagebase,$L$cbc_epilogue wrt ..imagebase +$L$SEH_info_vpaes_ctr32_encrypt_blocks: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/x86-mont-apple.S b/yass/third_party/boringssl/src/gen/bcm/x86-mont-apple.S new file mode 100644 index 0000000000..f991f6c563 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86-mont-apple.S @@ -0,0 +1,484 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _bn_mul_mont +.private_extern _bn_mul_mont +.align 4 +_bn_mul_mont: +L_bn_mul_mont_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + movl 40(%esp),%edi + cmpl $4,%edi + jl L000just_leave + leal 20(%esp),%esi + leal 24(%esp),%edx + addl $2,%edi + negl %edi + leal -32(%esp,%edi,4),%ebp + negl %edi + movl %ebp,%eax + subl %edx,%eax + andl $2047,%eax + subl %eax,%ebp + xorl %ebp,%edx + andl $2048,%edx + xorl $2048,%edx + subl %edx,%ebp + andl $-64,%ebp + movl %esp,%eax + subl %ebp,%eax + andl $-4096,%eax + movl %esp,%edx + leal (%ebp,%eax,1),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja L001page_walk + jmp L002page_walk_done +.align 4,0x90 +L001page_walk: + leal -4096(%esp),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja L001page_walk +L002page_walk_done: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%ebp + movl 16(%esi),%esi + movl (%esi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %ebp,16(%esp) + movl %esi,20(%esp) + leal -3(%edi),%ebx + movl %edx,24(%esp) + call L003PIC_me_up +L003PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L004non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 4,0x90 +L0051st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl L0051st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +L006outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +L007inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz L007inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle L006outer + emms + jmp L008common_tail +.align 4,0x90 +L004non_sse2: + movl 8(%esp),%esi + leal 1(%ebx),%ebp + movl 12(%esp),%edi + xorl %ecx,%ecx + movl %esi,%edx + andl $1,%ebp + subl %edi,%edx + leal 4(%edi,%ebx,4),%eax + orl %edx,%ebp + movl (%edi),%edi + jz L009bn_sqr_mont + movl %eax,28(%esp) + movl (%esi),%eax + xorl %edx,%edx +.align 4,0x90 +L010mull: + movl %edx,%ebp + mull %edi + addl %eax,%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + movl (%esi,%ecx,4),%eax + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl L010mull + movl %edx,%ebp + mull %edi + movl 20(%esp),%edi + addl %ebp,%eax + movl 16(%esp),%esi + adcl $0,%edx + imull 32(%esp),%edi + movl %eax,32(%esp,%ebx,4) + xorl %ecx,%ecx + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + movl (%esi),%eax + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + incl %ecx + jmp L0112ndmadd +.align 4,0x90 +L0121stmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl L0121stmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + addl %eax,%ebp + adcl $0,%edx + imull 32(%esp),%edi + xorl %ecx,%ecx + addl 36(%esp,%ebx,4),%edx + movl %ebp,32(%esp,%ebx,4) + adcl $0,%ecx + movl (%esi),%eax + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + movl $1,%ecx +.align 4,0x90 +L0112ndmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl L0112ndmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + xorl %eax,%eax + movl 12(%esp),%ecx + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + leal 4(%ecx),%ecx + movl %edx,32(%esp,%ebx,4) + cmpl 28(%esp),%ecx + movl %eax,36(%esp,%ebx,4) + je L008common_tail + movl (%ecx),%edi + movl 8(%esp),%esi + movl %ecx,12(%esp) + xorl %ecx,%ecx + xorl %edx,%edx + movl (%esi),%eax + jmp L0121stmadd +.align 4,0x90 +L009bn_sqr_mont: + movl %ebx,(%esp) + movl %ecx,12(%esp) + movl %edi,%eax + mull %edi + movl %eax,32(%esp) + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx + incl %ecx +.align 4,0x90 +L013sqr: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal 1(%ecx),%ecx + adcl $0,%edx + leal (%ebx,%eax,2),%ebp + shrl $31,%eax + cmpl (%esp),%ecx + movl %eax,%ebx + movl %ebp,28(%esp,%ecx,4) + jl L013sqr + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + leal (%ebx,%eax,2),%ebp + imull 32(%esp),%edi + shrl $31,%eax + movl %ebp,32(%esp,%ecx,4) + leal (%eax,%edx,2),%ebp + movl (%esi),%eax + shrl $31,%edx + movl %ebp,36(%esp,%ecx,4) + movl %edx,40(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + movl %ecx,%ebx + adcl $0,%edx + movl 4(%esi),%eax + movl $1,%ecx +.align 4,0x90 +L0143rdmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + movl 4(%esi,%ecx,4),%eax + adcl $0,%edx + movl %ebp,28(%esp,%ecx,4) + movl %edx,%ebp + mull %edi + addl 36(%esp,%ecx,4),%ebp + leal 2(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl L0143rdmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + movl 12(%esp),%ecx + xorl %eax,%eax + movl 8(%esp),%esi + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + movl %edx,32(%esp,%ebx,4) + cmpl %ebx,%ecx + movl %eax,36(%esp,%ebx,4) + je L008common_tail + movl 4(%esi,%ecx,4),%edi + leal 1(%ecx),%ecx + movl %edi,%eax + movl %ecx,12(%esp) + mull %edi + addl 32(%esp,%ecx,4),%eax + adcl $0,%edx + movl %eax,32(%esp,%ecx,4) + xorl %ebp,%ebp + cmpl %ebx,%ecx + leal 1(%ecx),%ecx + je L015sqrlast + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx +.align 4,0x90 +L016sqradd: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal (%eax,%eax,1),%ebp + adcl $0,%edx + shrl $31,%eax + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%eax + addl %ebx,%ebp + adcl $0,%eax + cmpl (%esp),%ecx + movl %ebp,28(%esp,%ecx,4) + movl %eax,%ebx + jle L016sqradd + movl %edx,%ebp + addl %edx,%edx + shrl $31,%ebp + addl %ebx,%edx + adcl $0,%ebp +L015sqrlast: + movl 20(%esp),%edi + movl 16(%esp),%esi + imull 32(%esp),%edi + addl 32(%esp,%ecx,4),%edx + movl (%esi),%eax + adcl $0,%ebp + movl %edx,32(%esp,%ecx,4) + movl %ebp,36(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + leal -1(%ecx),%ebx + adcl $0,%edx + movl $1,%ecx + movl 4(%esi),%eax + jmp L0143rdmadd +.align 4,0x90 +L008common_tail: + movl 16(%esp),%ebp + movl 4(%esp),%edi + leal 32(%esp),%esi + movl (%esi),%eax + movl %ebx,%ecx + xorl %edx,%edx +.align 4,0x90 +L017sub: + sbbl (%ebp,%edx,4),%eax + movl %eax,(%edi,%edx,4) + decl %ecx + movl 4(%esi,%edx,4),%eax + leal 1(%edx),%edx + jge L017sub + sbbl $0,%eax + movl $-1,%edx + xorl %eax,%edx + jmp L018copy +.align 4,0x90 +L018copy: + movl 32(%esp,%ebx,4),%esi + movl (%edi,%ebx,4),%ebp + movl %ecx,32(%esp,%ebx,4) + andl %eax,%esi + andl %edx,%ebp + orl %esi,%ebp + movl %ebp,(%edi,%ebx,4) + decl %ebx + jge L018copy + movl 24(%esp),%esp + movl $1,%eax +L000just_leave: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +.byte 111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/bcm/x86-mont-linux.S b/yass/third_party/boringssl/src/gen/bcm/x86-mont-linux.S new file mode 100644 index 0000000000..e6b4ef574f --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86-mont-linux.S @@ -0,0 +1,482 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl bn_mul_mont +.hidden bn_mul_mont +.type bn_mul_mont,@function +.align 16 +bn_mul_mont: +.L_bn_mul_mont_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + movl 40(%esp),%edi + cmpl $4,%edi + jl .L000just_leave + leal 20(%esp),%esi + leal 24(%esp),%edx + addl $2,%edi + negl %edi + leal -32(%esp,%edi,4),%ebp + negl %edi + movl %ebp,%eax + subl %edx,%eax + andl $2047,%eax + subl %eax,%ebp + xorl %ebp,%edx + andl $2048,%edx + xorl $2048,%edx + subl %edx,%ebp + andl $-64,%ebp + movl %esp,%eax + subl %ebp,%eax + andl $-4096,%eax + movl %esp,%edx + leal (%ebp,%eax,1),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja .L001page_walk + jmp .L002page_walk_done +.align 16 +.L001page_walk: + leal -4096(%esp),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja .L001page_walk +.L002page_walk_done: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%ebp + movl 16(%esi),%esi + movl (%esi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %ebp,16(%esp) + movl %esi,20(%esp) + leal -3(%edi),%ebx + movl %edx,24(%esp) + call .L003PIC_me_up +.L003PIC_me_up: + popl %eax + leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc .L004non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 16 +.L0051st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl .L0051st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +.L006outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +.L007inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz .L007inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle .L006outer + emms + jmp .L008common_tail +.align 16 +.L004non_sse2: + movl 8(%esp),%esi + leal 1(%ebx),%ebp + movl 12(%esp),%edi + xorl %ecx,%ecx + movl %esi,%edx + andl $1,%ebp + subl %edi,%edx + leal 4(%edi,%ebx,4),%eax + orl %edx,%ebp + movl (%edi),%edi + jz .L009bn_sqr_mont + movl %eax,28(%esp) + movl (%esi),%eax + xorl %edx,%edx +.align 16 +.L010mull: + movl %edx,%ebp + mull %edi + addl %eax,%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + movl (%esi,%ecx,4),%eax + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl .L010mull + movl %edx,%ebp + mull %edi + movl 20(%esp),%edi + addl %ebp,%eax + movl 16(%esp),%esi + adcl $0,%edx + imull 32(%esp),%edi + movl %eax,32(%esp,%ebx,4) + xorl %ecx,%ecx + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + movl (%esi),%eax + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + incl %ecx + jmp .L0112ndmadd +.align 16 +.L0121stmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl .L0121stmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + addl %eax,%ebp + adcl $0,%edx + imull 32(%esp),%edi + xorl %ecx,%ecx + addl 36(%esp,%ebx,4),%edx + movl %ebp,32(%esp,%ebx,4) + adcl $0,%ecx + movl (%esi),%eax + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + movl $1,%ecx +.align 16 +.L0112ndmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl .L0112ndmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + xorl %eax,%eax + movl 12(%esp),%ecx + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + leal 4(%ecx),%ecx + movl %edx,32(%esp,%ebx,4) + cmpl 28(%esp),%ecx + movl %eax,36(%esp,%ebx,4) + je .L008common_tail + movl (%ecx),%edi + movl 8(%esp),%esi + movl %ecx,12(%esp) + xorl %ecx,%ecx + xorl %edx,%edx + movl (%esi),%eax + jmp .L0121stmadd +.align 16 +.L009bn_sqr_mont: + movl %ebx,(%esp) + movl %ecx,12(%esp) + movl %edi,%eax + mull %edi + movl %eax,32(%esp) + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx + incl %ecx +.align 16 +.L013sqr: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal 1(%ecx),%ecx + adcl $0,%edx + leal (%ebx,%eax,2),%ebp + shrl $31,%eax + cmpl (%esp),%ecx + movl %eax,%ebx + movl %ebp,28(%esp,%ecx,4) + jl .L013sqr + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + leal (%ebx,%eax,2),%ebp + imull 32(%esp),%edi + shrl $31,%eax + movl %ebp,32(%esp,%ecx,4) + leal (%eax,%edx,2),%ebp + movl (%esi),%eax + shrl $31,%edx + movl %ebp,36(%esp,%ecx,4) + movl %edx,40(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + movl %ecx,%ebx + adcl $0,%edx + movl 4(%esi),%eax + movl $1,%ecx +.align 16 +.L0143rdmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + movl 4(%esi,%ecx,4),%eax + adcl $0,%edx + movl %ebp,28(%esp,%ecx,4) + movl %edx,%ebp + mull %edi + addl 36(%esp,%ecx,4),%ebp + leal 2(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl .L0143rdmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + movl 12(%esp),%ecx + xorl %eax,%eax + movl 8(%esp),%esi + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + movl %edx,32(%esp,%ebx,4) + cmpl %ebx,%ecx + movl %eax,36(%esp,%ebx,4) + je .L008common_tail + movl 4(%esi,%ecx,4),%edi + leal 1(%ecx),%ecx + movl %edi,%eax + movl %ecx,12(%esp) + mull %edi + addl 32(%esp,%ecx,4),%eax + adcl $0,%edx + movl %eax,32(%esp,%ecx,4) + xorl %ebp,%ebp + cmpl %ebx,%ecx + leal 1(%ecx),%ecx + je .L015sqrlast + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx +.align 16 +.L016sqradd: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal (%eax,%eax,1),%ebp + adcl $0,%edx + shrl $31,%eax + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%eax + addl %ebx,%ebp + adcl $0,%eax + cmpl (%esp),%ecx + movl %ebp,28(%esp,%ecx,4) + movl %eax,%ebx + jle .L016sqradd + movl %edx,%ebp + addl %edx,%edx + shrl $31,%ebp + addl %ebx,%edx + adcl $0,%ebp +.L015sqrlast: + movl 20(%esp),%edi + movl 16(%esp),%esi + imull 32(%esp),%edi + addl 32(%esp,%ecx,4),%edx + movl (%esi),%eax + adcl $0,%ebp + movl %edx,32(%esp,%ecx,4) + movl %ebp,36(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + leal -1(%ecx),%ebx + adcl $0,%edx + movl $1,%ecx + movl 4(%esi),%eax + jmp .L0143rdmadd +.align 16 +.L008common_tail: + movl 16(%esp),%ebp + movl 4(%esp),%edi + leal 32(%esp),%esi + movl (%esi),%eax + movl %ebx,%ecx + xorl %edx,%edx +.align 16 +.L017sub: + sbbl (%ebp,%edx,4),%eax + movl %eax,(%edi,%edx,4) + decl %ecx + movl 4(%esi,%edx,4),%eax + leal 1(%edx),%edx + jge .L017sub + sbbl $0,%eax + movl $-1,%edx + xorl %eax,%edx + jmp .L018copy +.align 16 +.L018copy: + movl 32(%esp,%ebx,4),%esi + movl (%edi,%ebx,4),%ebp + movl %ecx,32(%esp,%ebx,4) + andl %eax,%esi + andl %edx,%ebp + orl %esi,%ebp + movl %ebp,(%edi,%ebx,4) + decl %ebx + jge .L018copy + movl 24(%esp),%esp + movl $1,%eax +.L000just_leave: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_mont,.-.L_bn_mul_mont_begin +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +.byte 111,114,103,62,0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/bcm/x86-mont-win.asm b/yass/third_party/boringssl/src/gen/bcm/x86-mont-win.asm new file mode 100644 index 0000000000..cd77529cbc --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86-mont-win.asm @@ -0,0 +1,490 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _bn_mul_mont +align 16 +_bn_mul_mont: +L$_bn_mul_mont_begin: + push ebp + push ebx + push esi + push edi + xor eax,eax + mov edi,DWORD [40+esp] + cmp edi,4 + jl NEAR L$000just_leave + lea esi,[20+esp] + lea edx,[24+esp] + add edi,2 + neg edi + lea ebp,[edi*4+esp-32] + neg edi + mov eax,ebp + sub eax,edx + and eax,2047 + sub ebp,eax + xor edx,ebp + and edx,2048 + xor edx,2048 + sub ebp,edx + and ebp,-64 + mov eax,esp + sub eax,ebp + and eax,-4096 + mov edx,esp + lea esp,[eax*1+ebp] + mov eax,DWORD [esp] + cmp esp,ebp + ja NEAR L$001page_walk + jmp NEAR L$002page_walk_done +align 16 +L$001page_walk: + lea esp,[esp-4096] + mov eax,DWORD [esp] + cmp esp,ebp + ja NEAR L$001page_walk +L$002page_walk_done: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov ebp,DWORD [12+esi] + mov esi,DWORD [16+esi] + mov esi,DWORD [esi] + mov DWORD [4+esp],eax + mov DWORD [8+esp],ebx + mov DWORD [12+esp],ecx + mov DWORD [16+esp],ebp + mov DWORD [20+esp],esi + lea ebx,[edi-3] + mov DWORD [24+esp],edx + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$003non_sse2 + mov eax,-1 + movd mm7,eax + mov esi,DWORD [8+esp] + mov edi,DWORD [12+esp] + mov ebp,DWORD [16+esp] + xor edx,edx + xor ecx,ecx + movd mm4,DWORD [edi] + movd mm5,DWORD [esi] + movd mm3,DWORD [ebp] + pmuludq mm5,mm4 + movq mm2,mm5 + movq mm0,mm5 + pand mm0,mm7 + pmuludq mm5,[20+esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm1,DWORD [4+ebp] + movd mm0,DWORD [4+esi] + psrlq mm2,32 + psrlq mm3,32 + inc ecx +align 16 +L$0041st: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + movd mm1,DWORD [4+ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD [4+ecx*4+esi] + psrlq mm2,32 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm3,32 + lea ecx,[1+ecx] + cmp ecx,ebx + jl NEAR L$0041st + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + paddq mm3,mm2 + movq [32+ebx*4+esp],mm3 + inc edx +L$005outer: + xor ecx,ecx + movd mm4,DWORD [edx*4+edi] + movd mm5,DWORD [esi] + movd mm6,DWORD [32+esp] + movd mm3,DWORD [ebp] + pmuludq mm5,mm4 + paddq mm5,mm6 + movq mm0,mm5 + movq mm2,mm5 + pand mm0,mm7 + pmuludq mm5,[20+esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm6,DWORD [36+esp] + movd mm1,DWORD [4+ebp] + movd mm0,DWORD [4+esi] + psrlq mm2,32 + psrlq mm3,32 + paddq mm2,mm6 + inc ecx + dec ebx +L$006inner: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + movd mm6,DWORD [36+ecx*4+esp] + pand mm0,mm7 + movd mm1,DWORD [4+ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD [4+ecx*4+esi] + psrlq mm2,32 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm3,32 + paddq mm2,mm6 + dec ebx + lea ecx,[1+ecx] + jnz NEAR L$006inner + mov ebx,ecx + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + movd mm6,DWORD [36+ebx*4+esp] + paddq mm3,mm2 + paddq mm3,mm6 + movq [32+ebx*4+esp],mm3 + lea edx,[1+edx] + cmp edx,ebx + jle NEAR L$005outer + emms + jmp NEAR L$007common_tail +align 16 +L$003non_sse2: + mov esi,DWORD [8+esp] + lea ebp,[1+ebx] + mov edi,DWORD [12+esp] + xor ecx,ecx + mov edx,esi + and ebp,1 + sub edx,edi + lea eax,[4+ebx*4+edi] + or ebp,edx + mov edi,DWORD [edi] + jz NEAR L$008bn_sqr_mont + mov DWORD [28+esp],eax + mov eax,DWORD [esi] + xor edx,edx +align 16 +L$009mull: + mov ebp,edx + mul edi + add ebp,eax + lea ecx,[1+ecx] + adc edx,0 + mov eax,DWORD [ecx*4+esi] + cmp ecx,ebx + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$009mull + mov ebp,edx + mul edi + mov edi,DWORD [20+esp] + add eax,ebp + mov esi,DWORD [16+esp] + adc edx,0 + imul edi,DWORD [32+esp] + mov DWORD [32+ebx*4+esp],eax + xor ecx,ecx + mov DWORD [36+ebx*4+esp],edx + mov DWORD [40+ebx*4+esp],ecx + mov eax,DWORD [esi] + mul edi + add eax,DWORD [32+esp] + mov eax,DWORD [4+esi] + adc edx,0 + inc ecx + jmp NEAR L$0102ndmadd +align 16 +L$0111stmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$0111stmadd + mov ebp,edx + mul edi + add eax,DWORD [32+ebx*4+esp] + mov edi,DWORD [20+esp] + adc edx,0 + mov esi,DWORD [16+esp] + add ebp,eax + adc edx,0 + imul edi,DWORD [32+esp] + xor ecx,ecx + add edx,DWORD [36+ebx*4+esp] + mov DWORD [32+ebx*4+esp],ebp + adc ecx,0 + mov eax,DWORD [esi] + mov DWORD [36+ebx*4+esp],edx + mov DWORD [40+ebx*4+esp],ecx + mul edi + add eax,DWORD [32+esp] + mov eax,DWORD [4+esi] + adc edx,0 + mov ecx,1 +align 16 +L$0102ndmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [24+ecx*4+esp],ebp + jl NEAR L$0102ndmadd + mov ebp,edx + mul edi + add ebp,DWORD [32+ebx*4+esp] + adc edx,0 + add ebp,eax + adc edx,0 + mov DWORD [28+ebx*4+esp],ebp + xor eax,eax + mov ecx,DWORD [12+esp] + add edx,DWORD [36+ebx*4+esp] + adc eax,DWORD [40+ebx*4+esp] + lea ecx,[4+ecx] + mov DWORD [32+ebx*4+esp],edx + cmp ecx,DWORD [28+esp] + mov DWORD [36+ebx*4+esp],eax + je NEAR L$007common_tail + mov edi,DWORD [ecx] + mov esi,DWORD [8+esp] + mov DWORD [12+esp],ecx + xor ecx,ecx + xor edx,edx + mov eax,DWORD [esi] + jmp NEAR L$0111stmadd +align 16 +L$008bn_sqr_mont: + mov DWORD [esp],ebx + mov DWORD [12+esp],ecx + mov eax,edi + mul edi + mov DWORD [32+esp],eax + mov ebx,edx + shr edx,1 + and ebx,1 + inc ecx +align 16 +L$012sqr: + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + lea ecx,[1+ecx] + adc edx,0 + lea ebp,[eax*2+ebx] + shr eax,31 + cmp ecx,DWORD [esp] + mov ebx,eax + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$012sqr + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + mov edi,DWORD [20+esp] + adc edx,0 + mov esi,DWORD [16+esp] + lea ebp,[eax*2+ebx] + imul edi,DWORD [32+esp] + shr eax,31 + mov DWORD [32+ecx*4+esp],ebp + lea ebp,[edx*2+eax] + mov eax,DWORD [esi] + shr edx,31 + mov DWORD [36+ecx*4+esp],ebp + mov DWORD [40+ecx*4+esp],edx + mul edi + add eax,DWORD [32+esp] + mov ebx,ecx + adc edx,0 + mov eax,DWORD [4+esi] + mov ecx,1 +align 16 +L$0133rdmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + adc edx,0 + add ebp,eax + mov eax,DWORD [4+ecx*4+esi] + adc edx,0 + mov DWORD [28+ecx*4+esp],ebp + mov ebp,edx + mul edi + add ebp,DWORD [36+ecx*4+esp] + lea ecx,[2+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [24+ecx*4+esp],ebp + jl NEAR L$0133rdmadd + mov ebp,edx + mul edi + add ebp,DWORD [32+ebx*4+esp] + adc edx,0 + add ebp,eax + adc edx,0 + mov DWORD [28+ebx*4+esp],ebp + mov ecx,DWORD [12+esp] + xor eax,eax + mov esi,DWORD [8+esp] + add edx,DWORD [36+ebx*4+esp] + adc eax,DWORD [40+ebx*4+esp] + mov DWORD [32+ebx*4+esp],edx + cmp ecx,ebx + mov DWORD [36+ebx*4+esp],eax + je NEAR L$007common_tail + mov edi,DWORD [4+ecx*4+esi] + lea ecx,[1+ecx] + mov eax,edi + mov DWORD [12+esp],ecx + mul edi + add eax,DWORD [32+ecx*4+esp] + adc edx,0 + mov DWORD [32+ecx*4+esp],eax + xor ebp,ebp + cmp ecx,ebx + lea ecx,[1+ecx] + je NEAR L$014sqrlast + mov ebx,edx + shr edx,1 + and ebx,1 +align 16 +L$015sqradd: + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + lea ebp,[eax*1+eax] + adc edx,0 + shr eax,31 + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc eax,0 + add ebp,ebx + adc eax,0 + cmp ecx,DWORD [esp] + mov DWORD [28+ecx*4+esp],ebp + mov ebx,eax + jle NEAR L$015sqradd + mov ebp,edx + add edx,edx + shr ebp,31 + add edx,ebx + adc ebp,0 +L$014sqrlast: + mov edi,DWORD [20+esp] + mov esi,DWORD [16+esp] + imul edi,DWORD [32+esp] + add edx,DWORD [32+ecx*4+esp] + mov eax,DWORD [esi] + adc ebp,0 + mov DWORD [32+ecx*4+esp],edx + mov DWORD [36+ecx*4+esp],ebp + mul edi + add eax,DWORD [32+esp] + lea ebx,[ecx-1] + adc edx,0 + mov ecx,1 + mov eax,DWORD [4+esi] + jmp NEAR L$0133rdmadd +align 16 +L$007common_tail: + mov ebp,DWORD [16+esp] + mov edi,DWORD [4+esp] + lea esi,[32+esp] + mov eax,DWORD [esi] + mov ecx,ebx + xor edx,edx +align 16 +L$016sub: + sbb eax,DWORD [edx*4+ebp] + mov DWORD [edx*4+edi],eax + dec ecx + mov eax,DWORD [4+edx*4+esi] + lea edx,[1+edx] + jge NEAR L$016sub + sbb eax,0 + mov edx,-1 + xor edx,eax + jmp NEAR L$017copy +align 16 +L$017copy: + mov esi,DWORD [32+ebx*4+esp] + mov ebp,DWORD [ebx*4+edi] + mov DWORD [32+ebx*4+esp],ecx + and esi,eax + and ebp,edx + or ebp,esi + mov DWORD [ebx*4+edi],ebp + dec ebx + jge NEAR L$017copy + mov esp,DWORD [24+esp] + mov eax,1 +L$000just_leave: + pop edi + pop esi + pop ebx + pop ebp + ret +db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +db 111,114,103,62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-apple.S b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-apple.S new file mode 100644 index 0000000000..4bf0c6de79 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-apple.S @@ -0,0 +1,1235 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + +.globl _bn_mul_mont_nohw +.private_extern _bn_mul_mont_nohw + +.p2align 4 +_bn_mul_mont_nohw: + +_CET_ENDBR + movl %r9d,%r9d + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + + negq %r9 + movq %rsp,%r11 + leaq -16(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + + + + + + + + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk + jmp L$mul_page_walk_done + +.p2align 4 +L$mul_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk +L$mul_page_walk_done: + + movq %rax,8(%rsp,%r9,8) + +L$mul_body: + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$1st_enter + +.p2align 4 +L$1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne L$1st + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp L$outer +.p2align 4 +L$outer: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$inner_enter + +.p2align 4 +L$inner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$inner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne L$inner + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb L$outer + + xorq %r14,%r14 + movq (%rsp),%rax + movq %r9,%r15 + +.p2align 4 +L$sub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsp,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz L$sub + + sbbq $0,%rax + movq $-1,%rbx + xorq %rax,%rbx + xorq %r14,%r14 + movq %r9,%r15 + +L$copy: + movq (%rdi,%r14,8),%rcx + movq (%rsp,%r14,8),%rdx + andq %rbx,%rcx + andq %rax,%rdx + movq %r9,(%rsp,%r14,8) + orq %rcx,%rdx + movq %rdx,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz L$copy + + movq 8(%rsp,%r9,8),%rsi + + movq $1,%rax + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$mul_epilogue: + ret + + +.globl _bn_mul4x_mont +.private_extern _bn_mul4x_mont + +.p2align 4 +_bn_mul4x_mont: + +_CET_ENDBR + movl %r9d,%r9d + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + + negq %r9 + movq %rsp,%r11 + leaq -32(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul4x_page_walk + jmp L$mul4x_page_walk_done + +L$mul4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul4x_page_walk +L$mul4x_page_walk_done: + + movq %rax,8(%rsp,%r9,8) + +L$mul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp L$1st4x +.p2align 4 +L$1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb L$1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.p2align 2 +L$outer4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp L$inner4x +.p2align 4 +L$inner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb L$inner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jb L$outer4x + movq 16(%rsp,%r9,8),%rdi + leaq -4(%r9),%r15 + movq 0(%rsp),%rax + movq 8(%rsp),%rdx + shrq $2,%r15 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + +L$sub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz L$sub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + pxor %xmm0,%xmm0 +.byte 102,72,15,110,224 + pcmpeqd %xmm5,%xmm5 + pshufd $0,%xmm4,%xmm4 + movq %r9,%r15 + pxor %xmm4,%xmm5 + shrq $2,%r15 + xorl %eax,%eax + + jmp L$copy4x +.p2align 4 +L$copy4x: + movdqa (%rsp,%rax,1),%xmm1 + movdqu (%rdi,%rax,1),%xmm2 + pand %xmm4,%xmm1 + pand %xmm5,%xmm2 + movdqa 16(%rsp,%rax,1),%xmm3 + movdqa %xmm0,(%rsp,%rax,1) + por %xmm2,%xmm1 + movdqu 16(%rdi,%rax,1),%xmm2 + movdqu %xmm1,(%rdi,%rax,1) + pand %xmm4,%xmm3 + pand %xmm5,%xmm2 + movdqa %xmm0,16(%rsp,%rax,1) + por %xmm2,%xmm3 + movdqu %xmm3,16(%rdi,%rax,1) + leaq 32(%rax),%rax + decq %r15 + jnz L$copy4x + movq 8(%rsp,%r9,8),%rsi + + movq $1,%rax + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$mul4x_epilogue: + ret + + + + + +.globl _bn_sqr8x_mont +.private_extern _bn_sqr8x_mont + +.p2align 5 +_bn_sqr8x_mont: + +_CET_ENDBR + movl %r9d,%r9d + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$sqr8x_prologue: + + movl %r9d,%r10d + shll $3,%r9d + shlq $3+2,%r10 + negq %r9 + + + + + + + leaq -64(%rsp,%r9,2),%r11 + movq %rsp,%rbp + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$sqr8x_sp_alt + subq %r11,%rbp + leaq -64(%rbp,%r9,2),%rbp + jmp L$sqr8x_sp_done + +.p2align 5 +L$sqr8x_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +L$sqr8x_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$sqr8x_page_walk + jmp L$sqr8x_page_walk_done + +.p2align 4 +L$sqr8x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$sqr8x_page_walk +L$sqr8x_page_walk_done: + + movq %r9,%r10 + negq %r9 + + movq %r8,32(%rsp) + movq %rax,40(%rsp) + +L$sqr8x_body: + +.byte 102,72,15,110,209 + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,73,15,110,218 + testq %rdx,%rdx + jz L$sqr8x_nox + + call _bn_sqrx8x_internal + + + + + leaq (%r8,%rcx,1),%rbx + movq %rcx,%r9 + movq %rcx,%rdx +.byte 102,72,15,126,207 + sarq $3+2,%rcx + jmp L$sqr8x_sub + +.p2align 5 +L$sqr8x_nox: + call _bn_sqr8x_internal + + + + + leaq (%rdi,%r9,1),%rbx + movq %r9,%rcx + movq %r9,%rdx +.byte 102,72,15,126,207 + sarq $3+2,%rcx + jmp L$sqr8x_sub + +.p2align 5 +L$sqr8x_sub: + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + movq 16(%rbx),%r14 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 0(%rbp),%r12 + sbbq 8(%rbp),%r13 + sbbq 16(%rbp),%r14 + sbbq 24(%rbp),%r15 + leaq 32(%rbp),%rbp + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + incq %rcx + jnz L$sqr8x_sub + + sbbq $0,%rax + leaq (%rbx,%r9,1),%rbx + leaq (%rdi,%r9,1),%rdi + +.byte 102,72,15,110,200 + pxor %xmm0,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq 40(%rsp),%rsi + + jmp L$sqr8x_cond_copy + +.p2align 5 +L$sqr8x_cond_copy: + movdqa 0(%rbx),%xmm2 + movdqa 16(%rbx),%xmm3 + leaq 32(%rbx),%rbx + movdqu 0(%rdi),%xmm4 + movdqu 16(%rdi),%xmm5 + leaq 32(%rdi),%rdi + movdqa %xmm0,-32(%rbx) + movdqa %xmm0,-16(%rbx) + movdqa %xmm0,-32(%rbx,%rdx,1) + movdqa %xmm0,-16(%rbx,%rdx,1) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-32(%rdi) + movdqu %xmm5,-16(%rdi) + addq $32,%r9 + jnz L$sqr8x_cond_copy + + movq $1,%rax + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$sqr8x_epilogue: + ret + + +.globl _bn_mulx4x_mont +.private_extern _bn_mulx4x_mont + +.p2align 5 +_bn_mulx4x_mont: + +_CET_ENDBR + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$mulx4x_prologue: + + shll $3,%r9d + xorq %r10,%r10 + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,1),%rbp + andq $-128,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mulx4x_page_walk + jmp L$mulx4x_page_walk_done + +.p2align 4 +L$mulx4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mulx4x_page_walk +L$mulx4x_page_walk_done: + + leaq (%rdx,%r9,1),%r10 + + + + + + + + + + + + + movq %r9,0(%rsp) + shrq $5,%r9 + movq %r10,16(%rsp) + subq $1,%r9 + movq %r8,24(%rsp) + movq %rdi,32(%rsp) + movq %rax,40(%rsp) + + movq %r9,48(%rsp) + jmp L$mulx4x_body + +.p2align 5 +L$mulx4x_body: + leaq 8(%rdx),%rdi + movq (%rdx),%rdx + leaq 64+32(%rsp),%rbx + movq %rdx,%r9 + + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r14 + addq %rax,%r11 + movq %rdi,8(%rsp) + mulxq 16(%rsi),%r12,%r13 + adcq %r14,%r12 + adcq $0,%r13 + + movq %r8,%rdi + imulq 24(%rsp),%r8 + xorq %rbp,%rbp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%rdi + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 +.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 + movq 48(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r12,-16(%rbx) + + jmp L$mulx4x_1st + +.p2align 5 +L$mulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz L$mulx4x_1st + + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + addq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + jmp L$mulx4x_outer + +.p2align 5 +L$mulx4x_outer: + movq (%rdi),%rdx + leaq 8(%rdi),%rdi + subq %rax,%rsi + movq %r15,(%rbx) + leaq 64+32(%rsp),%rbx + subq %rax,%rcx + + mulxq 0(%rsi),%r8,%r11 + xorl %ebp,%ebp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + adoxq -16(%rbx),%r12 + adcxq %rbp,%r13 + adoxq %rbp,%r13 + + movq %rdi,8(%rsp) + movq %r8,%r15 + imulq 24(%rsp),%r8 + xorl %ebp,%ebp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + adcxq %rax,%r13 + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + adoxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + leaq 32(%rcx),%rcx + adcxq %rax,%r12 + adoxq %rbp,%r15 + movq 48(%rsp),%rdi + movq %r12,-16(%rbx) + + jmp L$mulx4x_inner + +.p2align 5 +L$mulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-32(%rbx) + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz L$mulx4x_inner + + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + subq 0(%rbx),%rbp + adcq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + + cmpq 16(%rsp),%rdi + jne L$mulx4x_outer + + leaq 64(%rsp),%rbx + subq %rax,%rcx + negq %r15 + movq %rax,%rdx + shrq $3+2,%rax + movq 32(%rsp),%rdi + jmp L$mulx4x_sub + +.p2align 5 +L$mulx4x_sub: + movq 0(%rbx),%r11 + movq 8(%rbx),%r12 + movq 16(%rbx),%r13 + movq 24(%rbx),%r14 + leaq 32(%rbx),%rbx + sbbq 0(%rcx),%r11 + sbbq 8(%rcx),%r12 + sbbq 16(%rcx),%r13 + sbbq 24(%rcx),%r14 + leaq 32(%rcx),%rcx + movq %r11,0(%rdi) + movq %r12,8(%rdi) + movq %r13,16(%rdi) + movq %r14,24(%rdi) + leaq 32(%rdi),%rdi + decq %rax + jnz L$mulx4x_sub + + sbbq $0,%r15 + leaq 64(%rsp),%rbx + subq %rdx,%rdi + +.byte 102,73,15,110,207 + pxor %xmm0,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq 40(%rsp),%rsi + + jmp L$mulx4x_cond_copy + +.p2align 5 +L$mulx4x_cond_copy: + movdqa 0(%rbx),%xmm2 + movdqa 16(%rbx),%xmm3 + leaq 32(%rbx),%rbx + movdqu 0(%rdi),%xmm4 + movdqu 16(%rdi),%xmm5 + leaq 32(%rdi),%rdi + movdqa %xmm0,-32(%rbx) + movdqa %xmm0,-16(%rbx) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-32(%rdi) + movdqu %xmm5,-16(%rdi) + subq $32,%rdx + jnz L$mulx4x_cond_copy + + movq %rdx,(%rbx) + + movq $1,%rax + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$mulx4x_epilogue: + ret + + +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 4 +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-linux.S b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-linux.S new file mode 100644 index 0000000000..02b282d35e --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-linux.S @@ -0,0 +1,1237 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.globl bn_mul_mont_nohw +.hidden bn_mul_mont_nohw +.type bn_mul_mont_nohw,@function +.align 16 +bn_mul_mont_nohw: +.cfi_startproc +_CET_ENDBR + movl %r9d,%r9d + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + negq %r9 + movq %rsp,%r11 + leaq -16(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + + + + + + + + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk + jmp .Lmul_page_walk_done + +.align 16 +.Lmul_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk +.Lmul_page_walk_done: + + movq %rax,8(%rsp,%r9,8) +.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 +.Lmul_body: + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + movq %r9,%r15 + +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsp,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + movq $-1,%rbx + xorq %rax,%rbx + xorq %r14,%r14 + movq %r9,%r15 + +.Lcopy: + movq (%rdi,%r14,8),%rcx + movq (%rsp,%r14,8),%rdx + andq %rbx,%rcx + andq %rax,%rdx + movq %r9,(%rsp,%r14,8) + orq %rcx,%rdx + movq %rdx,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmul_epilogue: + ret +.cfi_endproc +.size bn_mul_mont_nohw,.-bn_mul_mont_nohw +.globl bn_mul4x_mont +.hidden bn_mul4x_mont +.type bn_mul4x_mont,@function +.align 16 +bn_mul4x_mont: +.cfi_startproc +_CET_ENDBR + movl %r9d,%r9d + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + negq %r9 + movq %rsp,%r11 + leaq -32(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul4x_page_walk + jmp .Lmul4x_page_walk_done + +.Lmul4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul4x_page_walk +.Lmul4x_page_walk_done: + + movq %rax,8(%rsp,%r9,8) +.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 +.Lmul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .L1st4x +.align 16 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.align 4 +.Louter4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .Linner4x +.align 16 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jb .Louter4x + movq 16(%rsp,%r9,8),%rdi + leaq -4(%r9),%r15 + movq 0(%rsp),%rax + movq 8(%rsp),%rdx + shrq $2,%r15 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + +.Lsub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz .Lsub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + pxor %xmm0,%xmm0 +.byte 102,72,15,110,224 + pcmpeqd %xmm5,%xmm5 + pshufd $0,%xmm4,%xmm4 + movq %r9,%r15 + pxor %xmm4,%xmm5 + shrq $2,%r15 + xorl %eax,%eax + + jmp .Lcopy4x +.align 16 +.Lcopy4x: + movdqa (%rsp,%rax,1),%xmm1 + movdqu (%rdi,%rax,1),%xmm2 + pand %xmm4,%xmm1 + pand %xmm5,%xmm2 + movdqa 16(%rsp,%rax,1),%xmm3 + movdqa %xmm0,(%rsp,%rax,1) + por %xmm2,%xmm1 + movdqu 16(%rdi,%rax,1),%xmm2 + movdqu %xmm1,(%rdi,%rax,1) + pand %xmm4,%xmm3 + pand %xmm5,%xmm2 + movdqa %xmm0,16(%rsp,%rax,1) + por %xmm2,%xmm3 + movdqu %xmm3,16(%rdi,%rax,1) + leaq 32(%rax),%rax + decq %r15 + jnz .Lcopy4x + movq 8(%rsp,%r9,8),%rsi +.cfi_def_cfa %rsi, 8 + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmul4x_epilogue: + ret +.cfi_endproc +.size bn_mul4x_mont,.-bn_mul4x_mont +.extern bn_sqrx8x_internal +.hidden bn_sqrx8x_internal +.extern bn_sqr8x_internal +.hidden bn_sqr8x_internal + +.globl bn_sqr8x_mont +.hidden bn_sqr8x_mont +.type bn_sqr8x_mont,@function +.align 32 +bn_sqr8x_mont: +.cfi_startproc +_CET_ENDBR + movl %r9d,%r9d + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lsqr8x_prologue: + + movl %r9d,%r10d + shll $3,%r9d + shlq $3+2,%r10 + negq %r9 + + + + + + + leaq -64(%rsp,%r9,2),%r11 + movq %rsp,%rbp + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lsqr8x_sp_alt + subq %r11,%rbp + leaq -64(%rbp,%r9,2),%rbp + jmp .Lsqr8x_sp_done + +.align 32 +.Lsqr8x_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lsqr8x_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lsqr8x_page_walk + jmp .Lsqr8x_page_walk_done + +.align 16 +.Lsqr8x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lsqr8x_page_walk +.Lsqr8x_page_walk_done: + + movq %r9,%r10 + negq %r9 + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lsqr8x_body: + +.byte 102,72,15,110,209 + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,73,15,110,218 + testq %rdx,%rdx + jz .Lsqr8x_nox + + call bn_sqrx8x_internal + + + + + leaq (%r8,%rcx,1),%rbx + movq %rcx,%r9 + movq %rcx,%rdx +.byte 102,72,15,126,207 + sarq $3+2,%rcx + jmp .Lsqr8x_sub + +.align 32 +.Lsqr8x_nox: + call bn_sqr8x_internal + + + + + leaq (%rdi,%r9,1),%rbx + movq %r9,%rcx + movq %r9,%rdx +.byte 102,72,15,126,207 + sarq $3+2,%rcx + jmp .Lsqr8x_sub + +.align 32 +.Lsqr8x_sub: + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + movq 16(%rbx),%r14 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 0(%rbp),%r12 + sbbq 8(%rbp),%r13 + sbbq 16(%rbp),%r14 + sbbq 24(%rbp),%r15 + leaq 32(%rbp),%rbp + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + incq %rcx + jnz .Lsqr8x_sub + + sbbq $0,%rax + leaq (%rbx,%r9,1),%rbx + leaq (%rdi,%r9,1),%rdi + +.byte 102,72,15,110,200 + pxor %xmm0,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + jmp .Lsqr8x_cond_copy + +.align 32 +.Lsqr8x_cond_copy: + movdqa 0(%rbx),%xmm2 + movdqa 16(%rbx),%xmm3 + leaq 32(%rbx),%rbx + movdqu 0(%rdi),%xmm4 + movdqu 16(%rdi),%xmm5 + leaq 32(%rdi),%rdi + movdqa %xmm0,-32(%rbx) + movdqa %xmm0,-16(%rbx) + movdqa %xmm0,-32(%rbx,%rdx,1) + movdqa %xmm0,-16(%rbx,%rdx,1) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-32(%rdi) + movdqu %xmm5,-16(%rdi) + addq $32,%r9 + jnz .Lsqr8x_cond_copy + + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lsqr8x_epilogue: + ret +.cfi_endproc +.size bn_sqr8x_mont,.-bn_sqr8x_mont +.globl bn_mulx4x_mont +.hidden bn_mulx4x_mont +.type bn_mulx4x_mont,@function +.align 32 +bn_mulx4x_mont: +.cfi_startproc +_CET_ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lmulx4x_prologue: + + shll $3,%r9d + xorq %r10,%r10 + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,1),%rbp + andq $-128,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk + jmp .Lmulx4x_page_walk_done + +.align 16 +.Lmulx4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk +.Lmulx4x_page_walk_done: + + leaq (%rdx,%r9,1),%r10 + + + + + + + + + + + + + movq %r9,0(%rsp) + shrq $5,%r9 + movq %r10,16(%rsp) + subq $1,%r9 + movq %r8,24(%rsp) + movq %rdi,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 + movq %r9,48(%rsp) + jmp .Lmulx4x_body + +.align 32 +.Lmulx4x_body: + leaq 8(%rdx),%rdi + movq (%rdx),%rdx + leaq 64+32(%rsp),%rbx + movq %rdx,%r9 + + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r14 + addq %rax,%r11 + movq %rdi,8(%rsp) + mulxq 16(%rsi),%r12,%r13 + adcq %r14,%r12 + adcq $0,%r13 + + movq %r8,%rdi + imulq 24(%rsp),%r8 + xorq %rbp,%rbp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%rdi + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 +.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 + movq 48(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r12,-16(%rbx) + + jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_1st + + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + addq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + movq (%rdi),%rdx + leaq 8(%rdi),%rdi + subq %rax,%rsi + movq %r15,(%rbx) + leaq 64+32(%rsp),%rbx + subq %rax,%rcx + + mulxq 0(%rsi),%r8,%r11 + xorl %ebp,%ebp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + adoxq -16(%rbx),%r12 + adcxq %rbp,%r13 + adoxq %rbp,%r13 + + movq %rdi,8(%rsp) + movq %r8,%r15 + imulq 24(%rsp),%r8 + xorl %ebp,%ebp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + adcxq %rax,%r13 + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + adoxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + leaq 32(%rcx),%rcx + adcxq %rax,%r12 + adoxq %rbp,%r15 + movq 48(%rsp),%rdi + movq %r12,-16(%rbx) + + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-32(%rbx) + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_inner + + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + subq 0(%rbx),%rbp + adcq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + + cmpq 16(%rsp),%rdi + jne .Lmulx4x_outer + + leaq 64(%rsp),%rbx + subq %rax,%rcx + negq %r15 + movq %rax,%rdx + shrq $3+2,%rax + movq 32(%rsp),%rdi + jmp .Lmulx4x_sub + +.align 32 +.Lmulx4x_sub: + movq 0(%rbx),%r11 + movq 8(%rbx),%r12 + movq 16(%rbx),%r13 + movq 24(%rbx),%r14 + leaq 32(%rbx),%rbx + sbbq 0(%rcx),%r11 + sbbq 8(%rcx),%r12 + sbbq 16(%rcx),%r13 + sbbq 24(%rcx),%r14 + leaq 32(%rcx),%rcx + movq %r11,0(%rdi) + movq %r12,8(%rdi) + movq %r13,16(%rdi) + movq %r14,24(%rdi) + leaq 32(%rdi),%rdi + decq %rax + jnz .Lmulx4x_sub + + sbbq $0,%r15 + leaq 64(%rsp),%rbx + subq %rdx,%rdi + +.byte 102,73,15,110,207 + pxor %xmm0,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + jmp .Lmulx4x_cond_copy + +.align 32 +.Lmulx4x_cond_copy: + movdqa 0(%rbx),%xmm2 + movdqa 16(%rbx),%xmm3 + leaq 32(%rbx),%rbx + movdqu 0(%rdi),%xmm4 + movdqu 16(%rdi),%xmm5 + leaq 32(%rdi),%rdi + movdqa %xmm0,-32(%rbx) + movdqa %xmm0,-16(%rbx) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-32(%rdi) + movdqu %xmm5,-16(%rdi) + subq $32,%rdx + jnz .Lmulx4x_cond_copy + + movq %rdx,(%rbx) + + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmulx4x_epilogue: + ret +.cfi_endproc +.size bn_mulx4x_mont,.-bn_mulx4x_mont +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-win.asm b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-win.asm new file mode 100644 index 0000000000..b0611fc664 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont-win.asm @@ -0,0 +1,1470 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + +global bn_mul_mont_nohw + +ALIGN 16 +bn_mul_mont_nohw: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont_nohw: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + mov r9d,r9d + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + + neg r9 + mov r11,rsp + lea r10,[((-16))+r9*8+rsp] + neg r9 + and r10,-1024 + + + + + + + + + + sub r11,r10 + and r11,-4096 + lea rsp,[r11*1+r10] + mov r11,QWORD[rsp] + cmp rsp,r10 + ja NEAR $L$mul_page_walk + jmp NEAR $L$mul_page_walk_done + +ALIGN 16 +$L$mul_page_walk: + lea rsp,[((-4096))+rsp] + mov r11,QWORD[rsp] + cmp rsp,r10 + ja NEAR $L$mul_page_walk +$L$mul_page_walk_done: + + mov QWORD[8+r9*8+rsp],rax + +$L$mul_body: + mov r12,rdx + mov r8,QWORD[r8] + mov rbx,QWORD[r12] + mov rax,QWORD[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,[1+r15] + jmp NEAR $L$1st_enter + +ALIGN 16 +$L$1st: + add r13,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter: + mul rbx + add r11,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + lea r15,[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne NEAR $L$1st + + add r13,rax + mov rax,QWORD[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD[((-8))+r9*8+rsp],r13 + mov QWORD[r9*8+rsp],rdx + + lea r14,[1+r14] + jmp NEAR $L$outer +ALIGN 16 +$L$outer: + mov rbx,QWORD[r14*8+r12] + xor r15,r15 + mov rbp,r8 + mov r10,QWORD[rsp] + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov r10,QWORD[8+rsp] + mov r13,rdx + + lea r15,[1+r15] + jmp NEAR $L$inner_enter + +ALIGN 16 +$L$inner: + add r13,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD[r15*8+rsp] + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter: + mul rbx + add r11,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,[1+r15] + + mul rbp + cmp r15,r9 + jne NEAR $L$inner + + add r13,rax + mov rax,QWORD[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD[r15*8+rsp] + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r9*8+rsp],r13 + mov QWORD[r9*8+rsp],rdx + + lea r14,[1+r14] + cmp r14,r9 + jb NEAR $L$outer + + xor r14,r14 + mov rax,QWORD[rsp] + mov r15,r9 + +ALIGN 16 +$L$sub: sbb rax,QWORD[r14*8+rcx] + mov QWORD[r14*8+rdi],rax + mov rax,QWORD[8+r14*8+rsp] + lea r14,[1+r14] + dec r15 + jnz NEAR $L$sub + + sbb rax,0 + mov rbx,-1 + xor rbx,rax + xor r14,r14 + mov r15,r9 + +$L$copy: + mov rcx,QWORD[r14*8+rdi] + mov rdx,QWORD[r14*8+rsp] + and rcx,rbx + and rdx,rax + mov QWORD[r14*8+rsp],r9 + or rdx,rcx + mov QWORD[r14*8+rdi],rdx + lea r14,[1+r14] + sub r15,1 + jnz NEAR $L$copy + + mov rsi,QWORD[8+r9*8+rsp] + + mov rax,1 + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$mul_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_mul_mont_nohw: +global bn_mul4x_mont + +ALIGN 16 +bn_mul4x_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + mov r9d,r9d + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + + neg r9 + mov r11,rsp + lea r10,[((-32))+r9*8+rsp] + neg r9 + and r10,-1024 + + sub r11,r10 + and r11,-4096 + lea rsp,[r11*1+r10] + mov r11,QWORD[rsp] + cmp rsp,r10 + ja NEAR $L$mul4x_page_walk + jmp NEAR $L$mul4x_page_walk_done + +$L$mul4x_page_walk: + lea rsp,[((-4096))+rsp] + mov r11,QWORD[rsp] + cmp rsp,r10 + ja NEAR $L$mul4x_page_walk +$L$mul4x_page_walk_done: + + mov QWORD[8+r9*8+rsp],rax + +$L$mul4x_body: + mov QWORD[16+r9*8+rsp],rdi + mov r12,rdx + mov r8,QWORD[r8] + mov rbx,QWORD[r12] + mov rax,QWORD[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,[4+r15] + adc rdx,0 + mov QWORD[rsp],rdi + mov r13,rdx + jmp NEAR $L$1st4x +ALIGN 16 +$L$1st4x: + mul rbx + add r10,rax + mov rax,QWORD[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+r15*8+rcx] + adc rdx,0 + lea r15,[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb NEAR $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD[((-8))+r15*8+rsp],r13 + mov QWORD[r15*8+rsp],rdi + + lea r14,[1+r14] +ALIGN 4 +$L$outer4x: + mov rbx,QWORD[r14*8+r12] + xor r15,r15 + mov r10,QWORD[rsp] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+rcx] + adc rdx,0 + add r11,QWORD[8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,[4+r15] + adc rdx,0 + mov QWORD[rsp],rdi + mov r13,rdx + jmp NEAR $L$inner4x +ALIGN 16 +$L$inner4x: + mul rbx + add r10,rax + mov rax,QWORD[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD[((-8))+r15*8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + add r10,QWORD[r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+r15*8+rcx] + adc rdx,0 + add r11,QWORD[8+r15*8+rsp] + adc rdx,0 + lea r15,[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb NEAR $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD[((-8))+r15*8+rsp] + adc rdx,0 + lea r14,[1+r14] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD[r9*8+rsp] + adc rdi,0 + mov QWORD[((-8))+r15*8+rsp],r13 + mov QWORD[r15*8+rsp],rdi + + cmp r14,r9 + jb NEAR $L$outer4x + mov rdi,QWORD[16+r9*8+rsp] + lea r15,[((-4))+r9] + mov rax,QWORD[rsp] + mov rdx,QWORD[8+rsp] + shr r15,2 + lea rsi,[rsp] + xor r14,r14 + + sub rax,QWORD[rcx] + mov rbx,QWORD[16+rsi] + mov rbp,QWORD[24+rsi] + sbb rdx,QWORD[8+rcx] + +$L$sub4x: + mov QWORD[r14*8+rdi],rax + mov QWORD[8+r14*8+rdi],rdx + sbb rbx,QWORD[16+r14*8+rcx] + mov rax,QWORD[32+r14*8+rsi] + mov rdx,QWORD[40+r14*8+rsi] + sbb rbp,QWORD[24+r14*8+rcx] + mov QWORD[16+r14*8+rdi],rbx + mov QWORD[24+r14*8+rdi],rbp + sbb rax,QWORD[32+r14*8+rcx] + mov rbx,QWORD[48+r14*8+rsi] + mov rbp,QWORD[56+r14*8+rsi] + sbb rdx,QWORD[40+r14*8+rcx] + lea r14,[4+r14] + dec r15 + jnz NEAR $L$sub4x + + mov QWORD[r14*8+rdi],rax + mov rax,QWORD[32+r14*8+rsi] + sbb rbx,QWORD[16+r14*8+rcx] + mov QWORD[8+r14*8+rdi],rdx + sbb rbp,QWORD[24+r14*8+rcx] + mov QWORD[16+r14*8+rdi],rbx + + sbb rax,0 + mov QWORD[24+r14*8+rdi],rbp + pxor xmm0,xmm0 +DB 102,72,15,110,224 + pcmpeqd xmm5,xmm5 + pshufd xmm4,xmm4,0 + mov r15,r9 + pxor xmm5,xmm4 + shr r15,2 + xor eax,eax + + jmp NEAR $L$copy4x +ALIGN 16 +$L$copy4x: + movdqa xmm1,XMMWORD[rax*1+rsp] + movdqu xmm2,XMMWORD[rax*1+rdi] + pand xmm1,xmm4 + pand xmm2,xmm5 + movdqa xmm3,XMMWORD[16+rax*1+rsp] + movdqa XMMWORD[rax*1+rsp],xmm0 + por xmm1,xmm2 + movdqu xmm2,XMMWORD[16+rax*1+rdi] + movdqu XMMWORD[rax*1+rdi],xmm1 + pand xmm3,xmm4 + pand xmm2,xmm5 + movdqa XMMWORD[16+rax*1+rsp],xmm0 + por xmm3,xmm2 + movdqu XMMWORD[16+rax*1+rdi],xmm3 + lea rax,[32+rax] + dec r15 + jnz NEAR $L$copy4x + mov rsi,QWORD[8+r9*8+rsp] + + mov rax,1 + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$mul4x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_mul4x_mont: +EXTERN bn_sqrx8x_internal +EXTERN bn_sqr8x_internal + +global bn_sqr8x_mont + +ALIGN 32 +bn_sqr8x_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_sqr8x_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + mov r9d,r9d + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + +$L$sqr8x_prologue: + + mov r10d,r9d + shl r9d,3 + shl r10,3+2 + neg r9 + + + + + + + lea r11,[((-64))+r9*2+rsp] + mov rbp,rsp + mov r8,QWORD[r8] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb NEAR $L$sqr8x_sp_alt + sub rbp,r11 + lea rbp,[((-64))+r9*2+rbp] + jmp NEAR $L$sqr8x_sp_done + +ALIGN 32 +$L$sqr8x_sp_alt: + lea r10,[((4096-64))+r9*2] + lea rbp,[((-64))+r9*2+rbp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rbp,r11 +$L$sqr8x_sp_done: + and rbp,-64 + mov r11,rsp + sub r11,rbp + and r11,-4096 + lea rsp,[rbp*1+r11] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$sqr8x_page_walk + jmp NEAR $L$sqr8x_page_walk_done + +ALIGN 16 +$L$sqr8x_page_walk: + lea rsp,[((-4096))+rsp] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$sqr8x_page_walk +$L$sqr8x_page_walk_done: + + mov r10,r9 + neg r9 + + mov QWORD[32+rsp],r8 + mov QWORD[40+rsp],rax + +$L$sqr8x_body: + +DB 102,72,15,110,209 + pxor xmm0,xmm0 +DB 102,72,15,110,207 +DB 102,73,15,110,218 + test rdx,rdx + jz NEAR $L$sqr8x_nox + + call bn_sqrx8x_internal + + + + + lea rbx,[rcx*1+r8] + mov r9,rcx + mov rdx,rcx +DB 102,72,15,126,207 + sar rcx,3+2 + jmp NEAR $L$sqr8x_sub + +ALIGN 32 +$L$sqr8x_nox: + call bn_sqr8x_internal + + + + + lea rbx,[r9*1+rdi] + mov rcx,r9 + mov rdx,r9 +DB 102,72,15,126,207 + sar rcx,3+2 + jmp NEAR $L$sqr8x_sub + +ALIGN 32 +$L$sqr8x_sub: + mov r12,QWORD[rbx] + mov r13,QWORD[8+rbx] + mov r14,QWORD[16+rbx] + mov r15,QWORD[24+rbx] + lea rbx,[32+rbx] + sbb r12,QWORD[rbp] + sbb r13,QWORD[8+rbp] + sbb r14,QWORD[16+rbp] + sbb r15,QWORD[24+rbp] + lea rbp,[32+rbp] + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r14 + mov QWORD[24+rdi],r15 + lea rdi,[32+rdi] + inc rcx + jnz NEAR $L$sqr8x_sub + + sbb rax,0 + lea rbx,[r9*1+rbx] + lea rdi,[r9*1+rdi] + +DB 102,72,15,110,200 + pxor xmm0,xmm0 + pshufd xmm1,xmm1,0 + mov rsi,QWORD[40+rsp] + + jmp NEAR $L$sqr8x_cond_copy + +ALIGN 32 +$L$sqr8x_cond_copy: + movdqa xmm2,XMMWORD[rbx] + movdqa xmm3,XMMWORD[16+rbx] + lea rbx,[32+rbx] + movdqu xmm4,XMMWORD[rdi] + movdqu xmm5,XMMWORD[16+rdi] + lea rdi,[32+rdi] + movdqa XMMWORD[(-32)+rbx],xmm0 + movdqa XMMWORD[(-16)+rbx],xmm0 + movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0 + movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0 + pcmpeqd xmm0,xmm1 + pand xmm2,xmm1 + pand xmm3,xmm1 + pand xmm4,xmm0 + pand xmm5,xmm0 + pxor xmm0,xmm0 + por xmm4,xmm2 + por xmm5,xmm3 + movdqu XMMWORD[(-32)+rdi],xmm4 + movdqu XMMWORD[(-16)+rdi],xmm5 + add r9,32 + jnz NEAR $L$sqr8x_cond_copy + + mov rax,1 + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$sqr8x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_sqr8x_mont: +global bn_mulx4x_mont + +ALIGN 32 +bn_mulx4x_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mulx4x_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + +$L$mulx4x_prologue: + + shl r9d,3 + xor r10,r10 + sub r10,r9 + mov r8,QWORD[r8] + lea rbp,[((-72))+r10*1+rsp] + and rbp,-128 + mov r11,rsp + sub r11,rbp + and r11,-4096 + lea rsp,[rbp*1+r11] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$mulx4x_page_walk + jmp NEAR $L$mulx4x_page_walk_done + +ALIGN 16 +$L$mulx4x_page_walk: + lea rsp,[((-4096))+rsp] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$mulx4x_page_walk +$L$mulx4x_page_walk_done: + + lea r10,[r9*1+rdx] + + + + + + + + + + + + + mov QWORD[rsp],r9 + shr r9,5 + mov QWORD[16+rsp],r10 + sub r9,1 + mov QWORD[24+rsp],r8 + mov QWORD[32+rsp],rdi + mov QWORD[40+rsp],rax + + mov QWORD[48+rsp],r9 + jmp NEAR $L$mulx4x_body + +ALIGN 32 +$L$mulx4x_body: + lea rdi,[8+rdx] + mov rdx,QWORD[rdx] + lea rbx,[((64+32))+rsp] + mov r9,rdx + + mulx rax,r8,QWORD[rsi] + mulx r14,r11,QWORD[8+rsi] + add r11,rax + mov QWORD[8+rsp],rdi + mulx r13,r12,QWORD[16+rsi] + adc r12,r14 + adc r13,0 + + mov rdi,r8 + imul r8,QWORD[24+rsp] + xor rbp,rbp + + mulx r14,rax,QWORD[24+rsi] + mov rdx,r8 + lea rsi,[32+rsi] + adcx r13,rax + adcx r14,rbp + + mulx r10,rax,QWORD[rcx] + adcx rdi,rax + adox r10,r11 + mulx r11,rax,QWORD[8+rcx] + adcx r10,rax + adox r11,r12 + DB 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 + mov rdi,QWORD[48+rsp] + mov QWORD[((-32))+rbx],r10 + adcx r11,rax + adox r12,r13 + mulx r15,rax,QWORD[24+rcx] + mov rdx,r9 + mov QWORD[((-24))+rbx],r11 + adcx r12,rax + adox r15,rbp + lea rcx,[32+rcx] + mov QWORD[((-16))+rbx],r12 + + jmp NEAR $L$mulx4x_1st + +ALIGN 32 +$L$mulx4x_1st: + adcx r15,rbp + mulx rax,r10,QWORD[rsi] + adcx r10,r14 + mulx r14,r11,QWORD[8+rsi] + adcx r11,rax + mulx rax,r12,QWORD[16+rsi] + adcx r12,r14 + mulx r14,r13,QWORD[24+rsi] + DB 0x67,0x67 + mov rdx,r8 + adcx r13,rax + adcx r14,rbp + lea rsi,[32+rsi] + lea rbx,[32+rbx] + + adox r10,r15 + mulx r15,rax,QWORD[rcx] + adcx r10,rax + adox r11,r15 + mulx r15,rax,QWORD[8+rcx] + adcx r11,rax + adox r12,r15 + mulx r15,rax,QWORD[16+rcx] + mov QWORD[((-40))+rbx],r10 + adcx r12,rax + mov QWORD[((-32))+rbx],r11 + adox r13,r15 + mulx r15,rax,QWORD[24+rcx] + mov rdx,r9 + mov QWORD[((-24))+rbx],r12 + adcx r13,rax + adox r15,rbp + lea rcx,[32+rcx] + mov QWORD[((-16))+rbx],r13 + + dec rdi + jnz NEAR $L$mulx4x_1st + + mov rax,QWORD[rsp] + mov rdi,QWORD[8+rsp] + adc r15,rbp + add r14,r15 + sbb r15,r15 + mov QWORD[((-8))+rbx],r14 + jmp NEAR $L$mulx4x_outer + +ALIGN 32 +$L$mulx4x_outer: + mov rdx,QWORD[rdi] + lea rdi,[8+rdi] + sub rsi,rax + mov QWORD[rbx],r15 + lea rbx,[((64+32))+rsp] + sub rcx,rax + + mulx r11,r8,QWORD[rsi] + xor ebp,ebp + mov r9,rdx + mulx r12,r14,QWORD[8+rsi] + adox r8,QWORD[((-32))+rbx] + adcx r11,r14 + mulx r13,r15,QWORD[16+rsi] + adox r11,QWORD[((-24))+rbx] + adcx r12,r15 + adox r12,QWORD[((-16))+rbx] + adcx r13,rbp + adox r13,rbp + + mov QWORD[8+rsp],rdi + mov r15,r8 + imul r8,QWORD[24+rsp] + xor ebp,ebp + + mulx r14,rax,QWORD[24+rsi] + mov rdx,r8 + adcx r13,rax + adox r13,QWORD[((-8))+rbx] + adcx r14,rbp + lea rsi,[32+rsi] + adox r14,rbp + + mulx r10,rax,QWORD[rcx] + adcx r15,rax + adox r10,r11 + mulx r11,rax,QWORD[8+rcx] + adcx r10,rax + adox r11,r12 + mulx r12,rax,QWORD[16+rcx] + mov QWORD[((-32))+rbx],r10 + adcx r11,rax + adox r12,r13 + mulx r15,rax,QWORD[24+rcx] + mov rdx,r9 + mov QWORD[((-24))+rbx],r11 + lea rcx,[32+rcx] + adcx r12,rax + adox r15,rbp + mov rdi,QWORD[48+rsp] + mov QWORD[((-16))+rbx],r12 + + jmp NEAR $L$mulx4x_inner + +ALIGN 32 +$L$mulx4x_inner: + mulx rax,r10,QWORD[rsi] + adcx r15,rbp + adox r10,r14 + mulx r14,r11,QWORD[8+rsi] + adcx r10,QWORD[rbx] + adox r11,rax + mulx rax,r12,QWORD[16+rsi] + adcx r11,QWORD[8+rbx] + adox r12,r14 + mulx r14,r13,QWORD[24+rsi] + mov rdx,r8 + adcx r12,QWORD[16+rbx] + adox r13,rax + adcx r13,QWORD[24+rbx] + adox r14,rbp + lea rsi,[32+rsi] + lea rbx,[32+rbx] + adcx r14,rbp + + adox r10,r15 + mulx r15,rax,QWORD[rcx] + adcx r10,rax + adox r11,r15 + mulx r15,rax,QWORD[8+rcx] + adcx r11,rax + adox r12,r15 + mulx r15,rax,QWORD[16+rcx] + mov QWORD[((-40))+rbx],r10 + adcx r12,rax + adox r13,r15 + mulx r15,rax,QWORD[24+rcx] + mov rdx,r9 + mov QWORD[((-32))+rbx],r11 + mov QWORD[((-24))+rbx],r12 + adcx r13,rax + adox r15,rbp + lea rcx,[32+rcx] + mov QWORD[((-16))+rbx],r13 + + dec rdi + jnz NEAR $L$mulx4x_inner + + mov rax,QWORD[rsp] + mov rdi,QWORD[8+rsp] + adc r15,rbp + sub rbp,QWORD[rbx] + adc r14,r15 + sbb r15,r15 + mov QWORD[((-8))+rbx],r14 + + cmp rdi,QWORD[16+rsp] + jne NEAR $L$mulx4x_outer + + lea rbx,[64+rsp] + sub rcx,rax + neg r15 + mov rdx,rax + shr rax,3+2 + mov rdi,QWORD[32+rsp] + jmp NEAR $L$mulx4x_sub + +ALIGN 32 +$L$mulx4x_sub: + mov r11,QWORD[rbx] + mov r12,QWORD[8+rbx] + mov r13,QWORD[16+rbx] + mov r14,QWORD[24+rbx] + lea rbx,[32+rbx] + sbb r11,QWORD[rcx] + sbb r12,QWORD[8+rcx] + sbb r13,QWORD[16+rcx] + sbb r14,QWORD[24+rcx] + lea rcx,[32+rcx] + mov QWORD[rdi],r11 + mov QWORD[8+rdi],r12 + mov QWORD[16+rdi],r13 + mov QWORD[24+rdi],r14 + lea rdi,[32+rdi] + dec rax + jnz NEAR $L$mulx4x_sub + + sbb r15,0 + lea rbx,[64+rsp] + sub rdi,rdx + +DB 102,73,15,110,207 + pxor xmm0,xmm0 + pshufd xmm1,xmm1,0 + mov rsi,QWORD[40+rsp] + + jmp NEAR $L$mulx4x_cond_copy + +ALIGN 32 +$L$mulx4x_cond_copy: + movdqa xmm2,XMMWORD[rbx] + movdqa xmm3,XMMWORD[16+rbx] + lea rbx,[32+rbx] + movdqu xmm4,XMMWORD[rdi] + movdqu xmm5,XMMWORD[16+rdi] + lea rdi,[32+rdi] + movdqa XMMWORD[(-32)+rbx],xmm0 + movdqa XMMWORD[(-16)+rbx],xmm0 + pcmpeqd xmm0,xmm1 + pand xmm2,xmm1 + pand xmm3,xmm1 + pand xmm4,xmm0 + pand xmm5,xmm0 + pxor xmm0,xmm0 + por xmm4,xmm2 + por xmm5,xmm3 + movdqu XMMWORD[(-32)+rdi],xmm4 + movdqu XMMWORD[(-16)+rdi],xmm5 + sub rdx,32 + jnz NEAR $L$mulx4x_cond_copy + + mov QWORD[rbx],rdx + + mov rax,1 + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$mulx4x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_mulx4x_mont: + DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 + DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 + DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 + DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 + DB 115,108,46,111,114,103,62,0 +ALIGN 16 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +mul_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov r10,QWORD[192+r8] + mov rax,QWORD[8+r10*8+rax] + + jmp NEAR $L$common_pop_regs + + + +ALIGN 16 +sqr_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_pop_regs + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[8+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[40+rax] + +$L$common_pop_regs: + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_bn_mul_mont_nohw wrt ..imagebase + DD $L$SEH_end_bn_mul_mont_nohw wrt ..imagebase + DD $L$SEH_info_bn_mul_mont_nohw wrt ..imagebase + + DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase + DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase + DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase + + DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase + DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase + DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase + DD $L$SEH_begin_bn_mulx4x_mont wrt ..imagebase + DD $L$SEH_end_bn_mulx4x_mont wrt ..imagebase + DD $L$SEH_info_bn_mulx4x_mont wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_bn_mul_mont_nohw: + DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase +$L$SEH_info_bn_mul4x_mont: + DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase +$L$SEH_info_bn_sqr8x_mont: + DB 9,0,0,0 + DD sqr_handler wrt ..imagebase + DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_mulx4x_mont: + DB 9,0,0,0 + DD sqr_handler wrt ..imagebase + DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase +ALIGN 8 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-apple.S b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-apple.S new file mode 100644 index 0000000000..bd63d91c8a --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-apple.S @@ -0,0 +1,3624 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + + +.globl _bn_mul_mont_gather5 +.private_extern _bn_mul_mont_gather5 + +.p2align 6 +_bn_mul_mont_gather5: + +_CET_ENDBR + movl %r9d,%r9d + movq %rsp,%rax + + testl $7,%r9d + jnz L$mul_enter + leaq _OPENSSL_ia32cap_P(%rip),%r11 + movl 8(%r11),%r11d + jmp L$mul4x_enter + +.p2align 4 +L$mul_enter: + movd 8(%rsp),%xmm5 + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + + negq %r9 + movq %rsp,%r11 + leaq -280(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + + + + + + + + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk + jmp L$mul_page_walk_done + +L$mul_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk +L$mul_page_walk_done: + + leaq L$inc(%rip),%r10 + movq %rax,8(%rsp,%r9,8) + +L$mul_body: + + leaq 128(%rdx),%r12 + movdqa 0(%r10),%xmm0 + movdqa 16(%r10),%xmm1 + leaq 24-112(%rsp,%r9,8),%r10 + andq $-16,%r10 + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 +.byte 0x67 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 +.byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + pand 64(%r12),%xmm0 + + pand 80(%r12),%xmm1 + pand 96(%r12),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%r12),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%r12),%xmm4 + movdqa -112(%r12),%xmm5 + movdqa -96(%r12),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%r12),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%r12),%xmm4 + movdqa -48(%r12),%xmm5 + movdqa -32(%r12),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%r12),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%r12),%xmm4 + movdqa 16(%r12),%xmm5 + movdqa 32(%r12),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%r12),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + por %xmm1,%xmm0 + + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$1st_enter + +.p2align 4 +L$1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne L$1st + + + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r9,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp L$outer +.p2align 4 +L$outer: + leaq 24+128(%rsp,%r9,8),%rdx + andq $-16,%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r12),%xmm0 + movdqa -112(%r12),%xmm1 + movdqa -96(%r12),%xmm2 + movdqa -80(%r12),%xmm3 + pand -128(%rdx),%xmm0 + pand -112(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -80(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r12),%xmm0 + movdqa -48(%r12),%xmm1 + movdqa -32(%r12),%xmm2 + movdqa -16(%r12),%xmm3 + pand -64(%rdx),%xmm0 + pand -48(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -16(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r12),%xmm0 + movdqa 16(%r12),%xmm1 + movdqa 32(%r12),%xmm2 + movdqa 48(%r12),%xmm3 + pand 0(%rdx),%xmm0 + pand 16(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 48(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r12),%xmm0 + movdqa 80(%r12),%xmm1 + movdqa 96(%r12),%xmm2 + movdqa 112(%r12),%xmm3 + pand 64(%rdx),%xmm0 + pand 80(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 112(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%r12),%r12 + + movq (%rsi),%rax +.byte 102,72,15,126,195 + + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$inner_enter + +.p2align 4 +L$inner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$inner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne L$inner + + addq %rax,%r13 + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r9,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r9,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb L$outer + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp L$sub +.p2align 4 +L$sub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz L$sub + + sbbq $0,%rax + movq $-1,%rbx + xorq %rax,%rbx + xorq %r14,%r14 + movq %r9,%r15 + +L$copy: + movq (%rdi,%r14,8),%rcx + movq (%rsp,%r14,8),%rdx + andq %rbx,%rcx + andq %rax,%rdx + movq %r14,(%rsp,%r14,8) + orq %rcx,%rdx + movq %rdx,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz L$copy + + movq 8(%rsp,%r9,8),%rsi + + movq $1,%rax + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$mul_epilogue: + ret + + + +.p2align 5 +bn_mul4x_mont_gather5: + +.byte 0x67 + movq %rsp,%rax + +L$mul4x_enter: + andl $0x80108,%r11d + cmpl $0x80108,%r11d + je L$mulx4x_enter + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$mul4x_prologue: + +.byte 0x67 + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + + + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$mul4xsp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp L$mul4xsp_done + +.p2align 5 +L$mul4xsp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +L$mul4xsp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mul4x_page_walk + jmp L$mul4x_page_walk_done + +L$mul4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mul4x_page_walk +L$mul4x_page_walk_done: + + negq %r9 + + movq %rax,40(%rsp) + +L$mul4x_body: + + call mul4x_internal + + movq 40(%rsp),%rsi + + movq $1,%rax + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$mul4x_epilogue: + ret + + + + +.p2align 5 +mul4x_internal: + + shlq $5,%r9 + movd 8(%rax),%xmm5 + leaq L$inc(%rip),%rax + leaq 128(%rdx,%r9,1),%r13 + shrq $5,%r9 + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 88-112(%rsp,%r9,1),%r10 + leaq 128(%rdx),%r12 + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 +.byte 0x67,0x67 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 +.byte 0x67 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 +.byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + pand 64(%r12),%xmm0 + + pand 80(%r12),%xmm1 + pand 96(%r12),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%r12),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%r12),%xmm4 + movdqa -112(%r12),%xmm5 + movdqa -96(%r12),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%r12),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%r12),%xmm4 + movdqa -48(%r12),%xmm5 + movdqa -32(%r12),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%r12),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%r12),%xmm4 + movdqa 16(%r12),%xmm5 + movdqa 32(%r12),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%r12),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + por %xmm1,%xmm0 + + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + + movq (%r8),%r8 + movq (%rsi),%rax + leaq (%rsi,%r9,1),%rsi + negq %r9 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + leaq 64+8(%rsp),%r14 + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + jmp L$1st4x + +.p2align 5 +L$1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz L$1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + leaq (%rcx,%r9,1),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + jmp L$outer4x + +.p2align 5 +L$outer4x: + leaq 16+128(%r14),%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r12),%xmm0 + movdqa -112(%r12),%xmm1 + movdqa -96(%r12),%xmm2 + movdqa -80(%r12),%xmm3 + pand -128(%rdx),%xmm0 + pand -112(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -80(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r12),%xmm0 + movdqa -48(%r12),%xmm1 + movdqa -32(%r12),%xmm2 + movdqa -16(%r12),%xmm3 + pand -64(%rdx),%xmm0 + pand -48(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -16(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r12),%xmm0 + movdqa 16(%r12),%xmm1 + movdqa 32(%r12),%xmm2 + movdqa 48(%r12),%xmm3 + pand 0(%rdx),%xmm0 + pand 16(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 48(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r12),%xmm0 + movdqa 80(%r12),%xmm1 + movdqa 96(%r12),%xmm2 + movdqa 112(%r12),%xmm3 + pand 64(%rdx),%xmm0 + pand 80(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 112(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq (%r14,%r9,1),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + movq %rdi,(%r14) + + leaq (%r14,%r9,1),%r14 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdx,%r13 + jmp L$inner4x + +.p2align 5 +L$inner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + addq (%r14),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz L$inner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq %rbp,%rax + movq -8(%rcx),%rbp + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + movq %rdi,-16(%r14) + leaq (%rcx,%r9,1),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%r14),%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb L$outer4x + xorq %rax,%rax + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + subq %rdi,%rax + leaq (%r14,%r9,1),%rbx + movq (%rcx),%r12 + leaq (%rcx),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + decq %r12 + xorq %r10,%r10 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp L$sqr4x_sub_entry + + +.globl _bn_power5 +.private_extern _bn_power5 + +.p2align 5 +_bn_power5: + +_CET_ENDBR + movq %rsp,%rax + + leaq _OPENSSL_ia32cap_P(%rip),%r11 + movl 8(%r11),%r11d + andl $0x80108,%r11d + cmpl $0x80108,%r11d + je L$powerx5_enter + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$power5_prologue: + + shll $3,%r9d + leal (%r9,%r9,2),%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$pwr_sp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp L$pwr_sp_done + +.p2align 5 +L$pwr_sp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +L$pwr_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwr_page_walk + jmp L$pwr_page_walk_done + +L$pwr_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwr_page_walk +L$pwr_page_walk_done: + + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) + +L$power5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi + + movq $1,%rax + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$power5_epilogue: + ret + + + +.globl _bn_sqr8x_internal +.private_extern _bn_sqr8x_internal +.private_extern _bn_sqr8x_internal + +.p2align 5 +_bn_sqr8x_internal: +__bn_sqr8x_internal: + +_CET_ENDBR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp L$sqr4x_1st + +.p2align 5 +L$sqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp L$sqr4x_outer + +.p2align 5 +L$sqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp L$sqr4x_inner + +.p2align 5 +L$sqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz L$sqr4x_outer + + + movq -32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp L$sqr4x_shift_n_add + +.p2align 5 +L$sqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz L$sqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 +__bn_sqr8x_reduction: + xorq %rax,%rax + leaq (%r9,%rbp,1),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp L$8x_reduction_loop + +.p2align 5 +L$8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp L$8x_reduce + +.p2align 5 +L$8x_reduce: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz L$8x_reduce + + leaq 64(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae L$8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp L$8x_tail + +.p2align 5 +L$8x_tail: + mulq %rbx + addq %rax,%r8 + movq 8(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz L$8x_tail + + leaq 64(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae L$8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movl $8,%ecx + jmp L$8x_tail + +.p2align 5 +L$8x_tail_done: + xorq %rax,%rax + addq (%rdx),%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rax + + negq %rsi +L$8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -8(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb L$8x_reduction_loop + ret + + + +.p2align 5 +__bn_post4x_internal: + + movq 0(%rbp),%r12 + leaq (%rdi,%r9,1),%rbx + movq %r9,%rcx +.byte 102,72,15,126,207 + negq %rax +.byte 102,72,15,126,206 + sarq $3+2,%rcx + decq %r12 + xorq %r10,%r10 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp L$sqr4x_sub_entry + +.p2align 4 +L$sqr4x_sub: + movq 0(%rbp),%r12 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 +L$sqr4x_sub_entry: + leaq 32(%rbp),%rbp + notq %r12 + notq %r13 + notq %r14 + notq %r15 + andq %rax,%r12 + andq %rax,%r13 + andq %rax,%r14 + andq %rax,%r15 + + negq %r10 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + adcq 16(%rbx),%r14 + adcq 24(%rbx),%r15 + movq %r12,0(%rdi) + leaq 32(%rbx),%rbx + movq %r13,8(%rdi) + sbbq %r10,%r10 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz L$sqr4x_sub + + movq %r9,%r10 + negq %r9 + ret + + + +.p2align 5 +bn_mulx4x_mont_gather5: + + movq %rsp,%rax + +L$mulx4x_enter: + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$mulx4x_prologue: + + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + movq (%r8),%r8 + + + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$mulx4xsp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp L$mulx4xsp_done + +L$mulx4xsp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +L$mulx4xsp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mulx4x_page_walk + jmp L$mulx4x_page_walk_done + +L$mulx4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mulx4x_page_walk +L$mulx4x_page_walk_done: + + + + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) + +L$mulx4x_body: + call mulx4x_internal + + movq 40(%rsp),%rsi + + movq $1,%rax + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$mulx4x_epilogue: + ret + + + + +.p2align 5 +mulx4x_internal: + + movq %r9,8(%rsp) + movq %r9,%r10 + negq %r9 + shlq $5,%r9 + negq %r10 + leaq 128(%rdx,%r9,1),%r13 + shrq $5+5,%r9 + movd 8(%rax),%xmm5 + subq $1,%r9 + leaq L$inc(%rip),%rax + movq %r13,16+8(%rsp) + movq %r9,24+8(%rsp) + movq %rdi,56+8(%rsp) + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 88-112(%rsp,%r10,1),%r10 + leaq 128(%rdx),%rdi + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 +.byte 0x67 + movdqa %xmm1,%xmm2 +.byte 0x67 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 +.byte 0x67 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + + pand 64(%rdi),%xmm0 + pand 80(%rdi),%xmm1 + pand 96(%rdi),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%rdi),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%rdi),%xmm4 + movdqa -112(%rdi),%xmm5 + movdqa -96(%rdi),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%rdi),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%rdi),%xmm4 + movdqa -48(%rdi),%xmm5 + movdqa -32(%rdi),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%rdi),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%rdi),%xmm4 + movdqa 16(%rdi),%xmm5 + movdqa 32(%rdi),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%rdi),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + pxor %xmm1,%xmm0 + + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%rdi),%rdi +.byte 102,72,15,126,194 + leaq 64+32+8(%rsp),%rbx + + movq %rdx,%r9 + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r12 + addq %rax,%r11 + mulxq 16(%rsi),%rax,%r13 + adcq %rax,%r12 + adcq $0,%r13 + mulxq 24(%rsi),%rax,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + xorq %rbp,%rbp + movq %r8,%rdx + + movq %rdi,8+8(%rsp) + + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r12,-16(%rbx) + jmp L$mulx4x_1st + +.p2align 5 +L$mulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz L$mulx4x_1st + + movq 8(%rsp),%rax + adcq %rbp,%r15 + leaq (%rsi,%rax,1),%rsi + addq %r15,%r14 + movq 8+8(%rsp),%rdi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + jmp L$mulx4x_outer + +.p2align 5 +L$mulx4x_outer: + leaq 16-256(%rbx),%r10 + pxor %xmm4,%xmm4 +.byte 0x67,0x67 + pxor %xmm5,%xmm5 + movdqa -128(%rdi),%xmm0 + movdqa -112(%rdi),%xmm1 + movdqa -96(%rdi),%xmm2 + pand 256(%r10),%xmm0 + movdqa -80(%rdi),%xmm3 + pand 272(%r10),%xmm1 + por %xmm0,%xmm4 + pand 288(%r10),%xmm2 + por %xmm1,%xmm5 + pand 304(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%rdi),%xmm0 + movdqa -48(%rdi),%xmm1 + movdqa -32(%rdi),%xmm2 + pand 320(%r10),%xmm0 + movdqa -16(%rdi),%xmm3 + pand 336(%r10),%xmm1 + por %xmm0,%xmm4 + pand 352(%r10),%xmm2 + por %xmm1,%xmm5 + pand 368(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%rdi),%xmm0 + movdqa 16(%rdi),%xmm1 + movdqa 32(%rdi),%xmm2 + pand 384(%r10),%xmm0 + movdqa 48(%rdi),%xmm3 + pand 400(%r10),%xmm1 + por %xmm0,%xmm4 + pand 416(%r10),%xmm2 + por %xmm1,%xmm5 + pand 432(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%rdi),%xmm0 + movdqa 80(%rdi),%xmm1 + movdqa 96(%rdi),%xmm2 + pand 448(%r10),%xmm0 + movdqa 112(%rdi),%xmm3 + pand 464(%r10),%xmm1 + por %xmm0,%xmm4 + pand 480(%r10),%xmm2 + por %xmm1,%xmm5 + pand 496(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%rdi),%rdi +.byte 102,72,15,126,194 + + movq %rbp,(%rbx) + leaq 32(%rbx,%rax,1),%rbx + mulxq 0(%rsi),%r8,%r11 + xorq %rbp,%rbp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + mulxq 24(%rsi),%rdx,%r14 + adoxq -16(%rbx),%r12 + adcxq %rdx,%r13 + leaq (%rcx,%rax,1),%rcx + leaq 32(%rsi),%rsi + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + adoxq %rbp,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + + movq %r8,%rdx + xorq %rbp,%rbp + movq %rdi,8+8(%rsp) + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r12 + movq %r11,-24(%rbx) + adoxq %rbp,%r15 + movq %r12,-16(%rbx) + leaq 32(%rcx),%rcx + jmp L$mulx4x_inner + +.p2align 5 +L$mulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + movq %r11,-32(%rbx) + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + leaq 32(%rcx),%rcx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + movq %r13,-16(%rbx) + + decq %rdi + jnz L$mulx4x_inner + + movq 0+8(%rsp),%rax + adcq %rbp,%r15 + subq 0(%rbx),%rdi + movq 8+8(%rsp),%rdi + movq 16+8(%rsp),%r10 + adcq %r15,%r14 + leaq (%rsi,%rax,1),%rsi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + + cmpq %r10,%rdi + jb L$mulx4x_outer + + movq -8(%rcx),%r10 + movq %rbp,%r8 + movq (%rcx,%rax,1),%r12 + leaq (%rcx,%rax,1),%rbp + movq %rax,%rcx + leaq (%rbx,%rax,1),%rdi + xorl %eax,%eax + xorq %r15,%r15 + subq %r14,%r10 + adcq %r15,%r15 + orq %r15,%r8 + sarq $3+2,%rcx + subq %r8,%rax + movq 56+8(%rsp),%rdx + decq %r12 + movq 8(%rbp),%r13 + xorq %r8,%r8 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp L$sqrx4x_sub_entry + + + +.p2align 5 +bn_powerx5: + + movq %rsp,%rax + +L$powerx5_enter: + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$powerx5_prologue: + + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$pwrx_sp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp L$pwrx_sp_done + +.p2align 5 +L$pwrx_sp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +L$pwrx_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwrx_page_walk + jmp L$pwrx_page_walk_done + +L$pwrx_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwrx_page_walk +L$pwrx_page_walk_done: + + movq %r9,%r10 + negq %r9 + + + + + + + + + + + + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + movq %r8,32(%rsp) + movq %rax,40(%rsp) + +L$powerx5_body: + + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + + movq %r10,%r9 + movq %rsi,%rdi +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq 40(%rsp),%rax + + call mulx4x_internal + + movq 40(%rsp),%rsi + + movq $1,%rax + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$powerx5_epilogue: + ret + + + +.globl _bn_sqrx8x_internal +.private_extern _bn_sqrx8x_internal +.private_extern _bn_sqrx8x_internal + +.p2align 5 +_bn_sqrx8x_internal: +__bn_sqrx8x_internal: + +_CET_ENDBR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 48+8(%rsp),%rdi + leaq (%rsi,%r9,1),%rbp + movq %r9,0+8(%rsp) + movq %rbp,8+8(%rsp) + jmp L$sqr8x_zero_start + +.p2align 5 +.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +L$sqrx8x_zero: +.byte 0x3e + movdqa %xmm0,0(%rdi) + movdqa %xmm0,16(%rdi) + movdqa %xmm0,32(%rdi) + movdqa %xmm0,48(%rdi) +L$sqr8x_zero_start: + movdqa %xmm0,64(%rdi) + movdqa %xmm0,80(%rdi) + movdqa %xmm0,96(%rdi) + movdqa %xmm0,112(%rdi) + leaq 128(%rdi),%rdi + subq $64,%r9 + jnz L$sqrx8x_zero + + movq 0(%rsi),%rdx + + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + leaq 48+8(%rsp),%rdi + xorq %rbp,%rbp + jmp L$sqrx8x_outer_loop + +.p2align 5 +L$sqrx8x_outer_loop: + mulxq 8(%rsi),%r8,%rax + adcxq %r9,%r8 + adoxq %rax,%r10 + mulxq 16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 +.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 + adcxq %r11,%r10 + adoxq %rax,%r12 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 + adcxq %r12,%r11 + adoxq %rax,%r13 + mulxq 40(%rsi),%r12,%rax + adcxq %r13,%r12 + adoxq %rax,%r14 + mulxq 48(%rsi),%r13,%rax + adcxq %r14,%r13 + adoxq %r15,%rax + mulxq 56(%rsi),%r14,%r15 + movq 8(%rsi),%rdx + adcxq %rax,%r14 + adoxq %rbp,%r15 + adcq 64(%rdi),%r15 + movq %r8,8(%rdi) + movq %r9,16(%rdi) + sbbq %rcx,%rcx + xorq %rbp,%rbp + + + mulxq 16(%rsi),%r8,%rbx + mulxq 24(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 32(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %rbx,%r11 +.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 + adcxq %r13,%r11 + adoxq %r14,%r12 +.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 + movq 16(%rsi),%rdx + adcxq %rax,%r12 + adoxq %rbx,%r13 + adcxq %r15,%r13 + adoxq %rbp,%r14 + adcxq %rbp,%r14 + + movq %r8,24(%rdi) + movq %r9,32(%rdi) + + mulxq 24(%rsi),%r8,%rbx + mulxq 32(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 40(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %r13,%r11 +.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 +.byte 0x3e + movq 24(%rsi),%rdx + adcxq %rbx,%r11 + adoxq %rax,%r12 + adcxq %r14,%r12 + movq %r8,40(%rdi) + movq %r9,48(%rdi) + mulxq 32(%rsi),%r8,%rax + adoxq %rbp,%r13 + adcxq %rbp,%r13 + + mulxq 40(%rsi),%r9,%rbx + adcxq %r10,%r8 + adoxq %rax,%r9 + mulxq 48(%rsi),%r10,%rax + adcxq %r11,%r9 + adoxq %r12,%r10 + mulxq 56(%rsi),%r11,%r12 + movq 32(%rsi),%rdx + movq 40(%rsi),%r14 + adcxq %rbx,%r10 + adoxq %rax,%r11 + movq 48(%rsi),%r15 + adcxq %r13,%r11 + adoxq %rbp,%r12 + adcxq %rbp,%r12 + + movq %r8,56(%rdi) + movq %r9,64(%rdi) + + mulxq %r14,%r9,%rax + movq 56(%rsi),%r8 + adcxq %r10,%r9 + mulxq %r15,%r10,%rbx + adoxq %rax,%r10 + adcxq %r11,%r10 + mulxq %r8,%r11,%rax + movq %r14,%rdx + adoxq %rbx,%r11 + adcxq %r12,%r11 + + adcxq %rbp,%rax + + mulxq %r15,%r14,%rbx + mulxq %r8,%r12,%r13 + movq %r15,%rdx + leaq 64(%rsi),%rsi + adcxq %r14,%r11 + adoxq %rbx,%r12 + adcxq %rax,%r12 + adoxq %rbp,%r13 + +.byte 0x67,0x67 + mulxq %r8,%r8,%r14 + adcxq %r8,%r13 + adcxq %rbp,%r14 + + cmpq 8+8(%rsp),%rsi + je L$sqrx8x_outer_break + + negq %rcx + movq $-8,%rcx + movq %rbp,%r15 + movq 64(%rdi),%r8 + adcxq 72(%rdi),%r9 + adcxq 80(%rdi),%r10 + adcxq 88(%rdi),%r11 + adcq 96(%rdi),%r12 + adcq 104(%rdi),%r13 + adcq 112(%rdi),%r14 + adcq 120(%rdi),%r15 + leaq (%rsi),%rbp + leaq 128(%rdi),%rdi + sbbq %rax,%rax + + movq -64(%rsi),%rdx + movq %rax,16+8(%rsp) + movq %rdi,24+8(%rsp) + + + xorl %eax,%eax + jmp L$sqrx8x_loop + +.p2align 5 +L$sqrx8x_loop: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + movq %rbx,(%rdi,%rcx,8) + movl $0,%ebx + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 + movq 8(%rsi,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rbx,%r15 + adcxq %rbx,%r15 + +.byte 0x67 + incq %rcx + jnz L$sqrx8x_loop + + leaq 64(%rbp),%rbp + movq $-8,%rcx + cmpq 8+8(%rsp),%rbp + je L$sqrx8x_break + + subq 16+8(%rsp),%rbx +.byte 0x66 + movq -64(%rsi),%rdx + adcxq 0(%rdi),%r8 + adcxq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi +.byte 0x67 + sbbq %rax,%rax + xorl %ebx,%ebx + movq %rax,16+8(%rsp) + jmp L$sqrx8x_loop + +.p2align 5 +L$sqrx8x_break: + xorq %rbp,%rbp + subq 16+8(%rsp),%rbx + adcxq %rbp,%r8 + movq 24+8(%rsp),%rcx + adcxq %rbp,%r9 + movq 0(%rsi),%rdx + adcq $0,%r10 + movq %r8,0(%rdi) + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + cmpq %rcx,%rdi + je L$sqrx8x_outer_loop + + movq %r9,8(%rdi) + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + movq 40(%rcx),%r13 + movq %r14,48(%rdi) + movq 48(%rcx),%r14 + movq %r15,56(%rdi) + movq 56(%rcx),%r15 + movq %rcx,%rdi + jmp L$sqrx8x_outer_loop + +.p2align 5 +L$sqrx8x_outer_break: + movq %r9,72(%rdi) +.byte 102,72,15,126,217 + movq %r10,80(%rdi) + movq %r11,88(%rdi) + movq %r12,96(%rdi) + movq %r13,104(%rdi) + movq %r14,112(%rdi) + leaq 48+8(%rsp),%rdi + movq (%rsi,%rcx,1),%rdx + + movq 8(%rdi),%r11 + xorq %r10,%r10 + movq 0+8(%rsp),%r9 + adoxq %r11,%r11 + movq 16(%rdi),%r12 + movq 24(%rdi),%r13 + + +.p2align 5 +L$sqrx4x_shift_n_add: + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax +.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 +.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 40(%rdi),%r11 + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + movq 16(%rsi,%rcx,1),%rdx + movq 48(%rdi),%r12 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 56(%rdi),%r13 + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax + movq 24(%rsi,%rcx,1),%rdx + leaq 32(%rcx),%rcx + movq 64(%rdi),%r10 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 72(%rdi),%r11 + movq %rax,32(%rdi) + movq %rbx,40(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + jrcxz L$sqrx4x_shift_n_add_break +.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 80(%rdi),%r12 + movq 88(%rdi),%r13 + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi + nop + jmp L$sqrx4x_shift_n_add + +.p2align 5 +L$sqrx4x_shift_n_add_break: + adcxq %r13,%rbx + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi +.byte 102,72,15,126,213 +__bn_sqrx8x_reduction: + xorl %eax,%eax + movq 32+8(%rsp),%rbx + movq 48+8(%rsp),%rdx + leaq -64(%rbp,%r9,1),%rcx + + movq %rcx,0+8(%rsp) + movq %rdi,8+8(%rsp) + + leaq 48+8(%rsp),%rdi + jmp L$sqrx8x_reduction_loop + +.p2align 5 +L$sqrx8x_reduction_loop: + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq %rdx,%r8 + imulq %rbx,%rdx + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,24+8(%rsp) + + leaq 64(%rdi),%rdi + xorq %rsi,%rsi + movq $-8,%rcx + jmp L$sqrx8x_reduce + +.p2align 5 +L$sqrx8x_reduce: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 8(%rbp),%rbx,%r9 + adcxq %rbx,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 32+8(%rsp),%rbx,%rdx + movq %rax,%rdx + movq %rax,64+48+8(%rsp,%rcx,8) + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + +.byte 0x67,0x67,0x67 + incq %rcx + jnz L$sqrx8x_reduce + + movq %rsi,%rax + cmpq 0+8(%rsp),%rbp + jae L$sqrx8x_no_tail + + movq 48+8(%rsp),%rdx + addq 0(%rdi),%r8 + leaq 64(%rbp),%rbp + movq $-8,%rcx + adcxq 8(%rdi),%r9 + adcxq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp L$sqrx8x_tail + +.p2align 5 +L$sqrx8x_tail: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq 72+48+8(%rsp,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + movq %rbx,(%rdi,%rcx,8) + movq %r8,%rbx + adcxq %rsi,%r15 + + incq %rcx + jnz L$sqrx8x_tail + + cmpq 0+8(%rsp),%rbp + jae L$sqrx8x_tail_done + + subq 16+8(%rsp),%rsi + movq 48+8(%rsp),%rdx + leaq 64(%rbp),%rbp + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + subq $8,%rcx + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp L$sqrx8x_tail + +.p2align 5 +L$sqrx8x_tail_done: + xorq %rax,%rax + addq 24+8(%rsp),%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rax + + subq 16+8(%rsp),%rsi +L$sqrx8x_no_tail: + adcq 0(%rdi),%r8 +.byte 102,72,15,126,217 + adcq 8(%rdi),%r9 + movq 56(%rbp),%rsi +.byte 102,72,15,126,213 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + + movq 32+8(%rsp),%rbx + movq 64(%rdi,%rcx,1),%rdx + + movq %r8,0(%rdi) + leaq 64(%rdi),%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 64(%rdi,%rcx,1),%rdi + cmpq 8+8(%rsp),%r8 + jb L$sqrx8x_reduction_loop + ret + + +.p2align 5 + +__bn_postx4x_internal: + + movq 0(%rbp),%r12 + movq %rcx,%r10 + movq %rcx,%r9 + negq %rax + sarq $3+2,%rcx + +.byte 102,72,15,126,202 +.byte 102,72,15,126,206 + decq %r12 + movq 8(%rbp),%r13 + xorq %r8,%r8 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp L$sqrx4x_sub_entry + +.p2align 4 +L$sqrx4x_sub: + movq 0(%rbp),%r12 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 +L$sqrx4x_sub_entry: + andnq %rax,%r12,%r12 + leaq 32(%rbp),%rbp + andnq %rax,%r13,%r13 + andnq %rax,%r14,%r14 + andnq %rax,%r15,%r15 + + negq %r8 + adcq 0(%rdi),%r12 + adcq 8(%rdi),%r13 + adcq 16(%rdi),%r14 + adcq 24(%rdi),%r15 + movq %r12,0(%rdx) + leaq 32(%rdi),%rdi + movq %r13,8(%rdx) + sbbq %r8,%r8 + movq %r14,16(%rdx) + movq %r15,24(%rdx) + leaq 32(%rdx),%rdx + + incq %rcx + jnz L$sqrx4x_sub + + negq %r9 + + ret + + +.globl _bn_scatter5 +.private_extern _bn_scatter5 + +.p2align 4 +_bn_scatter5: + +_CET_ENDBR + cmpl $0,%esi + jz L$scatter_epilogue + + + + + + + + + + leaq (%rdx,%rcx,8),%rdx +L$scatter: + movq (%rdi),%rax + leaq 8(%rdi),%rdi + movq %rax,(%rdx) + leaq 256(%rdx),%rdx + subl $1,%esi + jnz L$scatter +L$scatter_epilogue: + ret + + + +.globl _bn_gather5 +.private_extern _bn_gather5 + +.p2align 5 +_bn_gather5: + +L$SEH_begin_bn_gather5: +_CET_ENDBR + +.byte 0x4c,0x8d,0x14,0x24 + +.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 + leaq L$inc(%rip),%rax + andq $-16,%rsp + + movd %ecx,%xmm5 + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 128(%rdx),%r11 + leaq 128(%rsp),%rax + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,-128(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,-112(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,-96(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,-80(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,-64(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,-48(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,-32(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,-16(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,0(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,16(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,32(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,48(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,64(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,80(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,96(%rax) + movdqa %xmm4,%xmm2 + movdqa %xmm3,112(%rax) + jmp L$gather + +.p2align 5 +L$gather: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r11),%xmm0 + movdqa -112(%r11),%xmm1 + movdqa -96(%r11),%xmm2 + pand -128(%rax),%xmm0 + movdqa -80(%r11),%xmm3 + pand -112(%rax),%xmm1 + por %xmm0,%xmm4 + pand -96(%rax),%xmm2 + por %xmm1,%xmm5 + pand -80(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r11),%xmm0 + movdqa -48(%r11),%xmm1 + movdqa -32(%r11),%xmm2 + pand -64(%rax),%xmm0 + movdqa -16(%r11),%xmm3 + pand -48(%rax),%xmm1 + por %xmm0,%xmm4 + pand -32(%rax),%xmm2 + por %xmm1,%xmm5 + pand -16(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + pand 0(%rax),%xmm0 + movdqa 48(%r11),%xmm3 + pand 16(%rax),%xmm1 + por %xmm0,%xmm4 + pand 32(%rax),%xmm2 + por %xmm1,%xmm5 + pand 48(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r11),%xmm0 + movdqa 80(%r11),%xmm1 + movdqa 96(%r11),%xmm2 + pand 64(%rax),%xmm0 + movdqa 112(%r11),%xmm3 + pand 80(%rax),%xmm1 + por %xmm0,%xmm4 + pand 96(%rax),%xmm2 + por %xmm1,%xmm5 + pand 112(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + leaq 256(%r11),%r11 + + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + movq %xmm0,(%rdi) + leaq 8(%rdi),%rdi + subl $1,%esi + jnz L$gather + + leaq (%r10),%rsp + + ret +L$SEH_end_bn_gather5: + + +.section __DATA,__const +.p2align 6 +L$inc: +.long 0,0, 1,1 +.long 2,2, 2,2 +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-linux.S b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-linux.S new file mode 100644 index 0000000000..14ab4f72a4 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-linux.S @@ -0,0 +1,3625 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +.globl bn_mul_mont_gather5 +.hidden bn_mul_mont_gather5 +.type bn_mul_mont_gather5,@function +.align 64 +bn_mul_mont_gather5: +.cfi_startproc +_CET_ENDBR + movl %r9d,%r9d + movq %rsp,%rax +.cfi_def_cfa_register %rax + testl $7,%r9d + jnz .Lmul_enter + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 8(%r11),%r11d + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + movd 8(%rsp),%xmm5 + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + negq %r9 + movq %rsp,%r11 + leaq -280(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + + + + + + + + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk + jmp .Lmul_page_walk_done + +.Lmul_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk +.Lmul_page_walk_done: + + leaq .Linc(%rip),%r10 + movq %rax,8(%rsp,%r9,8) +.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 +.Lmul_body: + + leaq 128(%rdx),%r12 + movdqa 0(%r10),%xmm0 + movdqa 16(%r10),%xmm1 + leaq 24-112(%rsp,%r9,8),%r10 + andq $-16,%r10 + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 +.byte 0x67 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 +.byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + pand 64(%r12),%xmm0 + + pand 80(%r12),%xmm1 + pand 96(%r12),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%r12),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%r12),%xmm4 + movdqa -112(%r12),%xmm5 + movdqa -96(%r12),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%r12),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%r12),%xmm4 + movdqa -48(%r12),%xmm5 + movdqa -32(%r12),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%r12),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%r12),%xmm4 + movdqa 16(%r12),%xmm5 + movdqa 32(%r12),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%r12),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + por %xmm1,%xmm0 + + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + + + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r9,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + leaq 24+128(%rsp,%r9,8),%rdx + andq $-16,%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r12),%xmm0 + movdqa -112(%r12),%xmm1 + movdqa -96(%r12),%xmm2 + movdqa -80(%r12),%xmm3 + pand -128(%rdx),%xmm0 + pand -112(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -80(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r12),%xmm0 + movdqa -48(%r12),%xmm1 + movdqa -32(%r12),%xmm2 + movdqa -16(%r12),%xmm3 + pand -64(%rdx),%xmm0 + pand -48(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -16(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r12),%xmm0 + movdqa 16(%r12),%xmm1 + movdqa 32(%r12),%xmm2 + movdqa 48(%r12),%xmm3 + pand 0(%rdx),%xmm0 + pand 16(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 48(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r12),%xmm0 + movdqa 80(%r12),%xmm1 + movdqa 96(%r12),%xmm2 + movdqa 112(%r12),%xmm3 + pand 64(%rdx),%xmm0 + pand 80(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 112(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%r12),%r12 + + movq (%rsi),%rax +.byte 102,72,15,126,195 + + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + + addq %rax,%r13 + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r9,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r9,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp .Lsub +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + movq $-1,%rbx + xorq %rax,%rbx + xorq %r14,%r14 + movq %r9,%r15 + +.Lcopy: + movq (%rdi,%r14,8),%rcx + movq (%rsp,%r14,8),%rdx + andq %rbx,%rcx + andq %rax,%rdx + movq %r14,(%rsp,%r14,8) + orq %rcx,%rdx + movq %rdx,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmul_epilogue: + ret +.cfi_endproc +.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.type bn_mul4x_mont_gather5,@function +.align 32 +bn_mul4x_mont_gather5: +.cfi_startproc +.byte 0x67 + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lmul4x_enter: + andl $0x80108,%r11d + cmpl $0x80108,%r11d + je .Lmulx4x_enter + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lmul4x_prologue: + +.byte 0x67 + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + + + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmul4xsp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp .Lmul4xsp_done + +.align 32 +.Lmul4xsp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lmul4xsp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmul4x_page_walk + jmp .Lmul4x_page_walk_done + +.Lmul4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmul4x_page_walk +.Lmul4x_page_walk_done: + + negq %r9 + + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lmul4x_body: + + call mul4x_internal + + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmul4x_epilogue: + ret +.cfi_endproc +.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 + +.type mul4x_internal,@function +.align 32 +mul4x_internal: +.cfi_startproc + shlq $5,%r9 + movd 8(%rax),%xmm5 + leaq .Linc(%rip),%rax + leaq 128(%rdx,%r9,1),%r13 + shrq $5,%r9 + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 88-112(%rsp,%r9,1),%r10 + leaq 128(%rdx),%r12 + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 +.byte 0x67,0x67 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 +.byte 0x67 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 +.byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + pand 64(%r12),%xmm0 + + pand 80(%r12),%xmm1 + pand 96(%r12),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%r12),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%r12),%xmm4 + movdqa -112(%r12),%xmm5 + movdqa -96(%r12),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%r12),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%r12),%xmm4 + movdqa -48(%r12),%xmm5 + movdqa -32(%r12),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%r12),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%r12),%xmm4 + movdqa 16(%r12),%xmm5 + movdqa 32(%r12),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%r12),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + por %xmm1,%xmm0 + + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + + movq (%r8),%r8 + movq (%rsi),%rax + leaq (%rsi,%r9,1),%rsi + negq %r9 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + leaq 64+8(%rsp),%r14 + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + jmp .L1st4x + +.align 32 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + leaq (%rcx,%r9,1),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + jmp .Louter4x + +.align 32 +.Louter4x: + leaq 16+128(%r14),%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r12),%xmm0 + movdqa -112(%r12),%xmm1 + movdqa -96(%r12),%xmm2 + movdqa -80(%r12),%xmm3 + pand -128(%rdx),%xmm0 + pand -112(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -80(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r12),%xmm0 + movdqa -48(%r12),%xmm1 + movdqa -32(%r12),%xmm2 + movdqa -16(%r12),%xmm3 + pand -64(%rdx),%xmm0 + pand -48(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -16(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r12),%xmm0 + movdqa 16(%r12),%xmm1 + movdqa 32(%r12),%xmm2 + movdqa 48(%r12),%xmm3 + pand 0(%rdx),%xmm0 + pand 16(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 48(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r12),%xmm0 + movdqa 80(%r12),%xmm1 + movdqa 96(%r12),%xmm2 + movdqa 112(%r12),%xmm3 + pand 64(%rdx),%xmm0 + pand 80(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 112(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq (%r14,%r9,1),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + movq %rdi,(%r14) + + leaq (%r14,%r9,1),%r14 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdx,%r13 + jmp .Linner4x + +.align 32 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + addq (%r14),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq %rbp,%rax + movq -8(%rcx),%rbp + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + movq %rdi,-16(%r14) + leaq (%rcx,%r9,1),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%r14),%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb .Louter4x + xorq %rax,%rax + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + subq %rdi,%rax + leaq (%r14,%r9,1),%rbx + movq (%rcx),%r12 + leaq (%rcx),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + decq %r12 + xorq %r10,%r10 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqr4x_sub_entry +.cfi_endproc +.size mul4x_internal,.-mul4x_internal +.globl bn_power5 +.hidden bn_power5 +.type bn_power5,@function +.align 32 +bn_power5: +.cfi_startproc +_CET_ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 8(%r11),%r11d + andl $0x80108,%r11d + cmpl $0x80108,%r11d + je .Lpowerx5_enter + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lpower5_prologue: + + shll $3,%r9d + leal (%r9,%r9,2),%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwr_sp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp .Lpwr_sp_done + +.align 32 +.Lpwr_sp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lpwr_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwr_page_walk + jmp .Lpwr_page_walk_done + +.Lpwr_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwr_page_walk +.Lpwr_page_walk_done: + + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lpower5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpower5_epilogue: + ret +.cfi_endproc +.size bn_power5,.-bn_power5 + +.globl bn_sqr8x_internal +.hidden bn_sqr8x_internal +.hidden bn_sqr8x_internal +.type bn_sqr8x_internal,@function +.align 32 +bn_sqr8x_internal: +__bn_sqr8x_internal: +.cfi_startproc +_CET_ENDBR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp .Lsqr4x_1st + +.align 32 +.Lsqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp .Lsqr4x_outer + +.align 32 +.Lsqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp .Lsqr4x_inner + +.align 32 +.Lsqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz .Lsqr4x_outer + + + movq -32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp .Lsqr4x_shift_n_add + +.align 32 +.Lsqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz .Lsqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 +__bn_sqr8x_reduction: + xorq %rax,%rax + leaq (%r9,%rbp,1),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp .L8x_reduction_loop + +.align 32 +.L8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .L8x_reduce + +.align 32 +.L8x_reduce: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_reduce + + leaq 64(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp .L8x_tail + +.align 32 +.L8x_tail: + mulq %rbx + addq %rax,%r8 + movq 8(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_tail + + leaq 64(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movl $8,%ecx + jmp .L8x_tail + +.align 32 +.L8x_tail_done: + xorq %rax,%rax + addq (%rdx),%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rax + + negq %rsi +.L8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -8(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb .L8x_reduction_loop + ret +.cfi_endproc +.size bn_sqr8x_internal,.-bn_sqr8x_internal +.type __bn_post4x_internal,@function +.align 32 +__bn_post4x_internal: +.cfi_startproc + movq 0(%rbp),%r12 + leaq (%rdi,%r9,1),%rbx + movq %r9,%rcx +.byte 102,72,15,126,207 + negq %rax +.byte 102,72,15,126,206 + sarq $3+2,%rcx + decq %r12 + xorq %r10,%r10 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqr4x_sub_entry + +.align 16 +.Lsqr4x_sub: + movq 0(%rbp),%r12 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 +.Lsqr4x_sub_entry: + leaq 32(%rbp),%rbp + notq %r12 + notq %r13 + notq %r14 + notq %r15 + andq %rax,%r12 + andq %rax,%r13 + andq %rax,%r14 + andq %rax,%r15 + + negq %r10 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + adcq 16(%rbx),%r14 + adcq 24(%rbx),%r15 + movq %r12,0(%rdi) + leaq 32(%rbx),%rbx + movq %r13,8(%rdi) + sbbq %r10,%r10 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz .Lsqr4x_sub + + movq %r9,%r10 + negq %r9 + ret +.cfi_endproc +.size __bn_post4x_internal,.-__bn_post4x_internal +.type bn_mulx4x_mont_gather5,@function +.align 32 +bn_mulx4x_mont_gather5: +.cfi_startproc + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lmulx4x_enter: + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lmulx4x_prologue: + + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + movq (%r8),%r8 + + + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmulx4xsp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp .Lmulx4xsp_done + +.Lmulx4xsp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lmulx4xsp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk + jmp .Lmulx4x_page_walk_done + +.Lmulx4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk +.Lmulx4x_page_walk_done: + + + + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lmulx4x_body: + call mulx4x_internal + + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmulx4x_epilogue: + ret +.cfi_endproc +.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 + +.type mulx4x_internal,@function +.align 32 +mulx4x_internal: +.cfi_startproc + movq %r9,8(%rsp) + movq %r9,%r10 + negq %r9 + shlq $5,%r9 + negq %r10 + leaq 128(%rdx,%r9,1),%r13 + shrq $5+5,%r9 + movd 8(%rax),%xmm5 + subq $1,%r9 + leaq .Linc(%rip),%rax + movq %r13,16+8(%rsp) + movq %r9,24+8(%rsp) + movq %rdi,56+8(%rsp) + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 88-112(%rsp,%r10,1),%r10 + leaq 128(%rdx),%rdi + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 +.byte 0x67 + movdqa %xmm1,%xmm2 +.byte 0x67 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 +.byte 0x67 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + + pand 64(%rdi),%xmm0 + pand 80(%rdi),%xmm1 + pand 96(%rdi),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%rdi),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%rdi),%xmm4 + movdqa -112(%rdi),%xmm5 + movdqa -96(%rdi),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%rdi),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%rdi),%xmm4 + movdqa -48(%rdi),%xmm5 + movdqa -32(%rdi),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%rdi),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%rdi),%xmm4 + movdqa 16(%rdi),%xmm5 + movdqa 32(%rdi),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%rdi),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + pxor %xmm1,%xmm0 + + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%rdi),%rdi +.byte 102,72,15,126,194 + leaq 64+32+8(%rsp),%rbx + + movq %rdx,%r9 + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r12 + addq %rax,%r11 + mulxq 16(%rsi),%rax,%r13 + adcq %rax,%r12 + adcq $0,%r13 + mulxq 24(%rsi),%rax,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + xorq %rbp,%rbp + movq %r8,%rdx + + movq %rdi,8+8(%rsp) + + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r12,-16(%rbx) + jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_1st + + movq 8(%rsp),%rax + adcq %rbp,%r15 + leaq (%rsi,%rax,1),%rsi + addq %r15,%r14 + movq 8+8(%rsp),%rdi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + leaq 16-256(%rbx),%r10 + pxor %xmm4,%xmm4 +.byte 0x67,0x67 + pxor %xmm5,%xmm5 + movdqa -128(%rdi),%xmm0 + movdqa -112(%rdi),%xmm1 + movdqa -96(%rdi),%xmm2 + pand 256(%r10),%xmm0 + movdqa -80(%rdi),%xmm3 + pand 272(%r10),%xmm1 + por %xmm0,%xmm4 + pand 288(%r10),%xmm2 + por %xmm1,%xmm5 + pand 304(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%rdi),%xmm0 + movdqa -48(%rdi),%xmm1 + movdqa -32(%rdi),%xmm2 + pand 320(%r10),%xmm0 + movdqa -16(%rdi),%xmm3 + pand 336(%r10),%xmm1 + por %xmm0,%xmm4 + pand 352(%r10),%xmm2 + por %xmm1,%xmm5 + pand 368(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%rdi),%xmm0 + movdqa 16(%rdi),%xmm1 + movdqa 32(%rdi),%xmm2 + pand 384(%r10),%xmm0 + movdqa 48(%rdi),%xmm3 + pand 400(%r10),%xmm1 + por %xmm0,%xmm4 + pand 416(%r10),%xmm2 + por %xmm1,%xmm5 + pand 432(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%rdi),%xmm0 + movdqa 80(%rdi),%xmm1 + movdqa 96(%rdi),%xmm2 + pand 448(%r10),%xmm0 + movdqa 112(%rdi),%xmm3 + pand 464(%r10),%xmm1 + por %xmm0,%xmm4 + pand 480(%r10),%xmm2 + por %xmm1,%xmm5 + pand 496(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%rdi),%rdi +.byte 102,72,15,126,194 + + movq %rbp,(%rbx) + leaq 32(%rbx,%rax,1),%rbx + mulxq 0(%rsi),%r8,%r11 + xorq %rbp,%rbp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + mulxq 24(%rsi),%rdx,%r14 + adoxq -16(%rbx),%r12 + adcxq %rdx,%r13 + leaq (%rcx,%rax,1),%rcx + leaq 32(%rsi),%rsi + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + adoxq %rbp,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + + movq %r8,%rdx + xorq %rbp,%rbp + movq %rdi,8+8(%rsp) + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r12 + movq %r11,-24(%rbx) + adoxq %rbp,%r15 + movq %r12,-16(%rbx) + leaq 32(%rcx),%rcx + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + movq %r11,-32(%rbx) + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + leaq 32(%rcx),%rcx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_inner + + movq 0+8(%rsp),%rax + adcq %rbp,%r15 + subq 0(%rbx),%rdi + movq 8+8(%rsp),%rdi + movq 16+8(%rsp),%r10 + adcq %r15,%r14 + leaq (%rsi,%rax,1),%rsi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + + cmpq %r10,%rdi + jb .Lmulx4x_outer + + movq -8(%rcx),%r10 + movq %rbp,%r8 + movq (%rcx,%rax,1),%r12 + leaq (%rcx,%rax,1),%rbp + movq %rax,%rcx + leaq (%rbx,%rax,1),%rdi + xorl %eax,%eax + xorq %r15,%r15 + subq %r14,%r10 + adcq %r15,%r15 + orq %r15,%r8 + sarq $3+2,%rcx + subq %r8,%rax + movq 56+8(%rsp),%rdx + decq %r12 + movq 8(%rbp),%r13 + xorq %r8,%r8 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqrx4x_sub_entry +.cfi_endproc +.size mulx4x_internal,.-mulx4x_internal +.type bn_powerx5,@function +.align 32 +bn_powerx5: +.cfi_startproc + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lpowerx5_enter: + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lpowerx5_prologue: + + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwrx_sp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp .Lpwrx_sp_done + +.align 32 +.Lpwrx_sp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lpwrx_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwrx_page_walk + jmp .Lpwrx_page_walk_done + +.Lpwrx_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwrx_page_walk +.Lpwrx_page_walk_done: + + movq %r9,%r10 + negq %r9 + + + + + + + + + + + + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lpowerx5_body: + + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + + movq %r10,%r9 + movq %rsi,%rdi +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq 40(%rsp),%rax + + call mulx4x_internal + + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpowerx5_epilogue: + ret +.cfi_endproc +.size bn_powerx5,.-bn_powerx5 + +.globl bn_sqrx8x_internal +.hidden bn_sqrx8x_internal +.hidden bn_sqrx8x_internal +.type bn_sqrx8x_internal,@function +.align 32 +bn_sqrx8x_internal: +__bn_sqrx8x_internal: +.cfi_startproc +_CET_ENDBR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 48+8(%rsp),%rdi + leaq (%rsi,%r9,1),%rbp + movq %r9,0+8(%rsp) + movq %rbp,8+8(%rsp) + jmp .Lsqr8x_zero_start + +.align 32 +.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +.Lsqrx8x_zero: +.byte 0x3e + movdqa %xmm0,0(%rdi) + movdqa %xmm0,16(%rdi) + movdqa %xmm0,32(%rdi) + movdqa %xmm0,48(%rdi) +.Lsqr8x_zero_start: + movdqa %xmm0,64(%rdi) + movdqa %xmm0,80(%rdi) + movdqa %xmm0,96(%rdi) + movdqa %xmm0,112(%rdi) + leaq 128(%rdi),%rdi + subq $64,%r9 + jnz .Lsqrx8x_zero + + movq 0(%rsi),%rdx + + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + leaq 48+8(%rsp),%rdi + xorq %rbp,%rbp + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_loop: + mulxq 8(%rsi),%r8,%rax + adcxq %r9,%r8 + adoxq %rax,%r10 + mulxq 16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 +.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 + adcxq %r11,%r10 + adoxq %rax,%r12 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 + adcxq %r12,%r11 + adoxq %rax,%r13 + mulxq 40(%rsi),%r12,%rax + adcxq %r13,%r12 + adoxq %rax,%r14 + mulxq 48(%rsi),%r13,%rax + adcxq %r14,%r13 + adoxq %r15,%rax + mulxq 56(%rsi),%r14,%r15 + movq 8(%rsi),%rdx + adcxq %rax,%r14 + adoxq %rbp,%r15 + adcq 64(%rdi),%r15 + movq %r8,8(%rdi) + movq %r9,16(%rdi) + sbbq %rcx,%rcx + xorq %rbp,%rbp + + + mulxq 16(%rsi),%r8,%rbx + mulxq 24(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 32(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %rbx,%r11 +.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 + adcxq %r13,%r11 + adoxq %r14,%r12 +.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 + movq 16(%rsi),%rdx + adcxq %rax,%r12 + adoxq %rbx,%r13 + adcxq %r15,%r13 + adoxq %rbp,%r14 + adcxq %rbp,%r14 + + movq %r8,24(%rdi) + movq %r9,32(%rdi) + + mulxq 24(%rsi),%r8,%rbx + mulxq 32(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 40(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %r13,%r11 +.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 +.byte 0x3e + movq 24(%rsi),%rdx + adcxq %rbx,%r11 + adoxq %rax,%r12 + adcxq %r14,%r12 + movq %r8,40(%rdi) + movq %r9,48(%rdi) + mulxq 32(%rsi),%r8,%rax + adoxq %rbp,%r13 + adcxq %rbp,%r13 + + mulxq 40(%rsi),%r9,%rbx + adcxq %r10,%r8 + adoxq %rax,%r9 + mulxq 48(%rsi),%r10,%rax + adcxq %r11,%r9 + adoxq %r12,%r10 + mulxq 56(%rsi),%r11,%r12 + movq 32(%rsi),%rdx + movq 40(%rsi),%r14 + adcxq %rbx,%r10 + adoxq %rax,%r11 + movq 48(%rsi),%r15 + adcxq %r13,%r11 + adoxq %rbp,%r12 + adcxq %rbp,%r12 + + movq %r8,56(%rdi) + movq %r9,64(%rdi) + + mulxq %r14,%r9,%rax + movq 56(%rsi),%r8 + adcxq %r10,%r9 + mulxq %r15,%r10,%rbx + adoxq %rax,%r10 + adcxq %r11,%r10 + mulxq %r8,%r11,%rax + movq %r14,%rdx + adoxq %rbx,%r11 + adcxq %r12,%r11 + + adcxq %rbp,%rax + + mulxq %r15,%r14,%rbx + mulxq %r8,%r12,%r13 + movq %r15,%rdx + leaq 64(%rsi),%rsi + adcxq %r14,%r11 + adoxq %rbx,%r12 + adcxq %rax,%r12 + adoxq %rbp,%r13 + +.byte 0x67,0x67 + mulxq %r8,%r8,%r14 + adcxq %r8,%r13 + adcxq %rbp,%r14 + + cmpq 8+8(%rsp),%rsi + je .Lsqrx8x_outer_break + + negq %rcx + movq $-8,%rcx + movq %rbp,%r15 + movq 64(%rdi),%r8 + adcxq 72(%rdi),%r9 + adcxq 80(%rdi),%r10 + adcxq 88(%rdi),%r11 + adcq 96(%rdi),%r12 + adcq 104(%rdi),%r13 + adcq 112(%rdi),%r14 + adcq 120(%rdi),%r15 + leaq (%rsi),%rbp + leaq 128(%rdi),%rdi + sbbq %rax,%rax + + movq -64(%rsi),%rdx + movq %rax,16+8(%rsp) + movq %rdi,24+8(%rsp) + + + xorl %eax,%eax + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_loop: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + movq %rbx,(%rdi,%rcx,8) + movl $0,%ebx + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 + movq 8(%rsi,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rbx,%r15 + adcxq %rbx,%r15 + +.byte 0x67 + incq %rcx + jnz .Lsqrx8x_loop + + leaq 64(%rbp),%rbp + movq $-8,%rcx + cmpq 8+8(%rsp),%rbp + je .Lsqrx8x_break + + subq 16+8(%rsp),%rbx +.byte 0x66 + movq -64(%rsi),%rdx + adcxq 0(%rdi),%r8 + adcxq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi +.byte 0x67 + sbbq %rax,%rax + xorl %ebx,%ebx + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_break: + xorq %rbp,%rbp + subq 16+8(%rsp),%rbx + adcxq %rbp,%r8 + movq 24+8(%rsp),%rcx + adcxq %rbp,%r9 + movq 0(%rsi),%rdx + adcq $0,%r10 + movq %r8,0(%rdi) + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + cmpq %rcx,%rdi + je .Lsqrx8x_outer_loop + + movq %r9,8(%rdi) + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + movq 40(%rcx),%r13 + movq %r14,48(%rdi) + movq 48(%rcx),%r14 + movq %r15,56(%rdi) + movq 56(%rcx),%r15 + movq %rcx,%rdi + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_break: + movq %r9,72(%rdi) +.byte 102,72,15,126,217 + movq %r10,80(%rdi) + movq %r11,88(%rdi) + movq %r12,96(%rdi) + movq %r13,104(%rdi) + movq %r14,112(%rdi) + leaq 48+8(%rsp),%rdi + movq (%rsi,%rcx,1),%rdx + + movq 8(%rdi),%r11 + xorq %r10,%r10 + movq 0+8(%rsp),%r9 + adoxq %r11,%r11 + movq 16(%rdi),%r12 + movq 24(%rdi),%r13 + + +.align 32 +.Lsqrx4x_shift_n_add: + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax +.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 +.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 40(%rdi),%r11 + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + movq 16(%rsi,%rcx,1),%rdx + movq 48(%rdi),%r12 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 56(%rdi),%r13 + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax + movq 24(%rsi,%rcx,1),%rdx + leaq 32(%rcx),%rcx + movq 64(%rdi),%r10 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 72(%rdi),%r11 + movq %rax,32(%rdi) + movq %rbx,40(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + jrcxz .Lsqrx4x_shift_n_add_break +.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 80(%rdi),%r12 + movq 88(%rdi),%r13 + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi + nop + jmp .Lsqrx4x_shift_n_add + +.align 32 +.Lsqrx4x_shift_n_add_break: + adcxq %r13,%rbx + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi +.byte 102,72,15,126,213 +__bn_sqrx8x_reduction: + xorl %eax,%eax + movq 32+8(%rsp),%rbx + movq 48+8(%rsp),%rdx + leaq -64(%rbp,%r9,1),%rcx + + movq %rcx,0+8(%rsp) + movq %rdi,8+8(%rsp) + + leaq 48+8(%rsp),%rdi + jmp .Lsqrx8x_reduction_loop + +.align 32 +.Lsqrx8x_reduction_loop: + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq %rdx,%r8 + imulq %rbx,%rdx + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,24+8(%rsp) + + leaq 64(%rdi),%rdi + xorq %rsi,%rsi + movq $-8,%rcx + jmp .Lsqrx8x_reduce + +.align 32 +.Lsqrx8x_reduce: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 8(%rbp),%rbx,%r9 + adcxq %rbx,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 32+8(%rsp),%rbx,%rdx + movq %rax,%rdx + movq %rax,64+48+8(%rsp,%rcx,8) + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + +.byte 0x67,0x67,0x67 + incq %rcx + jnz .Lsqrx8x_reduce + + movq %rsi,%rax + cmpq 0+8(%rsp),%rbp + jae .Lsqrx8x_no_tail + + movq 48+8(%rsp),%rdx + addq 0(%rdi),%r8 + leaq 64(%rbp),%rbp + movq $-8,%rcx + adcxq 8(%rdi),%r9 + adcxq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq 72+48+8(%rsp,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + movq %rbx,(%rdi,%rcx,8) + movq %r8,%rbx + adcxq %rsi,%r15 + + incq %rcx + jnz .Lsqrx8x_tail + + cmpq 0+8(%rsp),%rbp + jae .Lsqrx8x_tail_done + + subq 16+8(%rsp),%rsi + movq 48+8(%rsp),%rdx + leaq 64(%rbp),%rbp + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + subq $8,%rcx + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail_done: + xorq %rax,%rax + addq 24+8(%rsp),%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rax + + subq 16+8(%rsp),%rsi +.Lsqrx8x_no_tail: + adcq 0(%rdi),%r8 +.byte 102,72,15,126,217 + adcq 8(%rdi),%r9 + movq 56(%rbp),%rsi +.byte 102,72,15,126,213 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + + movq 32+8(%rsp),%rbx + movq 64(%rdi,%rcx,1),%rdx + + movq %r8,0(%rdi) + leaq 64(%rdi),%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 64(%rdi,%rcx,1),%rdi + cmpq 8+8(%rsp),%r8 + jb .Lsqrx8x_reduction_loop + ret +.cfi_endproc +.size bn_sqrx8x_internal,.-bn_sqrx8x_internal +.align 32 +.type __bn_postx4x_internal,@function +__bn_postx4x_internal: +.cfi_startproc + movq 0(%rbp),%r12 + movq %rcx,%r10 + movq %rcx,%r9 + negq %rax + sarq $3+2,%rcx + +.byte 102,72,15,126,202 +.byte 102,72,15,126,206 + decq %r12 + movq 8(%rbp),%r13 + xorq %r8,%r8 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqrx4x_sub_entry + +.align 16 +.Lsqrx4x_sub: + movq 0(%rbp),%r12 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 +.Lsqrx4x_sub_entry: + andnq %rax,%r12,%r12 + leaq 32(%rbp),%rbp + andnq %rax,%r13,%r13 + andnq %rax,%r14,%r14 + andnq %rax,%r15,%r15 + + negq %r8 + adcq 0(%rdi),%r12 + adcq 8(%rdi),%r13 + adcq 16(%rdi),%r14 + adcq 24(%rdi),%r15 + movq %r12,0(%rdx) + leaq 32(%rdi),%rdi + movq %r13,8(%rdx) + sbbq %r8,%r8 + movq %r14,16(%rdx) + movq %r15,24(%rdx) + leaq 32(%rdx),%rdx + + incq %rcx + jnz .Lsqrx4x_sub + + negq %r9 + + ret +.cfi_endproc +.size __bn_postx4x_internal,.-__bn_postx4x_internal +.globl bn_scatter5 +.hidden bn_scatter5 +.type bn_scatter5,@function +.align 16 +bn_scatter5: +.cfi_startproc +_CET_ENDBR + cmpl $0,%esi + jz .Lscatter_epilogue + + + + + + + + + + leaq (%rdx,%rcx,8),%rdx +.Lscatter: + movq (%rdi),%rax + leaq 8(%rdi),%rdi + movq %rax,(%rdx) + leaq 256(%rdx),%rdx + subl $1,%esi + jnz .Lscatter +.Lscatter_epilogue: + ret +.cfi_endproc +.size bn_scatter5,.-bn_scatter5 + +.globl bn_gather5 +.hidden bn_gather5 +.type bn_gather5,@function +.align 32 +bn_gather5: +.cfi_startproc +.LSEH_begin_bn_gather5: +_CET_ENDBR + +.byte 0x4c,0x8d,0x14,0x24 +.cfi_def_cfa_register %r10 +.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 + leaq .Linc(%rip),%rax + andq $-16,%rsp + + movd %ecx,%xmm5 + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 128(%rdx),%r11 + leaq 128(%rsp),%rax + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,-128(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,-112(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,-96(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,-80(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,-64(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,-48(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,-32(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,-16(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,0(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,16(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,32(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,48(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,64(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,80(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,96(%rax) + movdqa %xmm4,%xmm2 + movdqa %xmm3,112(%rax) + jmp .Lgather + +.align 32 +.Lgather: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r11),%xmm0 + movdqa -112(%r11),%xmm1 + movdqa -96(%r11),%xmm2 + pand -128(%rax),%xmm0 + movdqa -80(%r11),%xmm3 + pand -112(%rax),%xmm1 + por %xmm0,%xmm4 + pand -96(%rax),%xmm2 + por %xmm1,%xmm5 + pand -80(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r11),%xmm0 + movdqa -48(%r11),%xmm1 + movdqa -32(%r11),%xmm2 + pand -64(%rax),%xmm0 + movdqa -16(%r11),%xmm3 + pand -48(%rax),%xmm1 + por %xmm0,%xmm4 + pand -32(%rax),%xmm2 + por %xmm1,%xmm5 + pand -16(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + pand 0(%rax),%xmm0 + movdqa 48(%r11),%xmm3 + pand 16(%rax),%xmm1 + por %xmm0,%xmm4 + pand 32(%rax),%xmm2 + por %xmm1,%xmm5 + pand 48(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r11),%xmm0 + movdqa 80(%r11),%xmm1 + movdqa 96(%r11),%xmm2 + pand 64(%rax),%xmm0 + movdqa 112(%r11),%xmm3 + pand 80(%rax),%xmm1 + por %xmm0,%xmm4 + pand 96(%rax),%xmm2 + por %xmm1,%xmm5 + pand 112(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + leaq 256(%r11),%r11 + + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + movq %xmm0,(%rdi) + leaq 8(%rdi),%rdi + subl $1,%esi + jnz .Lgather + + leaq (%r10),%rsp +.cfi_def_cfa_register %rsp + ret +.LSEH_end_bn_gather5: +.cfi_endproc +.size bn_gather5,.-bn_gather5 +.section .rodata +.align 64 +.Linc: +.long 0,0, 1,1 +.long 2,2, 2,2 +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text +#endif diff --git a/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-win.asm b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-win.asm new file mode 100644 index 0000000000..46aae51739 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/bcm/x86_64-mont5-win.asm @@ -0,0 +1,3864 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P + +global bn_mul_mont_gather5 + +ALIGN 64 +bn_mul_mont_gather5: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont_gather5: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + mov r9d,r9d + mov rax,rsp + + test r9d,7 + jnz NEAR $L$mul_enter + lea r11,[OPENSSL_ia32cap_P] + mov r11d,DWORD[8+r11] + jmp NEAR $L$mul4x_enter + +ALIGN 16 +$L$mul_enter: + movd xmm5,DWORD[56+rsp] + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + + neg r9 + mov r11,rsp + lea r10,[((-280))+r9*8+rsp] + neg r9 + and r10,-1024 + + + + + + + + + + sub r11,r10 + and r11,-4096 + lea rsp,[r11*1+r10] + mov r11,QWORD[rsp] + cmp rsp,r10 + ja NEAR $L$mul_page_walk + jmp NEAR $L$mul_page_walk_done + +$L$mul_page_walk: + lea rsp,[((-4096))+rsp] + mov r11,QWORD[rsp] + cmp rsp,r10 + ja NEAR $L$mul_page_walk +$L$mul_page_walk_done: + + lea r10,[$L$inc] + mov QWORD[8+r9*8+rsp],rax + +$L$mul_body: + + lea r12,[128+rdx] + movdqa xmm0,XMMWORD[r10] + movdqa xmm1,XMMWORD[16+r10] + lea r10,[((24-112))+r9*8+rsp] + and r10,-16 + + pshufd xmm5,xmm5,0 + movdqa xmm4,xmm1 + movdqa xmm2,xmm1 + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + DB 0x67 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[112+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[128+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[144+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[160+r10],xmm3 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[176+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[192+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[208+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[224+r10],xmm3 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[240+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[256+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[272+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[288+r10],xmm3 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[304+r10],xmm0 + + paddd xmm3,xmm2 + DB 0x67 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[320+r10],xmm1 + + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[336+r10],xmm2 + pand xmm0,XMMWORD[64+r12] + + pand xmm1,XMMWORD[80+r12] + pand xmm2,XMMWORD[96+r12] + movdqa XMMWORD[352+r10],xmm3 + pand xmm3,XMMWORD[112+r12] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[((-128))+r12] + movdqa xmm5,XMMWORD[((-112))+r12] + movdqa xmm2,XMMWORD[((-96))+r12] + pand xmm4,XMMWORD[112+r10] + movdqa xmm3,XMMWORD[((-80))+r12] + pand xmm5,XMMWORD[128+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[144+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[160+r10] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[((-64))+r12] + movdqa xmm5,XMMWORD[((-48))+r12] + movdqa xmm2,XMMWORD[((-32))+r12] + pand xmm4,XMMWORD[176+r10] + movdqa xmm3,XMMWORD[((-16))+r12] + pand xmm5,XMMWORD[192+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[208+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[224+r10] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[r12] + movdqa xmm5,XMMWORD[16+r12] + movdqa xmm2,XMMWORD[32+r12] + pand xmm4,XMMWORD[240+r10] + movdqa xmm3,XMMWORD[48+r12] + pand xmm5,XMMWORD[256+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[272+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[288+r10] + por xmm0,xmm2 + por xmm1,xmm3 + por xmm0,xmm1 + + pshufd xmm1,xmm0,0x4e + por xmm0,xmm1 + lea r12,[256+r12] +DB 102,72,15,126,195 + + mov r8,QWORD[r8] + mov rax,QWORD[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,[1+r15] + jmp NEAR $L$1st_enter + +ALIGN 16 +$L$1st: + add r13,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter: + mul rbx + add r11,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + lea r15,[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne NEAR $L$1st + + + add r13,rax + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD[((-16))+r9*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD[((-8))+r9*8+rsp],r13 + mov QWORD[r9*8+rsp],rdx + + lea r14,[1+r14] + jmp NEAR $L$outer +ALIGN 16 +$L$outer: + lea rdx,[((24+128))+r9*8+rsp] + and rdx,-16 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + movdqa xmm0,XMMWORD[((-128))+r12] + movdqa xmm1,XMMWORD[((-112))+r12] + movdqa xmm2,XMMWORD[((-96))+r12] + movdqa xmm3,XMMWORD[((-80))+r12] + pand xmm0,XMMWORD[((-128))+rdx] + pand xmm1,XMMWORD[((-112))+rdx] + por xmm4,xmm0 + pand xmm2,XMMWORD[((-96))+rdx] + por xmm5,xmm1 + pand xmm3,XMMWORD[((-80))+rdx] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[((-64))+r12] + movdqa xmm1,XMMWORD[((-48))+r12] + movdqa xmm2,XMMWORD[((-32))+r12] + movdqa xmm3,XMMWORD[((-16))+r12] + pand xmm0,XMMWORD[((-64))+rdx] + pand xmm1,XMMWORD[((-48))+rdx] + por xmm4,xmm0 + pand xmm2,XMMWORD[((-32))+rdx] + por xmm5,xmm1 + pand xmm3,XMMWORD[((-16))+rdx] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[r12] + movdqa xmm1,XMMWORD[16+r12] + movdqa xmm2,XMMWORD[32+r12] + movdqa xmm3,XMMWORD[48+r12] + pand xmm0,XMMWORD[rdx] + pand xmm1,XMMWORD[16+rdx] + por xmm4,xmm0 + pand xmm2,XMMWORD[32+rdx] + por xmm5,xmm1 + pand xmm3,XMMWORD[48+rdx] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[64+r12] + movdqa xmm1,XMMWORD[80+r12] + movdqa xmm2,XMMWORD[96+r12] + movdqa xmm3,XMMWORD[112+r12] + pand xmm0,XMMWORD[64+rdx] + pand xmm1,XMMWORD[80+rdx] + por xmm4,xmm0 + pand xmm2,XMMWORD[96+rdx] + por xmm5,xmm1 + pand xmm3,XMMWORD[112+rdx] + por xmm4,xmm2 + por xmm5,xmm3 + por xmm4,xmm5 + + pshufd xmm0,xmm4,0x4e + por xmm0,xmm4 + lea r12,[256+r12] + + mov rax,QWORD[rsi] +DB 102,72,15,126,195 + + xor r15,r15 + mov rbp,r8 + mov r10,QWORD[rsp] + + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov r10,QWORD[8+rsp] + mov r13,rdx + + lea r15,[1+r15] + jmp NEAR $L$inner_enter + +ALIGN 16 +$L$inner: + add r13,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD[r15*8+rsp] + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter: + mul rbx + add r11,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,[1+r15] + + mul rbp + cmp r15,r9 + jne NEAR $L$inner + + add r13,rax + adc rdx,0 + add r13,r10 + mov r10,QWORD[r9*8+rsp] + adc rdx,0 + mov QWORD[((-16))+r9*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r9*8+rsp],r13 + mov QWORD[r9*8+rsp],rdx + + lea r14,[1+r14] + cmp r14,r9 + jb NEAR $L$outer + + xor r14,r14 + mov rax,QWORD[rsp] + lea rsi,[rsp] + mov r15,r9 + jmp NEAR $L$sub +ALIGN 16 +$L$sub: sbb rax,QWORD[r14*8+rcx] + mov QWORD[r14*8+rdi],rax + mov rax,QWORD[8+r14*8+rsi] + lea r14,[1+r14] + dec r15 + jnz NEAR $L$sub + + sbb rax,0 + mov rbx,-1 + xor rbx,rax + xor r14,r14 + mov r15,r9 + +$L$copy: + mov rcx,QWORD[r14*8+rdi] + mov rdx,QWORD[r14*8+rsp] + and rcx,rbx + and rdx,rax + mov QWORD[r14*8+rsp],r14 + or rdx,rcx + mov QWORD[r14*8+rdi],rdx + lea r14,[1+r14] + sub r15,1 + jnz NEAR $L$copy + + mov rsi,QWORD[8+r9*8+rsp] + + mov rax,1 + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$mul_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_mul_mont_gather5: + +ALIGN 32 +bn_mul4x_mont_gather5: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont_gather5: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + + DB 0x67 + mov rax,rsp + +$L$mul4x_enter: + and r11d,0x80108 + cmp r11d,0x80108 + je NEAR $L$mulx4x_enter + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + +$L$mul4x_prologue: + + DB 0x67 + shl r9d,3 + lea r10,[r9*2+r9] + neg r9 + + + + + + + + + + + lea r11,[((-320))+r9*2+rsp] + mov rbp,rsp + sub r11,rdi + and r11,4095 + cmp r10,r11 + jb NEAR $L$mul4xsp_alt + sub rbp,r11 + lea rbp,[((-320))+r9*2+rbp] + jmp NEAR $L$mul4xsp_done + +ALIGN 32 +$L$mul4xsp_alt: + lea r10,[((4096-320))+r9*2] + lea rbp,[((-320))+r9*2+rbp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rbp,r11 +$L$mul4xsp_done: + and rbp,-64 + mov r11,rsp + sub r11,rbp + and r11,-4096 + lea rsp,[rbp*1+r11] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$mul4x_page_walk + jmp NEAR $L$mul4x_page_walk_done + +$L$mul4x_page_walk: + lea rsp,[((-4096))+rsp] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$mul4x_page_walk +$L$mul4x_page_walk_done: + + neg r9 + + mov QWORD[40+rsp],rax + +$L$mul4x_body: + + call mul4x_internal + + mov rsi,QWORD[40+rsp] + + mov rax,1 + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$mul4x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_mul4x_mont_gather5: + + +ALIGN 32 +mul4x_internal: + + shl r9,5 + movd xmm5,DWORD[56+rax] + lea rax,[$L$inc] + lea r13,[128+r9*1+rdx] + shr r9,5 + movdqa xmm0,XMMWORD[rax] + movdqa xmm1,XMMWORD[16+rax] + lea r10,[((88-112))+r9*1+rsp] + lea r12,[128+rdx] + + pshufd xmm5,xmm5,0 + movdqa xmm4,xmm1 + DB 0x67,0x67 + movdqa xmm2,xmm1 + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + DB 0x67 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[112+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[128+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[144+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[160+r10],xmm3 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[176+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[192+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[208+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[224+r10],xmm3 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[240+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[256+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[272+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[288+r10],xmm3 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[304+r10],xmm0 + + paddd xmm3,xmm2 + DB 0x67 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[320+r10],xmm1 + + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[336+r10],xmm2 + pand xmm0,XMMWORD[64+r12] + + pand xmm1,XMMWORD[80+r12] + pand xmm2,XMMWORD[96+r12] + movdqa XMMWORD[352+r10],xmm3 + pand xmm3,XMMWORD[112+r12] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[((-128))+r12] + movdqa xmm5,XMMWORD[((-112))+r12] + movdqa xmm2,XMMWORD[((-96))+r12] + pand xmm4,XMMWORD[112+r10] + movdqa xmm3,XMMWORD[((-80))+r12] + pand xmm5,XMMWORD[128+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[144+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[160+r10] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[((-64))+r12] + movdqa xmm5,XMMWORD[((-48))+r12] + movdqa xmm2,XMMWORD[((-32))+r12] + pand xmm4,XMMWORD[176+r10] + movdqa xmm3,XMMWORD[((-16))+r12] + pand xmm5,XMMWORD[192+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[208+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[224+r10] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[r12] + movdqa xmm5,XMMWORD[16+r12] + movdqa xmm2,XMMWORD[32+r12] + pand xmm4,XMMWORD[240+r10] + movdqa xmm3,XMMWORD[48+r12] + pand xmm5,XMMWORD[256+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[272+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[288+r10] + por xmm0,xmm2 + por xmm1,xmm3 + por xmm0,xmm1 + + pshufd xmm1,xmm0,0x4e + por xmm0,xmm1 + lea r12,[256+r12] +DB 102,72,15,126,195 + + mov QWORD[((16+8))+rsp],r13 + mov QWORD[((56+8))+rsp],rdi + + mov r8,QWORD[r8] + mov rax,QWORD[rsi] + lea rsi,[r9*1+rsi] + neg r9 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD[rcx] + + imul rbp,r10 + lea r14,[((64+8))+rsp] + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,[32+r9] + lea rcx,[32+rcx] + adc rdx,0 + mov QWORD[r14],rdi + mov r13,rdx + jmp NEAR $L$1st4x + +ALIGN 32 +$L$1st4x: + mul rbx + add r10,rax + mov rax,QWORD[((-16))+rcx] + lea r14,[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r14],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,[32+rcx] + adc rdx,0 + mov QWORD[r14],rdi + mov r13,rdx + + add r15,32 + jnz NEAR $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD[((-16))+rcx] + lea r14,[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r14],rdi + mov r13,rdx + + lea rcx,[r9*1+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD[((-8))+r14],r13 + + jmp NEAR $L$outer4x + +ALIGN 32 +$L$outer4x: + lea rdx,[((16+128))+r14] + pxor xmm4,xmm4 + pxor xmm5,xmm5 + movdqa xmm0,XMMWORD[((-128))+r12] + movdqa xmm1,XMMWORD[((-112))+r12] + movdqa xmm2,XMMWORD[((-96))+r12] + movdqa xmm3,XMMWORD[((-80))+r12] + pand xmm0,XMMWORD[((-128))+rdx] + pand xmm1,XMMWORD[((-112))+rdx] + por xmm4,xmm0 + pand xmm2,XMMWORD[((-96))+rdx] + por xmm5,xmm1 + pand xmm3,XMMWORD[((-80))+rdx] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[((-64))+r12] + movdqa xmm1,XMMWORD[((-48))+r12] + movdqa xmm2,XMMWORD[((-32))+r12] + movdqa xmm3,XMMWORD[((-16))+r12] + pand xmm0,XMMWORD[((-64))+rdx] + pand xmm1,XMMWORD[((-48))+rdx] + por xmm4,xmm0 + pand xmm2,XMMWORD[((-32))+rdx] + por xmm5,xmm1 + pand xmm3,XMMWORD[((-16))+rdx] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[r12] + movdqa xmm1,XMMWORD[16+r12] + movdqa xmm2,XMMWORD[32+r12] + movdqa xmm3,XMMWORD[48+r12] + pand xmm0,XMMWORD[rdx] + pand xmm1,XMMWORD[16+rdx] + por xmm4,xmm0 + pand xmm2,XMMWORD[32+rdx] + por xmm5,xmm1 + pand xmm3,XMMWORD[48+rdx] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[64+r12] + movdqa xmm1,XMMWORD[80+r12] + movdqa xmm2,XMMWORD[96+r12] + movdqa xmm3,XMMWORD[112+r12] + pand xmm0,XMMWORD[64+rdx] + pand xmm1,XMMWORD[80+rdx] + por xmm4,xmm0 + pand xmm2,XMMWORD[96+rdx] + por xmm5,xmm1 + pand xmm3,XMMWORD[112+rdx] + por xmm4,xmm2 + por xmm5,xmm3 + por xmm4,xmm5 + + pshufd xmm0,xmm4,0x4e + por xmm0,xmm4 + lea r12,[256+r12] +DB 102,72,15,126,195 + + mov r10,QWORD[r9*1+r14] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + mov QWORD[r14],rdi + + lea r14,[r9*1+r14] + + mul rbp + add r10,rax + mov rax,QWORD[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+rcx] + adc rdx,0 + add r11,QWORD[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,[32+r9] + lea rcx,[32+rcx] + adc rdx,0 + mov r13,rdx + jmp NEAR $L$inner4x + +ALIGN 32 +$L$inner4x: + mul rbx + add r10,rax + mov rax,QWORD[((-16))+rcx] + adc rdx,0 + add r10,QWORD[16+r14] + lea r14,[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+rcx] + adc rdx,0 + add r11,QWORD[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-24))+r14],r13 + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + add r10,QWORD[r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-16))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+rcx] + adc rdx,0 + add r11,QWORD[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,[32+rcx] + adc rdx,0 + mov QWORD[((-8))+r14],r13 + mov r13,rdx + + add r15,32 + jnz NEAR $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD[((-16))+rcx] + adc rdx,0 + add r10,QWORD[16+r14] + lea r14,[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,rbp + mov rbp,QWORD[((-8))+rcx] + adc rdx,0 + add r11,QWORD[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-24))+r14],r13 + mov r13,rdx + + mov QWORD[((-16))+r14],rdi + lea rcx,[r9*1+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD[r14] + adc rdi,0 + mov QWORD[((-8))+r14],r13 + + cmp r12,QWORD[((16+8))+rsp] + jb NEAR $L$outer4x + xor rax,rax + sub rbp,r13 + adc r15,r15 + or rdi,r15 + sub rax,rdi + lea rbx,[r9*1+r14] + mov r12,QWORD[rcx] + lea rbp,[rcx] + mov rcx,r9 + sar rcx,3+2 + mov rdi,QWORD[((56+8))+rsp] + dec r12 + xor r10,r10 + mov r13,QWORD[8+rbp] + mov r14,QWORD[16+rbp] + mov r15,QWORD[24+rbp] + jmp NEAR $L$sqr4x_sub_entry + + +global bn_power5 + +ALIGN 32 +bn_power5: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_power5: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + mov rax,rsp + + lea r11,[OPENSSL_ia32cap_P] + mov r11d,DWORD[8+r11] + and r11d,0x80108 + cmp r11d,0x80108 + je NEAR $L$powerx5_enter + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + +$L$power5_prologue: + + shl r9d,3 + lea r10d,[r9*2+r9] + neg r9 + mov r8,QWORD[r8] + + + + + + + + + lea r11,[((-320))+r9*2+rsp] + mov rbp,rsp + sub r11,rdi + and r11,4095 + cmp r10,r11 + jb NEAR $L$pwr_sp_alt + sub rbp,r11 + lea rbp,[((-320))+r9*2+rbp] + jmp NEAR $L$pwr_sp_done + +ALIGN 32 +$L$pwr_sp_alt: + lea r10,[((4096-320))+r9*2] + lea rbp,[((-320))+r9*2+rbp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rbp,r11 +$L$pwr_sp_done: + and rbp,-64 + mov r11,rsp + sub r11,rbp + and r11,-4096 + lea rsp,[rbp*1+r11] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$pwr_page_walk + jmp NEAR $L$pwr_page_walk_done + +$L$pwr_page_walk: + lea rsp,[((-4096))+rsp] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$pwr_page_walk +$L$pwr_page_walk_done: + + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD[32+rsp],r8 + mov QWORD[40+rsp],rax + +$L$power5_body: +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 102,73,15,110,218 +DB 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + +DB 102,72,15,126,209 +DB 102,72,15,126,226 + mov rdi,rsi + mov rax,QWORD[40+rsp] + lea r8,[32+rsp] + + call mul4x_internal + + mov rsi,QWORD[40+rsp] + + mov rax,1 + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$power5_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_power5: + +global bn_sqr8x_internal + + +ALIGN 32 +bn_sqr8x_internal: +__bn_sqr8x_internal: + +_CET_ENDBR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lea rbp,[32+r10] + lea rsi,[r9*1+rsi] + + mov rcx,r9 + + + mov r14,QWORD[((-32))+rbp*1+rsi] + lea rdi,[((48+8))+r9*2+rsp] + mov rax,QWORD[((-24))+rbp*1+rsi] + lea rdi,[((-32))+rbp*1+rdi] + mov rbx,QWORD[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,rax + mov rax,rbx + mov r11,rdx + mov QWORD[((-24))+rbp*1+rdi],r10 + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + mov QWORD[((-16))+rbp*1+rdi],r11 + mov r10,rdx + + + mov rbx,QWORD[((-8))+rbp*1+rsi] + mul r15 + mov r12,rax + mov rax,rbx + mov r13,rdx + + lea rcx,[rbp] + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD[((-8))+rcx*1+rdi],r10 + jmp NEAR $L$sqr4x_1st + +ALIGN 32 +$L$sqr4x_1st: + mov rbx,QWORD[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD[rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov rbx,QWORD[16+rcx*1+rsi] + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + + mul r15 + add r13,rax + mov rax,rbx + mov QWORD[8+rcx*1+rdi],r10 + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD[24+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD[16+rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + lea rcx,[32+rcx] + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne NEAR $L$sqr4x_1st + + mul r15 + add r13,rax + lea rbp,[16+rbp] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD[rdi],r13 + mov r12,rdx + mov QWORD[8+rdi],rdx + jmp NEAR $L$sqr4x_outer + +ALIGN 32 +$L$sqr4x_outer: + mov r14,QWORD[((-32))+rbp*1+rsi] + lea rdi,[((48+8))+r9*2+rsp] + mov rax,QWORD[((-24))+rbp*1+rsi] + lea rdi,[((-32))+rbp*1+rdi] + mov rbx,QWORD[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,QWORD[((-24))+rbp*1+rdi] + add r10,rax + mov rax,rbx + adc rdx,0 + mov QWORD[((-24))+rbp*1+rdi],r10 + mov r11,rdx + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + add r11,QWORD[((-16))+rbp*1+rdi] + mov r10,rdx + adc r10,0 + mov QWORD[((-16))+rbp*1+rdi],r11 + + xor r12,r12 + + mov rbx,QWORD[((-8))+rbp*1+rsi] + mul r15 + add r12,rax + mov rax,rbx + adc rdx,0 + add r12,QWORD[((-8))+rbp*1+rdi] + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD[((-8))+rbp*1+rdi],r10 + + lea rcx,[rbp] + jmp NEAR $L$sqr4x_inner + +ALIGN 32 +$L$sqr4x_inner: + mov rbx,QWORD[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + add r13,QWORD[rcx*1+rdi] + adc r12,0 + + DB 0x67 + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + mul r15 + add r12,rax + mov QWORD[rcx*1+rdi],r11 + mov rax,rbx + mov r13,rdx + adc r13,0 + add r12,QWORD[8+rcx*1+rdi] + lea rcx,[16+rcx] + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne NEAR $L$sqr4x_inner + + DB 0x67 + mul r15 + add r13,rax + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD[rdi],r13 + mov r12,rdx + mov QWORD[8+rdi],rdx + + add rbp,16 + jnz NEAR $L$sqr4x_outer + + + mov r14,QWORD[((-32))+rsi] + lea rdi,[((48+8))+r9*2+rsp] + mov rax,QWORD[((-24))+rsi] + lea rdi,[((-32))+rbp*1+rdi] + mov rbx,QWORD[((-16))+rsi] + mov r15,rax + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + + mul r14 + add r11,rax + mov rax,rbx + mov QWORD[((-24))+rdi],r10 + mov r10,rdx + adc r10,0 + add r11,r13 + mov rbx,QWORD[((-8))+rsi] + adc r10,0 + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD[((-16))+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD[((-8))+rdi],r10 + + mul r15 + add r13,rax + mov rax,QWORD[((-16))+rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD[rdi],r13 + mov r12,rdx + mov QWORD[8+rdi],rdx + + mul rbx + add rbp,16 + xor r14,r14 + sub rbp,r9 + xor r15,r15 + + add rax,r12 + adc rdx,0 + mov QWORD[8+rdi],rax + mov QWORD[16+rdi],rdx + mov QWORD[24+rdi],r15 + + mov rax,QWORD[((-16))+rbp*1+rsi] + lea rdi,[((48+8))+rsp] + xor r10,r10 + mov r11,QWORD[8+rdi] + + lea r12,[r10*2+r14] + shr r10,63 + lea r13,[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[24+rdi] + adc r12,rax + mov rax,QWORD[((-8))+rbp*1+rsi] + mov QWORD[rdi],r12 + adc r13,rdx + + lea rbx,[r10*2+r14] + mov QWORD[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[40+rdi] + adc rbx,rax + mov rax,QWORD[rbp*1+rsi] + mov QWORD[16+rdi],rbx + adc r8,rdx + lea rbp,[16+rbp] + mov QWORD[24+rdi],r8 + sbb r15,r15 + lea rdi,[64+rdi] + jmp NEAR $L$sqr4x_shift_n_add + +ALIGN 32 +$L$sqr4x_shift_n_add: + lea r12,[r10*2+r14] + shr r10,63 + lea r13,[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[((-8))+rdi] + adc r12,rax + mov rax,QWORD[((-8))+rbp*1+rsi] + mov QWORD[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,[r10*2+r14] + mov QWORD[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD[rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[8+rdi] + adc rbx,rax + mov rax,QWORD[rbp*1+rsi] + mov QWORD[((-16))+rdi],rbx + adc r8,rdx + + lea r12,[r10*2+r14] + mov QWORD[((-8))+rdi],r8 + sbb r15,r15 + shr r10,63 + lea r13,[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[24+rdi] + adc r12,rax + mov rax,QWORD[8+rbp*1+rsi] + mov QWORD[rdi],r12 + adc r13,rdx + + lea rbx,[r10*2+r14] + mov QWORD[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[40+rdi] + adc rbx,rax + mov rax,QWORD[16+rbp*1+rsi] + mov QWORD[16+rdi],rbx + adc r8,rdx + mov QWORD[24+rdi],r8 + sbb r15,r15 + lea rdi,[64+rdi] + add rbp,32 + jnz NEAR $L$sqr4x_shift_n_add + + lea r12,[r10*2+r14] + DB 0x67 + shr r10,63 + lea r13,[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[((-8))+rdi] + adc r12,rax + mov rax,QWORD[((-8))+rsi] + mov QWORD[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,[r10*2+r14] + mov QWORD[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,[r11*2+rcx] + shr r11,63 + or r8,r10 + mul rax + neg r15 + adc rbx,rax + adc r8,rdx + mov QWORD[((-16))+rdi],rbx + mov QWORD[((-8))+rdi],r8 +DB 102,72,15,126,213 +__bn_sqr8x_reduction: + xor rax,rax + lea rcx,[rbp*1+r9] + lea rdx,[((48+8))+r9*2+rsp] + mov QWORD[((0+8))+rsp],rcx + lea rdi,[((48+8))+r9*1+rsp] + mov QWORD[((8+8))+rsp],rdx + neg r9 + jmp NEAR $L$8x_reduction_loop + +ALIGN 32 +$L$8x_reduction_loop: + lea rdi,[r9*1+rdi] + DB 0x66 + mov rbx,QWORD[rdi] + mov r9,QWORD[8+rdi] + mov r10,QWORD[16+rdi] + mov r11,QWORD[24+rdi] + mov r12,QWORD[32+rdi] + mov r13,QWORD[40+rdi] + mov r14,QWORD[48+rdi] + mov r15,QWORD[56+rdi] + mov QWORD[rdx],rax + lea rdi,[64+rdi] + + DB 0x67 + mov r8,rbx + imul rbx,QWORD[((32+8))+rsp] + mov rax,QWORD[rbp] + mov ecx,8 + jmp NEAR $L$8x_reduce + +ALIGN 32 +$L$8x_reduce: + mul rbx + mov rax,QWORD[8+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD[16+rbp] + adc rdx,0 + add r8,r9 + mov QWORD[((48-8+8))+rcx*8+rsp],rbx + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD[24+rbp] + adc rdx,0 + add r9,r10 + mov rsi,QWORD[((32+8))+rsp] + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD[32+rbp] + adc rdx,0 + imul rsi,r8 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD[40+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD[48+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD[56+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jnz NEAR $L$8x_reduce + + lea rbp,[64+rbp] + xor rax,rax + mov rdx,QWORD[((8+8))+rsp] + cmp rbp,QWORD[((0+8))+rsp] + jae NEAR $L$8x_no_tail + + DB 0x66 + add r8,QWORD[rdi] + adc r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + sbb rsi,rsi + + mov rbx,QWORD[((48+56+8))+rsp] + mov ecx,8 + mov rax,QWORD[rbp] + jmp NEAR $L$8x_tail + +ALIGN 32 +$L$8x_tail: + mul rbx + add r8,rax + mov rax,QWORD[8+rbp] + mov QWORD[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD[16+rbp] + adc rdx,0 + add r8,r9 + lea rdi,[8+rdi] + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD[24+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD[32+rbp] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD[40+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD[48+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD[56+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,QWORD[((48-16+8))+rcx*8+rsp] + add r15,rax + adc rdx,0 + add r14,r15 + mov rax,QWORD[rbp] + mov r15,rdx + adc r15,0 + + dec ecx + jnz NEAR $L$8x_tail + + lea rbp,[64+rbp] + mov rdx,QWORD[((8+8))+rsp] + cmp rbp,QWORD[((0+8))+rsp] + jae NEAR $L$8x_tail_done + + mov rbx,QWORD[((48+56+8))+rsp] + neg rsi + mov rax,QWORD[rbp] + adc r8,QWORD[rdi] + adc r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + sbb rsi,rsi + + mov ecx,8 + jmp NEAR $L$8x_tail + +ALIGN 32 +$L$8x_tail_done: + xor rax,rax + add r8,QWORD[rdx] + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + adc r14,0 + adc r15,0 + adc rax,0 + + neg rsi +$L$8x_no_tail: + adc r8,QWORD[rdi] + adc r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + adc rax,0 + mov rcx,QWORD[((-8))+rbp] + xor rsi,rsi + +DB 102,72,15,126,213 + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 +DB 102,73,15,126,217 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[32+rdi],r12 + mov QWORD[40+rdi],r13 + mov QWORD[48+rdi],r14 + mov QWORD[56+rdi],r15 + lea rdi,[64+rdi] + + cmp rdi,rdx + jb NEAR $L$8x_reduction_loop + ret + + + +ALIGN 32 +__bn_post4x_internal: + + mov r12,QWORD[rbp] + lea rbx,[r9*1+rdi] + mov rcx,r9 +DB 102,72,15,126,207 + neg rax +DB 102,72,15,126,206 + sar rcx,3+2 + dec r12 + xor r10,r10 + mov r13,QWORD[8+rbp] + mov r14,QWORD[16+rbp] + mov r15,QWORD[24+rbp] + jmp NEAR $L$sqr4x_sub_entry + +ALIGN 16 +$L$sqr4x_sub: + mov r12,QWORD[rbp] + mov r13,QWORD[8+rbp] + mov r14,QWORD[16+rbp] + mov r15,QWORD[24+rbp] +$L$sqr4x_sub_entry: + lea rbp,[32+rbp] + not r12 + not r13 + not r14 + not r15 + and r12,rax + and r13,rax + and r14,rax + and r15,rax + + neg r10 + adc r12,QWORD[rbx] + adc r13,QWORD[8+rbx] + adc r14,QWORD[16+rbx] + adc r15,QWORD[24+rbx] + mov QWORD[rdi],r12 + lea rbx,[32+rbx] + mov QWORD[8+rdi],r13 + sbb r10,r10 + mov QWORD[16+rdi],r14 + mov QWORD[24+rdi],r15 + lea rdi,[32+rdi] + + inc rcx + jnz NEAR $L$sqr4x_sub + + mov r10,r9 + neg r9 + ret + + + +ALIGN 32 +bn_mulx4x_mont_gather5: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mulx4x_mont_gather5: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + + mov rax,rsp + +$L$mulx4x_enter: + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + +$L$mulx4x_prologue: + + shl r9d,3 + lea r10,[r9*2+r9] + neg r9 + mov r8,QWORD[r8] + + + + + + + + + + + lea r11,[((-320))+r9*2+rsp] + mov rbp,rsp + sub r11,rdi + and r11,4095 + cmp r10,r11 + jb NEAR $L$mulx4xsp_alt + sub rbp,r11 + lea rbp,[((-320))+r9*2+rbp] + jmp NEAR $L$mulx4xsp_done + +$L$mulx4xsp_alt: + lea r10,[((4096-320))+r9*2] + lea rbp,[((-320))+r9*2+rbp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rbp,r11 +$L$mulx4xsp_done: + and rbp,-64 + mov r11,rsp + sub r11,rbp + and r11,-4096 + lea rsp,[rbp*1+r11] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$mulx4x_page_walk + jmp NEAR $L$mulx4x_page_walk_done + +$L$mulx4x_page_walk: + lea rsp,[((-4096))+rsp] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$mulx4x_page_walk +$L$mulx4x_page_walk_done: + + + + + + + + + + + + + + mov QWORD[32+rsp],r8 + mov QWORD[40+rsp],rax + +$L$mulx4x_body: + call mulx4x_internal + + mov rsi,QWORD[40+rsp] + + mov rax,1 + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$mulx4x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_mulx4x_mont_gather5: + + +ALIGN 32 +mulx4x_internal: + + mov QWORD[8+rsp],r9 + mov r10,r9 + neg r9 + shl r9,5 + neg r10 + lea r13,[128+r9*1+rdx] + shr r9,5+5 + movd xmm5,DWORD[56+rax] + sub r9,1 + lea rax,[$L$inc] + mov QWORD[((16+8))+rsp],r13 + mov QWORD[((24+8))+rsp],r9 + mov QWORD[((56+8))+rsp],rdi + movdqa xmm0,XMMWORD[rax] + movdqa xmm1,XMMWORD[16+rax] + lea r10,[((88-112))+r10*1+rsp] + lea rdi,[128+rdx] + + pshufd xmm5,xmm5,0 + movdqa xmm4,xmm1 + DB 0x67 + movdqa xmm2,xmm1 + DB 0x67 + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[112+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[128+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[144+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[160+r10],xmm3 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[176+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[192+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[208+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[224+r10],xmm3 + movdqa xmm3,xmm4 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[240+r10],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[256+r10],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[272+r10],xmm2 + movdqa xmm2,xmm4 + + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[288+r10],xmm3 + movdqa xmm3,xmm4 + DB 0x67 + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[304+r10],xmm0 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[320+r10],xmm1 + + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[336+r10],xmm2 + + pand xmm0,XMMWORD[64+rdi] + pand xmm1,XMMWORD[80+rdi] + pand xmm2,XMMWORD[96+rdi] + movdqa XMMWORD[352+r10],xmm3 + pand xmm3,XMMWORD[112+rdi] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[((-128))+rdi] + movdqa xmm5,XMMWORD[((-112))+rdi] + movdqa xmm2,XMMWORD[((-96))+rdi] + pand xmm4,XMMWORD[112+r10] + movdqa xmm3,XMMWORD[((-80))+rdi] + pand xmm5,XMMWORD[128+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[144+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[160+r10] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[((-64))+rdi] + movdqa xmm5,XMMWORD[((-48))+rdi] + movdqa xmm2,XMMWORD[((-32))+rdi] + pand xmm4,XMMWORD[176+r10] + movdqa xmm3,XMMWORD[((-16))+rdi] + pand xmm5,XMMWORD[192+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[208+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[224+r10] + por xmm0,xmm2 + por xmm1,xmm3 + movdqa xmm4,XMMWORD[rdi] + movdqa xmm5,XMMWORD[16+rdi] + movdqa xmm2,XMMWORD[32+rdi] + pand xmm4,XMMWORD[240+r10] + movdqa xmm3,XMMWORD[48+rdi] + pand xmm5,XMMWORD[256+r10] + por xmm0,xmm4 + pand xmm2,XMMWORD[272+r10] + por xmm1,xmm5 + pand xmm3,XMMWORD[288+r10] + por xmm0,xmm2 + por xmm1,xmm3 + pxor xmm0,xmm1 + + pshufd xmm1,xmm0,0x4e + por xmm0,xmm1 + lea rdi,[256+rdi] +DB 102,72,15,126,194 + lea rbx,[((64+32+8))+rsp] + + mov r9,rdx + mulx rax,r8,QWORD[rsi] + mulx r12,r11,QWORD[8+rsi] + add r11,rax + mulx r13,rax,QWORD[16+rsi] + adc r12,rax + adc r13,0 + mulx r14,rax,QWORD[24+rsi] + + mov r15,r8 + imul r8,QWORD[((32+8))+rsp] + xor rbp,rbp + mov rdx,r8 + + mov QWORD[((8+8))+rsp],rdi + + lea rsi,[32+rsi] + adcx r13,rax + adcx r14,rbp + + mulx r10,rax,QWORD[rcx] + adcx r15,rax + adox r10,r11 + mulx r11,rax,QWORD[8+rcx] + adcx r10,rax + adox r11,r12 + mulx r12,rax,QWORD[16+rcx] + mov rdi,QWORD[((24+8))+rsp] + mov QWORD[((-32))+rbx],r10 + adcx r11,rax + adox r12,r13 + mulx r15,rax,QWORD[24+rcx] + mov rdx,r9 + mov QWORD[((-24))+rbx],r11 + adcx r12,rax + adox r15,rbp + lea rcx,[32+rcx] + mov QWORD[((-16))+rbx],r12 + jmp NEAR $L$mulx4x_1st + +ALIGN 32 +$L$mulx4x_1st: + adcx r15,rbp + mulx rax,r10,QWORD[rsi] + adcx r10,r14 + mulx r14,r11,QWORD[8+rsi] + adcx r11,rax + mulx rax,r12,QWORD[16+rsi] + adcx r12,r14 + mulx r14,r13,QWORD[24+rsi] + DB 0x67,0x67 + mov rdx,r8 + adcx r13,rax + adcx r14,rbp + lea rsi,[32+rsi] + lea rbx,[32+rbx] + + adox r10,r15 + mulx r15,rax,QWORD[rcx] + adcx r10,rax + adox r11,r15 + mulx r15,rax,QWORD[8+rcx] + adcx r11,rax + adox r12,r15 + mulx r15,rax,QWORD[16+rcx] + mov QWORD[((-40))+rbx],r10 + adcx r12,rax + mov QWORD[((-32))+rbx],r11 + adox r13,r15 + mulx r15,rax,QWORD[24+rcx] + mov rdx,r9 + mov QWORD[((-24))+rbx],r12 + adcx r13,rax + adox r15,rbp + lea rcx,[32+rcx] + mov QWORD[((-16))+rbx],r13 + + dec rdi + jnz NEAR $L$mulx4x_1st + + mov rax,QWORD[8+rsp] + adc r15,rbp + lea rsi,[rax*1+rsi] + add r14,r15 + mov rdi,QWORD[((8+8))+rsp] + adc rbp,rbp + mov QWORD[((-8))+rbx],r14 + jmp NEAR $L$mulx4x_outer + +ALIGN 32 +$L$mulx4x_outer: + lea r10,[((16-256))+rbx] + pxor xmm4,xmm4 + DB 0x67,0x67 + pxor xmm5,xmm5 + movdqa xmm0,XMMWORD[((-128))+rdi] + movdqa xmm1,XMMWORD[((-112))+rdi] + movdqa xmm2,XMMWORD[((-96))+rdi] + pand xmm0,XMMWORD[256+r10] + movdqa xmm3,XMMWORD[((-80))+rdi] + pand xmm1,XMMWORD[272+r10] + por xmm4,xmm0 + pand xmm2,XMMWORD[288+r10] + por xmm5,xmm1 + pand xmm3,XMMWORD[304+r10] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[((-64))+rdi] + movdqa xmm1,XMMWORD[((-48))+rdi] + movdqa xmm2,XMMWORD[((-32))+rdi] + pand xmm0,XMMWORD[320+r10] + movdqa xmm3,XMMWORD[((-16))+rdi] + pand xmm1,XMMWORD[336+r10] + por xmm4,xmm0 + pand xmm2,XMMWORD[352+r10] + por xmm5,xmm1 + pand xmm3,XMMWORD[368+r10] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[rdi] + movdqa xmm1,XMMWORD[16+rdi] + movdqa xmm2,XMMWORD[32+rdi] + pand xmm0,XMMWORD[384+r10] + movdqa xmm3,XMMWORD[48+rdi] + pand xmm1,XMMWORD[400+r10] + por xmm4,xmm0 + pand xmm2,XMMWORD[416+r10] + por xmm5,xmm1 + pand xmm3,XMMWORD[432+r10] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[64+rdi] + movdqa xmm1,XMMWORD[80+rdi] + movdqa xmm2,XMMWORD[96+rdi] + pand xmm0,XMMWORD[448+r10] + movdqa xmm3,XMMWORD[112+rdi] + pand xmm1,XMMWORD[464+r10] + por xmm4,xmm0 + pand xmm2,XMMWORD[480+r10] + por xmm5,xmm1 + pand xmm3,XMMWORD[496+r10] + por xmm4,xmm2 + por xmm5,xmm3 + por xmm4,xmm5 + + pshufd xmm0,xmm4,0x4e + por xmm0,xmm4 + lea rdi,[256+rdi] +DB 102,72,15,126,194 + + mov QWORD[rbx],rbp + lea rbx,[32+rax*1+rbx] + mulx r11,r8,QWORD[rsi] + xor rbp,rbp + mov r9,rdx + mulx r12,r14,QWORD[8+rsi] + adox r8,QWORD[((-32))+rbx] + adcx r11,r14 + mulx r13,r15,QWORD[16+rsi] + adox r11,QWORD[((-24))+rbx] + adcx r12,r15 + mulx r14,rdx,QWORD[24+rsi] + adox r12,QWORD[((-16))+rbx] + adcx r13,rdx + lea rcx,[rax*1+rcx] + lea rsi,[32+rsi] + adox r13,QWORD[((-8))+rbx] + adcx r14,rbp + adox r14,rbp + + mov r15,r8 + imul r8,QWORD[((32+8))+rsp] + + mov rdx,r8 + xor rbp,rbp + mov QWORD[((8+8))+rsp],rdi + + mulx r10,rax,QWORD[rcx] + adcx r15,rax + adox r10,r11 + mulx r11,rax,QWORD[8+rcx] + adcx r10,rax + adox r11,r12 + mulx r12,rax,QWORD[16+rcx] + adcx r11,rax + adox r12,r13 + mulx r15,rax,QWORD[24+rcx] + mov rdx,r9 + mov rdi,QWORD[((24+8))+rsp] + mov QWORD[((-32))+rbx],r10 + adcx r12,rax + mov QWORD[((-24))+rbx],r11 + adox r15,rbp + mov QWORD[((-16))+rbx],r12 + lea rcx,[32+rcx] + jmp NEAR $L$mulx4x_inner + +ALIGN 32 +$L$mulx4x_inner: + mulx rax,r10,QWORD[rsi] + adcx r15,rbp + adox r10,r14 + mulx r14,r11,QWORD[8+rsi] + adcx r10,QWORD[rbx] + adox r11,rax + mulx rax,r12,QWORD[16+rsi] + adcx r11,QWORD[8+rbx] + adox r12,r14 + mulx r14,r13,QWORD[24+rsi] + mov rdx,r8 + adcx r12,QWORD[16+rbx] + adox r13,rax + adcx r13,QWORD[24+rbx] + adox r14,rbp + lea rsi,[32+rsi] + lea rbx,[32+rbx] + adcx r14,rbp + + adox r10,r15 + mulx r15,rax,QWORD[rcx] + adcx r10,rax + adox r11,r15 + mulx r15,rax,QWORD[8+rcx] + adcx r11,rax + adox r12,r15 + mulx r15,rax,QWORD[16+rcx] + mov QWORD[((-40))+rbx],r10 + adcx r12,rax + adox r13,r15 + mov QWORD[((-32))+rbx],r11 + mulx r15,rax,QWORD[24+rcx] + mov rdx,r9 + lea rcx,[32+rcx] + mov QWORD[((-24))+rbx],r12 + adcx r13,rax + adox r15,rbp + mov QWORD[((-16))+rbx],r13 + + dec rdi + jnz NEAR $L$mulx4x_inner + + mov rax,QWORD[((0+8))+rsp] + adc r15,rbp + sub rdi,QWORD[rbx] + mov rdi,QWORD[((8+8))+rsp] + mov r10,QWORD[((16+8))+rsp] + adc r14,r15 + lea rsi,[rax*1+rsi] + adc rbp,rbp + mov QWORD[((-8))+rbx],r14 + + cmp rdi,r10 + jb NEAR $L$mulx4x_outer + + mov r10,QWORD[((-8))+rcx] + mov r8,rbp + mov r12,QWORD[rax*1+rcx] + lea rbp,[rax*1+rcx] + mov rcx,rax + lea rdi,[rax*1+rbx] + xor eax,eax + xor r15,r15 + sub r10,r14 + adc r15,r15 + or r8,r15 + sar rcx,3+2 + sub rax,r8 + mov rdx,QWORD[((56+8))+rsp] + dec r12 + mov r13,QWORD[8+rbp] + xor r8,r8 + mov r14,QWORD[16+rbp] + mov r15,QWORD[24+rbp] + jmp NEAR $L$sqrx4x_sub_entry + + + +ALIGN 32 +bn_powerx5: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_powerx5: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + + mov rax,rsp + +$L$powerx5_enter: + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + +$L$powerx5_prologue: + + shl r9d,3 + lea r10,[r9*2+r9] + neg r9 + mov r8,QWORD[r8] + + + + + + + + + lea r11,[((-320))+r9*2+rsp] + mov rbp,rsp + sub r11,rdi + and r11,4095 + cmp r10,r11 + jb NEAR $L$pwrx_sp_alt + sub rbp,r11 + lea rbp,[((-320))+r9*2+rbp] + jmp NEAR $L$pwrx_sp_done + +ALIGN 32 +$L$pwrx_sp_alt: + lea r10,[((4096-320))+r9*2] + lea rbp,[((-320))+r9*2+rbp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rbp,r11 +$L$pwrx_sp_done: + and rbp,-64 + mov r11,rsp + sub r11,rbp + and r11,-4096 + lea rsp,[rbp*1+r11] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$pwrx_page_walk + jmp NEAR $L$pwrx_page_walk_done + +$L$pwrx_page_walk: + lea rsp,[((-4096))+rsp] + mov r10,QWORD[rsp] + cmp rsp,rbp + ja NEAR $L$pwrx_page_walk +$L$pwrx_page_walk_done: + + mov r10,r9 + neg r9 + + + + + + + + + + + + + pxor xmm0,xmm0 +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 102,73,15,110,218 +DB 102,72,15,110,226 + mov QWORD[32+rsp],r8 + mov QWORD[40+rsp],rax + +$L$powerx5_body: + + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + + mov r9,r10 + mov rdi,rsi +DB 102,72,15,126,209 +DB 102,72,15,126,226 + mov rax,QWORD[40+rsp] + + call mulx4x_internal + + mov rsi,QWORD[40+rsp] + + mov rax,1 + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$powerx5_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_bn_powerx5: + +global bn_sqrx8x_internal + + +ALIGN 32 +bn_sqrx8x_internal: +__bn_sqrx8x_internal: + +_CET_ENDBR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lea rdi,[((48+8))+rsp] + lea rbp,[r9*1+rsi] + mov QWORD[((0+8))+rsp],r9 + mov QWORD[((8+8))+rsp],rbp + jmp NEAR $L$sqr8x_zero_start + +ALIGN 32 + DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +$L$sqrx8x_zero: + DB 0x3e + movdqa XMMWORD[rdi],xmm0 + movdqa XMMWORD[16+rdi],xmm0 + movdqa XMMWORD[32+rdi],xmm0 + movdqa XMMWORD[48+rdi],xmm0 +$L$sqr8x_zero_start: + movdqa XMMWORD[64+rdi],xmm0 + movdqa XMMWORD[80+rdi],xmm0 + movdqa XMMWORD[96+rdi],xmm0 + movdqa XMMWORD[112+rdi],xmm0 + lea rdi,[128+rdi] + sub r9,64 + jnz NEAR $L$sqrx8x_zero + + mov rdx,QWORD[rsi] + + xor r10,r10 + xor r11,r11 + xor r12,r12 + xor r13,r13 + xor r14,r14 + xor r15,r15 + lea rdi,[((48+8))+rsp] + xor rbp,rbp + jmp NEAR $L$sqrx8x_outer_loop + +ALIGN 32 +$L$sqrx8x_outer_loop: + mulx rax,r8,QWORD[8+rsi] + adcx r8,r9 + adox r10,rax + mulx rax,r9,QWORD[16+rsi] + adcx r9,r10 + adox r11,rax + DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 + adcx r10,r11 + adox r12,rax + DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 + adcx r11,r12 + adox r13,rax + mulx rax,r12,QWORD[40+rsi] + adcx r12,r13 + adox r14,rax + mulx rax,r13,QWORD[48+rsi] + adcx r13,r14 + adox rax,r15 + mulx r15,r14,QWORD[56+rsi] + mov rdx,QWORD[8+rsi] + adcx r14,rax + adox r15,rbp + adc r15,QWORD[64+rdi] + mov QWORD[8+rdi],r8 + mov QWORD[16+rdi],r9 + sbb rcx,rcx + xor rbp,rbp + + + mulx rbx,r8,QWORD[16+rsi] + mulx rax,r9,QWORD[24+rsi] + adcx r8,r10 + adox r9,rbx + mulx rbx,r10,QWORD[32+rsi] + adcx r9,r11 + adox r10,rax + DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 + adcx r10,r12 + adox r11,rbx + DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 + adcx r11,r13 + adox r12,r14 + DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 + mov rdx,QWORD[16+rsi] + adcx r12,rax + adox r13,rbx + adcx r13,r15 + adox r14,rbp + adcx r14,rbp + + mov QWORD[24+rdi],r8 + mov QWORD[32+rdi],r9 + + mulx rbx,r8,QWORD[24+rsi] + mulx rax,r9,QWORD[32+rsi] + adcx r8,r10 + adox r9,rbx + mulx rbx,r10,QWORD[40+rsi] + adcx r9,r11 + adox r10,rax + DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 + adcx r10,r12 + adox r11,r13 + DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 + DB 0x3e + mov rdx,QWORD[24+rsi] + adcx r11,rbx + adox r12,rax + adcx r12,r14 + mov QWORD[40+rdi],r8 + mov QWORD[48+rdi],r9 + mulx rax,r8,QWORD[32+rsi] + adox r13,rbp + adcx r13,rbp + + mulx rbx,r9,QWORD[40+rsi] + adcx r8,r10 + adox r9,rax + mulx rax,r10,QWORD[48+rsi] + adcx r9,r11 + adox r10,r12 + mulx r12,r11,QWORD[56+rsi] + mov rdx,QWORD[32+rsi] + mov r14,QWORD[40+rsi] + adcx r10,rbx + adox r11,rax + mov r15,QWORD[48+rsi] + adcx r11,r13 + adox r12,rbp + adcx r12,rbp + + mov QWORD[56+rdi],r8 + mov QWORD[64+rdi],r9 + + mulx rax,r9,r14 + mov r8,QWORD[56+rsi] + adcx r9,r10 + mulx rbx,r10,r15 + adox r10,rax + adcx r10,r11 + mulx rax,r11,r8 + mov rdx,r14 + adox r11,rbx + adcx r11,r12 + + adcx rax,rbp + + mulx rbx,r14,r15 + mulx r13,r12,r8 + mov rdx,r15 + lea rsi,[64+rsi] + adcx r11,r14 + adox r12,rbx + adcx r12,rax + adox r13,rbp + + DB 0x67,0x67 + mulx r14,r8,r8 + adcx r13,r8 + adcx r14,rbp + + cmp rsi,QWORD[((8+8))+rsp] + je NEAR $L$sqrx8x_outer_break + + neg rcx + mov rcx,-8 + mov r15,rbp + mov r8,QWORD[64+rdi] + adcx r9,QWORD[72+rdi] + adcx r10,QWORD[80+rdi] + adcx r11,QWORD[88+rdi] + adc r12,QWORD[96+rdi] + adc r13,QWORD[104+rdi] + adc r14,QWORD[112+rdi] + adc r15,QWORD[120+rdi] + lea rbp,[rsi] + lea rdi,[128+rdi] + sbb rax,rax + + mov rdx,QWORD[((-64))+rsi] + mov QWORD[((16+8))+rsp],rax + mov QWORD[((24+8))+rsp],rdi + + + xor eax,eax + jmp NEAR $L$sqrx8x_loop + +ALIGN 32 +$L$sqrx8x_loop: + mov rbx,r8 + mulx r8,rax,QWORD[rbp] + adcx rbx,rax + adox r8,r9 + + mulx r9,rax,QWORD[8+rbp] + adcx r8,rax + adox r9,r10 + + mulx r10,rax,QWORD[16+rbp] + adcx r9,rax + adox r10,r11 + + mulx r11,rax,QWORD[24+rbp] + adcx r10,rax + adox r11,r12 + + DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcx r11,rax + adox r12,r13 + + mulx r13,rax,QWORD[40+rbp] + adcx r12,rax + adox r13,r14 + + mulx r14,rax,QWORD[48+rbp] + mov QWORD[rcx*8+rdi],rbx + mov ebx,0 + adcx r13,rax + adox r14,r15 + + DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 + mov rdx,QWORD[8+rcx*8+rsi] + adcx r14,rax + adox r15,rbx + adcx r15,rbx + + DB 0x67 + inc rcx + jnz NEAR $L$sqrx8x_loop + + lea rbp,[64+rbp] + mov rcx,-8 + cmp rbp,QWORD[((8+8))+rsp] + je NEAR $L$sqrx8x_break + + sub rbx,QWORD[((16+8))+rsp] + DB 0x66 + mov rdx,QWORD[((-64))+rsi] + adcx r8,QWORD[rdi] + adcx r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + lea rdi,[64+rdi] + DB 0x67 + sbb rax,rax + xor ebx,ebx + mov QWORD[((16+8))+rsp],rax + jmp NEAR $L$sqrx8x_loop + +ALIGN 32 +$L$sqrx8x_break: + xor rbp,rbp + sub rbx,QWORD[((16+8))+rsp] + adcx r8,rbp + mov rcx,QWORD[((24+8))+rsp] + adcx r9,rbp + mov rdx,QWORD[rsi] + adc r10,0 + mov QWORD[rdi],r8 + adc r11,0 + adc r12,0 + adc r13,0 + adc r14,0 + adc r15,0 + cmp rdi,rcx + je NEAR $L$sqrx8x_outer_loop + + mov QWORD[8+rdi],r9 + mov r9,QWORD[8+rcx] + mov QWORD[16+rdi],r10 + mov r10,QWORD[16+rcx] + mov QWORD[24+rdi],r11 + mov r11,QWORD[24+rcx] + mov QWORD[32+rdi],r12 + mov r12,QWORD[32+rcx] + mov QWORD[40+rdi],r13 + mov r13,QWORD[40+rcx] + mov QWORD[48+rdi],r14 + mov r14,QWORD[48+rcx] + mov QWORD[56+rdi],r15 + mov r15,QWORD[56+rcx] + mov rdi,rcx + jmp NEAR $L$sqrx8x_outer_loop + +ALIGN 32 +$L$sqrx8x_outer_break: + mov QWORD[72+rdi],r9 +DB 102,72,15,126,217 + mov QWORD[80+rdi],r10 + mov QWORD[88+rdi],r11 + mov QWORD[96+rdi],r12 + mov QWORD[104+rdi],r13 + mov QWORD[112+rdi],r14 + lea rdi,[((48+8))+rsp] + mov rdx,QWORD[rcx*1+rsi] + + mov r11,QWORD[8+rdi] + xor r10,r10 + mov r9,QWORD[((0+8))+rsp] + adox r11,r11 + mov r12,QWORD[16+rdi] + mov r13,QWORD[24+rdi] + + +ALIGN 32 +$L$sqrx4x_shift_n_add: + mulx rbx,rax,rdx + adox r12,r12 + adcx rax,r10 + DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 + DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 + adox r13,r13 + adcx rbx,r11 + mov r11,QWORD[40+rdi] + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + + mulx rbx,rax,rdx + adox r10,r10 + adcx rax,r12 + mov rdx,QWORD[16+rcx*1+rsi] + mov r12,QWORD[48+rdi] + adox r11,r11 + adcx rbx,r13 + mov r13,QWORD[56+rdi] + mov QWORD[16+rdi],rax + mov QWORD[24+rdi],rbx + + mulx rbx,rax,rdx + adox r12,r12 + adcx rax,r10 + mov rdx,QWORD[24+rcx*1+rsi] + lea rcx,[32+rcx] + mov r10,QWORD[64+rdi] + adox r13,r13 + adcx rbx,r11 + mov r11,QWORD[72+rdi] + mov QWORD[32+rdi],rax + mov QWORD[40+rdi],rbx + + mulx rbx,rax,rdx + adox r10,r10 + adcx rax,r12 + jrcxz $L$sqrx4x_shift_n_add_break + DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 + adox r11,r11 + adcx rbx,r13 + mov r12,QWORD[80+rdi] + mov r13,QWORD[88+rdi] + mov QWORD[48+rdi],rax + mov QWORD[56+rdi],rbx + lea rdi,[64+rdi] + nop + jmp NEAR $L$sqrx4x_shift_n_add + +ALIGN 32 +$L$sqrx4x_shift_n_add_break: + adcx rbx,r13 + mov QWORD[48+rdi],rax + mov QWORD[56+rdi],rbx + lea rdi,[64+rdi] +DB 102,72,15,126,213 +__bn_sqrx8x_reduction: + xor eax,eax + mov rbx,QWORD[((32+8))+rsp] + mov rdx,QWORD[((48+8))+rsp] + lea rcx,[((-64))+r9*1+rbp] + + mov QWORD[((0+8))+rsp],rcx + mov QWORD[((8+8))+rsp],rdi + + lea rdi,[((48+8))+rsp] + jmp NEAR $L$sqrx8x_reduction_loop + +ALIGN 32 +$L$sqrx8x_reduction_loop: + mov r9,QWORD[8+rdi] + mov r10,QWORD[16+rdi] + mov r11,QWORD[24+rdi] + mov r12,QWORD[32+rdi] + mov r8,rdx + imul rdx,rbx + mov r13,QWORD[40+rdi] + mov r14,QWORD[48+rdi] + mov r15,QWORD[56+rdi] + mov QWORD[((24+8))+rsp],rax + + lea rdi,[64+rdi] + xor rsi,rsi + mov rcx,-8 + jmp NEAR $L$sqrx8x_reduce + +ALIGN 32 +$L$sqrx8x_reduce: + mov rbx,r8 + mulx r8,rax,QWORD[rbp] + adcx rax,rbx + adox r8,r9 + + mulx r9,rbx,QWORD[8+rbp] + adcx r8,rbx + adox r9,r10 + + mulx r10,rbx,QWORD[16+rbp] + adcx r9,rbx + adox r10,r11 + + mulx r11,rbx,QWORD[24+rbp] + adcx r10,rbx + adox r11,r12 + + DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + mov rax,rdx + mov rdx,r8 + adcx r11,rbx + adox r12,r13 + + mulx rdx,rbx,QWORD[((32+8))+rsp] + mov rdx,rax + mov QWORD[((64+48+8))+rcx*8+rsp],rax + + mulx r13,rax,QWORD[40+rbp] + adcx r12,rax + adox r13,r14 + + mulx r14,rax,QWORD[48+rbp] + adcx r13,rax + adox r14,r15 + + mulx r15,rax,QWORD[56+rbp] + mov rdx,rbx + adcx r14,rax + adox r15,rsi + adcx r15,rsi + + DB 0x67,0x67,0x67 + inc rcx + jnz NEAR $L$sqrx8x_reduce + + mov rax,rsi + cmp rbp,QWORD[((0+8))+rsp] + jae NEAR $L$sqrx8x_no_tail + + mov rdx,QWORD[((48+8))+rsp] + add r8,QWORD[rdi] + lea rbp,[64+rbp] + mov rcx,-8 + adcx r9,QWORD[8+rdi] + adcx r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + lea rdi,[64+rdi] + sbb rax,rax + + xor rsi,rsi + mov QWORD[((16+8))+rsp],rax + jmp NEAR $L$sqrx8x_tail + +ALIGN 32 +$L$sqrx8x_tail: + mov rbx,r8 + mulx r8,rax,QWORD[rbp] + adcx rbx,rax + adox r8,r9 + + mulx r9,rax,QWORD[8+rbp] + adcx r8,rax + adox r9,r10 + + mulx r10,rax,QWORD[16+rbp] + adcx r9,rax + adox r10,r11 + + mulx r11,rax,QWORD[24+rbp] + adcx r10,rax + adox r11,r12 + + DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcx r11,rax + adox r12,r13 + + mulx r13,rax,QWORD[40+rbp] + adcx r12,rax + adox r13,r14 + + mulx r14,rax,QWORD[48+rbp] + adcx r13,rax + adox r14,r15 + + mulx r15,rax,QWORD[56+rbp] + mov rdx,QWORD[((72+48+8))+rcx*8+rsp] + adcx r14,rax + adox r15,rsi + mov QWORD[rcx*8+rdi],rbx + mov rbx,r8 + adcx r15,rsi + + inc rcx + jnz NEAR $L$sqrx8x_tail + + cmp rbp,QWORD[((0+8))+rsp] + jae NEAR $L$sqrx8x_tail_done + + sub rsi,QWORD[((16+8))+rsp] + mov rdx,QWORD[((48+8))+rsp] + lea rbp,[64+rbp] + adc r8,QWORD[rdi] + adc r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + lea rdi,[64+rdi] + sbb rax,rax + sub rcx,8 + + xor rsi,rsi + mov QWORD[((16+8))+rsp],rax + jmp NEAR $L$sqrx8x_tail + +ALIGN 32 +$L$sqrx8x_tail_done: + xor rax,rax + add r8,QWORD[((24+8))+rsp] + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + adc r14,0 + adc r15,0 + adc rax,0 + + sub rsi,QWORD[((16+8))+rsp] +$L$sqrx8x_no_tail: + adc r8,QWORD[rdi] +DB 102,72,15,126,217 + adc r9,QWORD[8+rdi] + mov rsi,QWORD[56+rbp] +DB 102,72,15,126,213 + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + adc rax,0 + + mov rbx,QWORD[((32+8))+rsp] + mov rdx,QWORD[64+rcx*1+rdi] + + mov QWORD[rdi],r8 + lea r8,[64+rdi] + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[32+rdi],r12 + mov QWORD[40+rdi],r13 + mov QWORD[48+rdi],r14 + mov QWORD[56+rdi],r15 + + lea rdi,[64+rcx*1+rdi] + cmp r8,QWORD[((8+8))+rsp] + jb NEAR $L$sqrx8x_reduction_loop + ret + + +ALIGN 32 + +__bn_postx4x_internal: + + mov r12,QWORD[rbp] + mov r10,rcx + mov r9,rcx + neg rax + sar rcx,3+2 + +DB 102,72,15,126,202 +DB 102,72,15,126,206 + dec r12 + mov r13,QWORD[8+rbp] + xor r8,r8 + mov r14,QWORD[16+rbp] + mov r15,QWORD[24+rbp] + jmp NEAR $L$sqrx4x_sub_entry + +ALIGN 16 +$L$sqrx4x_sub: + mov r12,QWORD[rbp] + mov r13,QWORD[8+rbp] + mov r14,QWORD[16+rbp] + mov r15,QWORD[24+rbp] +$L$sqrx4x_sub_entry: + andn r12,r12,rax + lea rbp,[32+rbp] + andn r13,r13,rax + andn r14,r14,rax + andn r15,r15,rax + + neg r8 + adc r12,QWORD[rdi] + adc r13,QWORD[8+rdi] + adc r14,QWORD[16+rdi] + adc r15,QWORD[24+rdi] + mov QWORD[rdx],r12 + lea rdi,[32+rdi] + mov QWORD[8+rdx],r13 + sbb r8,r8 + mov QWORD[16+rdx],r14 + mov QWORD[24+rdx],r15 + lea rdx,[32+rdx] + + inc rcx + jnz NEAR $L$sqrx4x_sub + + neg r9 + + ret + + +global bn_scatter5 + +ALIGN 16 +bn_scatter5: + +_CET_ENDBR + cmp edx,0 + jz NEAR $L$scatter_epilogue + + + + + + + + + + lea r8,[r9*8+r8] +$L$scatter: + mov rax,QWORD[rcx] + lea rcx,[8+rcx] + mov QWORD[r8],rax + lea r8,[256+r8] + sub edx,1 + jnz NEAR $L$scatter +$L$scatter_epilogue: + ret + + + +global bn_gather5 + +ALIGN 32 +bn_gather5: + +$L$SEH_begin_bn_gather5: +_CET_ENDBR + + DB 0x4c,0x8d,0x14,0x24 + + DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00 + lea rax,[$L$inc] + and rsp,-16 + + movd xmm5,r9d + movdqa xmm0,XMMWORD[rax] + movdqa xmm1,XMMWORD[16+rax] + lea r11,[128+r8] + lea rax,[128+rsp] + + pshufd xmm5,xmm5,0 + movdqa xmm4,xmm1 + movdqa xmm2,xmm1 + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa xmm3,xmm4 + + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[(-128)+rax],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[(-112)+rax],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[(-96)+rax],xmm2 + movdqa xmm2,xmm4 + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[(-80)+rax],xmm3 + movdqa xmm3,xmm4 + + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[(-64)+rax],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[(-48)+rax],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[(-32)+rax],xmm2 + movdqa xmm2,xmm4 + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[(-16)+rax],xmm3 + movdqa xmm3,xmm4 + + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[rax],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[16+rax],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[32+rax],xmm2 + movdqa xmm2,xmm4 + paddd xmm1,xmm0 + pcmpeqd xmm0,xmm5 + movdqa XMMWORD[48+rax],xmm3 + movdqa xmm3,xmm4 + + paddd xmm2,xmm1 + pcmpeqd xmm1,xmm5 + movdqa XMMWORD[64+rax],xmm0 + movdqa xmm0,xmm4 + + paddd xmm3,xmm2 + pcmpeqd xmm2,xmm5 + movdqa XMMWORD[80+rax],xmm1 + movdqa xmm1,xmm4 + + paddd xmm0,xmm3 + pcmpeqd xmm3,xmm5 + movdqa XMMWORD[96+rax],xmm2 + movdqa xmm2,xmm4 + movdqa XMMWORD[112+rax],xmm3 + jmp NEAR $L$gather + +ALIGN 32 +$L$gather: + pxor xmm4,xmm4 + pxor xmm5,xmm5 + movdqa xmm0,XMMWORD[((-128))+r11] + movdqa xmm1,XMMWORD[((-112))+r11] + movdqa xmm2,XMMWORD[((-96))+r11] + pand xmm0,XMMWORD[((-128))+rax] + movdqa xmm3,XMMWORD[((-80))+r11] + pand xmm1,XMMWORD[((-112))+rax] + por xmm4,xmm0 + pand xmm2,XMMWORD[((-96))+rax] + por xmm5,xmm1 + pand xmm3,XMMWORD[((-80))+rax] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[((-64))+r11] + movdqa xmm1,XMMWORD[((-48))+r11] + movdqa xmm2,XMMWORD[((-32))+r11] + pand xmm0,XMMWORD[((-64))+rax] + movdqa xmm3,XMMWORD[((-16))+r11] + pand xmm1,XMMWORD[((-48))+rax] + por xmm4,xmm0 + pand xmm2,XMMWORD[((-32))+rax] + por xmm5,xmm1 + pand xmm3,XMMWORD[((-16))+rax] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[r11] + movdqa xmm1,XMMWORD[16+r11] + movdqa xmm2,XMMWORD[32+r11] + pand xmm0,XMMWORD[rax] + movdqa xmm3,XMMWORD[48+r11] + pand xmm1,XMMWORD[16+rax] + por xmm4,xmm0 + pand xmm2,XMMWORD[32+rax] + por xmm5,xmm1 + pand xmm3,XMMWORD[48+rax] + por xmm4,xmm2 + por xmm5,xmm3 + movdqa xmm0,XMMWORD[64+r11] + movdqa xmm1,XMMWORD[80+r11] + movdqa xmm2,XMMWORD[96+r11] + pand xmm0,XMMWORD[64+rax] + movdqa xmm3,XMMWORD[112+r11] + pand xmm1,XMMWORD[80+rax] + por xmm4,xmm0 + pand xmm2,XMMWORD[96+rax] + por xmm5,xmm1 + pand xmm3,XMMWORD[112+rax] + por xmm4,xmm2 + por xmm5,xmm3 + por xmm4,xmm5 + lea r11,[256+r11] + + pshufd xmm0,xmm4,0x4e + por xmm0,xmm4 + movq QWORD[rcx],xmm0 + lea rcx,[8+rcx] + sub edx,1 + jnz NEAR $L$gather + + lea rsp,[r10] + + ret +$L$SEH_end_bn_gather5: + + +section .rdata rdata align=8 +ALIGN 64 +$L$inc: + DD 0,0,1,1 + DD 2,2,2,2 + DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 + DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 + DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 + DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 + DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 + DB 112,101,110,115,115,108,46,111,114,103,62,0 +section .text + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +mul_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_pop_regs + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[8+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea r10,[$L$mul_epilogue] + cmp rbx,r10 + ja NEAR $L$body_40 + + mov r10,QWORD[192+r8] + mov rax,QWORD[8+r10*8+rax] + + jmp NEAR $L$common_pop_regs + +$L$body_40: + mov rax,QWORD[40+rax] +$L$common_pop_regs: + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase + DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase + DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase + + DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase + DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase + DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase + + DD $L$SEH_begin_bn_power5 wrt ..imagebase + DD $L$SEH_end_bn_power5 wrt ..imagebase + DD $L$SEH_info_bn_power5 wrt ..imagebase + DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase + DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase + DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase + + DD $L$SEH_begin_bn_powerx5 wrt ..imagebase + DD $L$SEH_end_bn_powerx5 wrt ..imagebase + DD $L$SEH_info_bn_powerx5 wrt ..imagebase + DD $L$SEH_begin_bn_gather5 wrt ..imagebase + DD $L$SEH_end_bn_gather5 wrt ..imagebase + DD $L$SEH_info_bn_gather5 wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_bn_mul_mont_gather5: + DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_mul4x_mont_gather5: + DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_power5: + DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_mulx4x_mont_gather5: + DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_powerx5: + DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_gather5: + DB 0x01,0x0b,0x03,0x0a + DB 0x0b,0x01,0x21,0x00 + DB 0x04,0xa3,0x00,0x00 +ALIGN 8 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-apple.S b/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-apple.S new file mode 100644 index 0000000000..81e2f071b1 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-apple.S @@ -0,0 +1,3081 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.section __DATA,__const + +.p2align 4 +one: +.quad 1,0 +two: +.quad 2,0 +three: +.quad 3,0 +four: +.quad 4,0 +five: +.quad 5,0 +six: +.quad 6,0 +seven: +.quad 7,0 +eight: +.quad 8,0 + +OR_MASK: +.long 0x00000000,0x00000000,0x00000000,0x80000000 +poly: +.quad 0x1, 0xc200000000000000 +mask: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +con1: +.long 1,1,1,1 +con2: +.long 0x1b,0x1b,0x1b,0x1b +con3: +.byte -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7 +and_mask: +.long 0,0xffffffff, 0xffffffff, 0xffffffff +.text + +.p2align 4 +GFMUL: + + vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 + vpclmulqdq $0x11,%xmm1,%xmm0,%xmm5 + vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 + vpclmulqdq $0x01,%xmm1,%xmm0,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm2,%xmm2 + vpxor %xmm3,%xmm5,%xmm5 + + vpclmulqdq $0x10,poly(%rip),%xmm2,%xmm3 + vpshufd $78,%xmm2,%xmm4 + vpxor %xmm4,%xmm3,%xmm2 + + vpclmulqdq $0x10,poly(%rip),%xmm2,%xmm3 + vpshufd $78,%xmm2,%xmm4 + vpxor %xmm4,%xmm3,%xmm2 + + vpxor %xmm5,%xmm2,%xmm0 + ret + + +.globl _aesgcmsiv_htable_init +.private_extern _aesgcmsiv_htable_init + +.p2align 4 +_aesgcmsiv_htable_init: + +_CET_ENDBR + vmovdqa (%rsi),%xmm0 + vmovdqa %xmm0,%xmm1 + vmovdqa %xmm0,(%rdi) + call GFMUL + vmovdqa %xmm0,16(%rdi) + call GFMUL + vmovdqa %xmm0,32(%rdi) + call GFMUL + vmovdqa %xmm0,48(%rdi) + call GFMUL + vmovdqa %xmm0,64(%rdi) + call GFMUL + vmovdqa %xmm0,80(%rdi) + call GFMUL + vmovdqa %xmm0,96(%rdi) + call GFMUL + vmovdqa %xmm0,112(%rdi) + ret + + +.globl _aesgcmsiv_htable6_init +.private_extern _aesgcmsiv_htable6_init + +.p2align 4 +_aesgcmsiv_htable6_init: + +_CET_ENDBR + vmovdqa (%rsi),%xmm0 + vmovdqa %xmm0,%xmm1 + vmovdqa %xmm0,(%rdi) + call GFMUL + vmovdqa %xmm0,16(%rdi) + call GFMUL + vmovdqa %xmm0,32(%rdi) + call GFMUL + vmovdqa %xmm0,48(%rdi) + call GFMUL + vmovdqa %xmm0,64(%rdi) + call GFMUL + vmovdqa %xmm0,80(%rdi) + ret + + +.globl _aesgcmsiv_htable_polyval +.private_extern _aesgcmsiv_htable_polyval + +.p2align 4 +_aesgcmsiv_htable_polyval: + +_CET_ENDBR + testq %rdx,%rdx + jnz L$htable_polyval_start + ret + +L$htable_polyval_start: + vzeroall + + + + movq %rdx,%r11 + andq $127,%r11 + + jz L$htable_polyval_no_prefix + + vpxor %xmm9,%xmm9,%xmm9 + vmovdqa (%rcx),%xmm1 + subq %r11,%rdx + + subq $16,%r11 + + + vmovdqu (%rsi),%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + + vpclmulqdq $0x01,(%rdi,%r11,1),%xmm0,%xmm5 + vpclmulqdq $0x00,(%rdi,%r11,1),%xmm0,%xmm3 + vpclmulqdq $0x11,(%rdi,%r11,1),%xmm0,%xmm4 + vpclmulqdq $0x10,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + leaq 16(%rsi),%rsi + testq %r11,%r11 + jnz L$htable_polyval_prefix_loop + jmp L$htable_polyval_prefix_complete + + +.p2align 6 +L$htable_polyval_prefix_loop: + subq $16,%r11 + + vmovdqu (%rsi),%xmm0 + + vpclmulqdq $0x00,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x01,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x10,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + testq %r11,%r11 + + leaq 16(%rsi),%rsi + + jnz L$htable_polyval_prefix_loop + +L$htable_polyval_prefix_complete: + vpsrldq $8,%xmm5,%xmm6 + vpslldq $8,%xmm5,%xmm5 + + vpxor %xmm6,%xmm4,%xmm9 + vpxor %xmm5,%xmm3,%xmm1 + + jmp L$htable_polyval_main_loop + +L$htable_polyval_no_prefix: + + + + + vpxor %xmm1,%xmm1,%xmm1 + vmovdqa (%rcx),%xmm9 + +.p2align 6 +L$htable_polyval_main_loop: + subq $0x80,%rdx + jb L$htable_polyval_out + + vmovdqu 112(%rsi),%xmm0 + + vpclmulqdq $0x01,(%rdi),%xmm0,%xmm5 + vpclmulqdq $0x00,(%rdi),%xmm0,%xmm3 + vpclmulqdq $0x11,(%rdi),%xmm0,%xmm4 + vpclmulqdq $0x10,(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vmovdqu 96(%rsi),%xmm0 + vpclmulqdq $0x01,16(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,16(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,16(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,16(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + + vmovdqu 80(%rsi),%xmm0 + + vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm7 + vpalignr $8,%xmm1,%xmm1,%xmm1 + + vpclmulqdq $0x01,32(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,32(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,32(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,32(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vpxor %xmm7,%xmm1,%xmm1 + + vmovdqu 64(%rsi),%xmm0 + + vpclmulqdq $0x01,48(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,48(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,48(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,48(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vmovdqu 48(%rsi),%xmm0 + + vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm7 + vpalignr $8,%xmm1,%xmm1,%xmm1 + + vpclmulqdq $0x01,64(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,64(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,64(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,64(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vpxor %xmm7,%xmm1,%xmm1 + + vmovdqu 32(%rsi),%xmm0 + + vpclmulqdq $0x01,80(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,80(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,80(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,80(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vpxor %xmm9,%xmm1,%xmm1 + + vmovdqu 16(%rsi),%xmm0 + + vpclmulqdq $0x01,96(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,96(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,96(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,96(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vmovdqu 0(%rsi),%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + + vpclmulqdq $0x01,112(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,112(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,112(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,112(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vpsrldq $8,%xmm5,%xmm6 + vpslldq $8,%xmm5,%xmm5 + + vpxor %xmm6,%xmm4,%xmm9 + vpxor %xmm5,%xmm3,%xmm1 + + leaq 128(%rsi),%rsi + jmp L$htable_polyval_main_loop + + + +L$htable_polyval_out: + vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm6 + vpalignr $8,%xmm1,%xmm1,%xmm1 + vpxor %xmm6,%xmm1,%xmm1 + + vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm6 + vpalignr $8,%xmm1,%xmm1,%xmm1 + vpxor %xmm6,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + + vmovdqu %xmm1,(%rcx) + vzeroupper + ret + + +.globl _aesgcmsiv_polyval_horner +.private_extern _aesgcmsiv_polyval_horner + +.p2align 4 +_aesgcmsiv_polyval_horner: + +_CET_ENDBR + testq %rcx,%rcx + jnz L$polyval_horner_start + ret + +L$polyval_horner_start: + + + + xorq %r10,%r10 + shlq $4,%rcx + + vmovdqa (%rsi),%xmm1 + vmovdqa (%rdi),%xmm0 + +L$polyval_horner_loop: + vpxor (%rdx,%r10,1),%xmm0,%xmm0 + call GFMUL + + addq $16,%r10 + cmpq %r10,%rcx + jne L$polyval_horner_loop + + + vmovdqa %xmm0,(%rdi) + ret + + +.globl _aes128gcmsiv_aes_ks +.private_extern _aes128gcmsiv_aes_ks + +.p2align 4 +_aes128gcmsiv_aes_ks: + +_CET_ENDBR + vmovdqu (%rdi),%xmm1 + vmovdqa %xmm1,(%rsi) + + vmovdqa con1(%rip),%xmm0 + vmovdqa mask(%rip),%xmm15 + + movq $8,%rax + +L$ks128_loop: + addq $16,%rsi + subq $1,%rax + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,(%rsi) + jne L$ks128_loop + + vmovdqa con2(%rip),%xmm0 + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,16(%rsi) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslldq $4,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,32(%rsi) + ret + + +.globl _aes256gcmsiv_aes_ks +.private_extern _aes256gcmsiv_aes_ks + +.p2align 4 +_aes256gcmsiv_aes_ks: + +_CET_ENDBR + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm3 + vmovdqa %xmm1,(%rsi) + vmovdqa %xmm3,16(%rsi) + vmovdqa con1(%rip),%xmm0 + vmovdqa mask(%rip),%xmm15 + vpxor %xmm14,%xmm14,%xmm14 + movq $6,%rax + +L$ks256_loop: + addq $32,%rsi + subq $1,%rax + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,(%rsi) + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpsllq $32,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpshufb con3(%rip),%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vmovdqa %xmm3,16(%rsi) + jne L$ks256_loop + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpsllq $32,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,32(%rsi) + ret + +.globl _aes128gcmsiv_aes_ks_enc_x1 +.private_extern _aes128gcmsiv_aes_ks_enc_x1 + +.p2align 4 +_aes128gcmsiv_aes_ks_enc_x1: + +_CET_ENDBR + vmovdqa (%rcx),%xmm1 + vmovdqa 0(%rdi),%xmm4 + + vmovdqa %xmm1,(%rdx) + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqa con1(%rip),%xmm0 + vmovdqa mask(%rip),%xmm15 + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,16(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,32(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,48(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,64(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,80(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,96(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,112(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,128(%rdx) + + + vmovdqa con2(%rip),%xmm0 + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,144(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenclast %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,160(%rdx) + + + vmovdqa %xmm4,0(%rsi) + ret + + +.globl _aes128gcmsiv_kdf +.private_extern _aes128gcmsiv_kdf + +.p2align 4 +_aes128gcmsiv_kdf: + +_CET_ENDBR + + + + + vmovdqa (%rdx),%xmm1 + vmovdqa 0(%rdi),%xmm9 + vmovdqa and_mask(%rip),%xmm12 + vmovdqa one(%rip),%xmm13 + vpshufd $0x90,%xmm9,%xmm9 + vpand %xmm12,%xmm9,%xmm9 + vpaddd %xmm13,%xmm9,%xmm10 + vpaddd %xmm13,%xmm10,%xmm11 + vpaddd %xmm13,%xmm11,%xmm12 + + vpxor %xmm1,%xmm9,%xmm9 + vpxor %xmm1,%xmm10,%xmm10 + vpxor %xmm1,%xmm11,%xmm11 + vpxor %xmm1,%xmm12,%xmm12 + + vmovdqa 16(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 32(%rdx),%xmm2 + vaesenc %xmm2,%xmm9,%xmm9 + vaesenc %xmm2,%xmm10,%xmm10 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + + vmovdqa 48(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 64(%rdx),%xmm2 + vaesenc %xmm2,%xmm9,%xmm9 + vaesenc %xmm2,%xmm10,%xmm10 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + + vmovdqa 80(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 96(%rdx),%xmm2 + vaesenc %xmm2,%xmm9,%xmm9 + vaesenc %xmm2,%xmm10,%xmm10 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + + vmovdqa 112(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 128(%rdx),%xmm2 + vaesenc %xmm2,%xmm9,%xmm9 + vaesenc %xmm2,%xmm10,%xmm10 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + + vmovdqa 144(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 160(%rdx),%xmm2 + vaesenclast %xmm2,%xmm9,%xmm9 + vaesenclast %xmm2,%xmm10,%xmm10 + vaesenclast %xmm2,%xmm11,%xmm11 + vaesenclast %xmm2,%xmm12,%xmm12 + + + vmovdqa %xmm9,0(%rsi) + vmovdqa %xmm10,16(%rsi) + vmovdqa %xmm11,32(%rsi) + vmovdqa %xmm12,48(%rsi) + ret + + +.globl _aes128gcmsiv_enc_msg_x4 +.private_extern _aes128gcmsiv_enc_msg_x4 + +.p2align 4 +_aes128gcmsiv_enc_msg_x4: + +_CET_ENDBR + testq %r8,%r8 + jnz L$128_enc_msg_x4_start + ret + +L$128_enc_msg_x4_start: + pushq %r12 + + pushq %r13 + + + shrq $4,%r8 + movq %r8,%r10 + shlq $62,%r10 + shrq $62,%r10 + + + vmovdqa (%rdx),%xmm15 + vpor OR_MASK(%rip),%xmm15,%xmm15 + + vmovdqu four(%rip),%xmm4 + vmovdqa %xmm15,%xmm0 + vpaddd one(%rip),%xmm15,%xmm1 + vpaddd two(%rip),%xmm15,%xmm2 + vpaddd three(%rip),%xmm15,%xmm3 + + shrq $2,%r8 + je L$128_enc_msg_x4_check_remainder + + subq $64,%rsi + subq $64,%rdi + +L$128_enc_msg_x4_loop1: + addq $64,%rsi + addq $64,%rdi + + vmovdqa %xmm0,%xmm5 + vmovdqa %xmm1,%xmm6 + vmovdqa %xmm2,%xmm7 + vmovdqa %xmm3,%xmm8 + + vpxor (%rcx),%xmm5,%xmm5 + vpxor (%rcx),%xmm6,%xmm6 + vpxor (%rcx),%xmm7,%xmm7 + vpxor (%rcx),%xmm8,%xmm8 + + vmovdqu 16(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqu 32(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm3,%xmm3 + + vmovdqu 80(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 96(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 112(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 128(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 144(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 160(%rcx),%xmm12 + vaesenclast %xmm12,%xmm5,%xmm5 + vaesenclast %xmm12,%xmm6,%xmm6 + vaesenclast %xmm12,%xmm7,%xmm7 + vaesenclast %xmm12,%xmm8,%xmm8 + + + + vpxor 0(%rdi),%xmm5,%xmm5 + vpxor 16(%rdi),%xmm6,%xmm6 + vpxor 32(%rdi),%xmm7,%xmm7 + vpxor 48(%rdi),%xmm8,%xmm8 + + subq $1,%r8 + + vmovdqu %xmm5,0(%rsi) + vmovdqu %xmm6,16(%rsi) + vmovdqu %xmm7,32(%rsi) + vmovdqu %xmm8,48(%rsi) + + jne L$128_enc_msg_x4_loop1 + + addq $64,%rsi + addq $64,%rdi + +L$128_enc_msg_x4_check_remainder: + cmpq $0,%r10 + je L$128_enc_msg_x4_out + +L$128_enc_msg_x4_loop2: + + + vmovdqa %xmm0,%xmm5 + vpaddd one(%rip),%xmm0,%xmm0 + + vpxor (%rcx),%xmm5,%xmm5 + vaesenc 16(%rcx),%xmm5,%xmm5 + vaesenc 32(%rcx),%xmm5,%xmm5 + vaesenc 48(%rcx),%xmm5,%xmm5 + vaesenc 64(%rcx),%xmm5,%xmm5 + vaesenc 80(%rcx),%xmm5,%xmm5 + vaesenc 96(%rcx),%xmm5,%xmm5 + vaesenc 112(%rcx),%xmm5,%xmm5 + vaesenc 128(%rcx),%xmm5,%xmm5 + vaesenc 144(%rcx),%xmm5,%xmm5 + vaesenclast 160(%rcx),%xmm5,%xmm5 + + + vpxor (%rdi),%xmm5,%xmm5 + vmovdqu %xmm5,(%rsi) + + addq $16,%rdi + addq $16,%rsi + + subq $1,%r10 + jne L$128_enc_msg_x4_loop2 + +L$128_enc_msg_x4_out: + popq %r13 + + popq %r12 + + ret + + +.globl _aes128gcmsiv_enc_msg_x8 +.private_extern _aes128gcmsiv_enc_msg_x8 + +.p2align 4 +_aes128gcmsiv_enc_msg_x8: + +_CET_ENDBR + testq %r8,%r8 + jnz L$128_enc_msg_x8_start + ret + +L$128_enc_msg_x8_start: + pushq %r12 + + pushq %r13 + + pushq %rbp + + movq %rsp,%rbp + + + + subq $128,%rsp + andq $-64,%rsp + + shrq $4,%r8 + movq %r8,%r10 + shlq $61,%r10 + shrq $61,%r10 + + + vmovdqu (%rdx),%xmm1 + vpor OR_MASK(%rip),%xmm1,%xmm1 + + + vpaddd seven(%rip),%xmm1,%xmm0 + vmovdqu %xmm0,(%rsp) + vpaddd one(%rip),%xmm1,%xmm9 + vpaddd two(%rip),%xmm1,%xmm10 + vpaddd three(%rip),%xmm1,%xmm11 + vpaddd four(%rip),%xmm1,%xmm12 + vpaddd five(%rip),%xmm1,%xmm13 + vpaddd six(%rip),%xmm1,%xmm14 + vmovdqa %xmm1,%xmm0 + + shrq $3,%r8 + je L$128_enc_msg_x8_check_remainder + + subq $128,%rsi + subq $128,%rdi + +L$128_enc_msg_x8_loop1: + addq $128,%rsi + addq $128,%rdi + + vmovdqa %xmm0,%xmm1 + vmovdqa %xmm9,%xmm2 + vmovdqa %xmm10,%xmm3 + vmovdqa %xmm11,%xmm4 + vmovdqa %xmm12,%xmm5 + vmovdqa %xmm13,%xmm6 + vmovdqa %xmm14,%xmm7 + + vmovdqu (%rsp),%xmm8 + + vpxor (%rcx),%xmm1,%xmm1 + vpxor (%rcx),%xmm2,%xmm2 + vpxor (%rcx),%xmm3,%xmm3 + vpxor (%rcx),%xmm4,%xmm4 + vpxor (%rcx),%xmm5,%xmm5 + vpxor (%rcx),%xmm6,%xmm6 + vpxor (%rcx),%xmm7,%xmm7 + vpxor (%rcx),%xmm8,%xmm8 + + vmovdqu 16(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu (%rsp),%xmm14 + vpaddd eight(%rip),%xmm14,%xmm14 + vmovdqu %xmm14,(%rsp) + vmovdqu 32(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpsubd one(%rip),%xmm14,%xmm14 + vmovdqu 48(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm0,%xmm0 + vmovdqu 64(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm9,%xmm9 + vmovdqu 80(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm10,%xmm10 + vmovdqu 96(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm11,%xmm11 + vmovdqu 112(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm12,%xmm12 + vmovdqu 128(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm13,%xmm13 + vmovdqu 144(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 160(%rcx),%xmm15 + vaesenclast %xmm15,%xmm1,%xmm1 + vaesenclast %xmm15,%xmm2,%xmm2 + vaesenclast %xmm15,%xmm3,%xmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vaesenclast %xmm15,%xmm6,%xmm6 + vaesenclast %xmm15,%xmm7,%xmm7 + vaesenclast %xmm15,%xmm8,%xmm8 + + + + vpxor 0(%rdi),%xmm1,%xmm1 + vpxor 16(%rdi),%xmm2,%xmm2 + vpxor 32(%rdi),%xmm3,%xmm3 + vpxor 48(%rdi),%xmm4,%xmm4 + vpxor 64(%rdi),%xmm5,%xmm5 + vpxor 80(%rdi),%xmm6,%xmm6 + vpxor 96(%rdi),%xmm7,%xmm7 + vpxor 112(%rdi),%xmm8,%xmm8 + + decq %r8 + + vmovdqu %xmm1,0(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + vmovdqu %xmm7,96(%rsi) + vmovdqu %xmm8,112(%rsi) + + jne L$128_enc_msg_x8_loop1 + + addq $128,%rsi + addq $128,%rdi + +L$128_enc_msg_x8_check_remainder: + cmpq $0,%r10 + je L$128_enc_msg_x8_out + +L$128_enc_msg_x8_loop2: + + + vmovdqa %xmm0,%xmm1 + vpaddd one(%rip),%xmm0,%xmm0 + + vpxor (%rcx),%xmm1,%xmm1 + vaesenc 16(%rcx),%xmm1,%xmm1 + vaesenc 32(%rcx),%xmm1,%xmm1 + vaesenc 48(%rcx),%xmm1,%xmm1 + vaesenc 64(%rcx),%xmm1,%xmm1 + vaesenc 80(%rcx),%xmm1,%xmm1 + vaesenc 96(%rcx),%xmm1,%xmm1 + vaesenc 112(%rcx),%xmm1,%xmm1 + vaesenc 128(%rcx),%xmm1,%xmm1 + vaesenc 144(%rcx),%xmm1,%xmm1 + vaesenclast 160(%rcx),%xmm1,%xmm1 + + + vpxor (%rdi),%xmm1,%xmm1 + + vmovdqu %xmm1,(%rsi) + + addq $16,%rdi + addq $16,%rsi + + decq %r10 + jne L$128_enc_msg_x8_loop2 + +L$128_enc_msg_x8_out: + movq %rbp,%rsp + + popq %rbp + + popq %r13 + + popq %r12 + + ret + + +.globl _aes128gcmsiv_dec +.private_extern _aes128gcmsiv_dec + +.p2align 4 +_aes128gcmsiv_dec: + +_CET_ENDBR + testq $~15,%r9 + jnz L$128_dec_start + ret + +L$128_dec_start: + vzeroupper + vmovdqa (%rdx),%xmm0 + + + vmovdqu 16(%rdx),%xmm15 + vpor OR_MASK(%rip),%xmm15,%xmm15 + movq %rdx,%rax + + leaq 32(%rax),%rax + leaq 32(%rcx),%rcx + + andq $~15,%r9 + + + cmpq $96,%r9 + jb L$128_dec_loop2 + + + subq $96,%r9 + vmovdqa %xmm15,%xmm7 + vpaddd one(%rip),%xmm7,%xmm8 + vpaddd two(%rip),%xmm7,%xmm9 + vpaddd one(%rip),%xmm9,%xmm10 + vpaddd two(%rip),%xmm9,%xmm11 + vpaddd one(%rip),%xmm11,%xmm12 + vpaddd two(%rip),%xmm11,%xmm15 + + vpxor (%r8),%xmm7,%xmm7 + vpxor (%r8),%xmm8,%xmm8 + vpxor (%r8),%xmm9,%xmm9 + vpxor (%r8),%xmm10,%xmm10 + vpxor (%r8),%xmm11,%xmm11 + vpxor (%r8),%xmm12,%xmm12 + + vmovdqu 16(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 32(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 48(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 64(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 80(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 96(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 112(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 128(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 144(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 160(%r8),%xmm4 + vaesenclast %xmm4,%xmm7,%xmm7 + vaesenclast %xmm4,%xmm8,%xmm8 + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm4,%xmm10,%xmm10 + vaesenclast %xmm4,%xmm11,%xmm11 + vaesenclast %xmm4,%xmm12,%xmm12 + + + vpxor 0(%rdi),%xmm7,%xmm7 + vpxor 16(%rdi),%xmm8,%xmm8 + vpxor 32(%rdi),%xmm9,%xmm9 + vpxor 48(%rdi),%xmm10,%xmm10 + vpxor 64(%rdi),%xmm11,%xmm11 + vpxor 80(%rdi),%xmm12,%xmm12 + + vmovdqu %xmm7,0(%rsi) + vmovdqu %xmm8,16(%rsi) + vmovdqu %xmm9,32(%rsi) + vmovdqu %xmm10,48(%rsi) + vmovdqu %xmm11,64(%rsi) + vmovdqu %xmm12,80(%rsi) + + addq $96,%rdi + addq $96,%rsi + jmp L$128_dec_loop1 + + +.p2align 6 +L$128_dec_loop1: + cmpq $96,%r9 + jb L$128_dec_finish_96 + subq $96,%r9 + + vmovdqa %xmm12,%xmm6 + vmovdqa %xmm11,16-32(%rax) + vmovdqa %xmm10,32-32(%rax) + vmovdqa %xmm9,48-32(%rax) + vmovdqa %xmm8,64-32(%rax) + vmovdqa %xmm7,80-32(%rax) + + vmovdqa %xmm15,%xmm7 + vpaddd one(%rip),%xmm7,%xmm8 + vpaddd two(%rip),%xmm7,%xmm9 + vpaddd one(%rip),%xmm9,%xmm10 + vpaddd two(%rip),%xmm9,%xmm11 + vpaddd one(%rip),%xmm11,%xmm12 + vpaddd two(%rip),%xmm11,%xmm15 + + vmovdqa (%r8),%xmm4 + vpxor %xmm4,%xmm7,%xmm7 + vpxor %xmm4,%xmm8,%xmm8 + vpxor %xmm4,%xmm9,%xmm9 + vpxor %xmm4,%xmm10,%xmm10 + vpxor %xmm4,%xmm11,%xmm11 + vpxor %xmm4,%xmm12,%xmm12 + + vmovdqu 0-32(%rcx),%xmm4 + vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 + vpclmulqdq $0x01,%xmm4,%xmm6,%xmm1 + vpclmulqdq $0x10,%xmm4,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 16(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu -16(%rax),%xmm6 + vmovdqu -16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 32(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 0(%rax),%xmm6 + vmovdqu 0(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 48(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 16(%rax),%xmm6 + vmovdqu 16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 64(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 32(%rax),%xmm6 + vmovdqu 32(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 80(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 96(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 112(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + + vmovdqa 80-32(%rax),%xmm6 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 80-32(%rcx),%xmm5 + + vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 128(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + + vpsrldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm2,%xmm5 + vpslldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm0 + + vmovdqa poly(%rip),%xmm3 + + vmovdqu 144(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 160(%r8),%xmm6 + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpxor 0(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm7,%xmm7 + vpxor 16(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm8,%xmm8 + vpxor 32(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm9,%xmm9 + vpxor 48(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm10,%xmm10 + vpxor 64(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm11,%xmm11 + vpxor 80(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm12,%xmm12 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vmovdqu %xmm7,0(%rsi) + vmovdqu %xmm8,16(%rsi) + vmovdqu %xmm9,32(%rsi) + vmovdqu %xmm10,48(%rsi) + vmovdqu %xmm11,64(%rsi) + vmovdqu %xmm12,80(%rsi) + + vpxor %xmm5,%xmm0,%xmm0 + + leaq 96(%rdi),%rdi + leaq 96(%rsi),%rsi + jmp L$128_dec_loop1 + +L$128_dec_finish_96: + vmovdqa %xmm12,%xmm6 + vmovdqa %xmm11,16-32(%rax) + vmovdqa %xmm10,32-32(%rax) + vmovdqa %xmm9,48-32(%rax) + vmovdqa %xmm8,64-32(%rax) + vmovdqa %xmm7,80-32(%rax) + + vmovdqu 0-32(%rcx),%xmm4 + vpclmulqdq $0x10,%xmm4,%xmm6,%xmm1 + vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 + vpclmulqdq $0x01,%xmm4,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu -16(%rax),%xmm6 + vmovdqu -16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 0(%rax),%xmm6 + vmovdqu 0(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 16(%rax),%xmm6 + vmovdqu 16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 32(%rax),%xmm6 + vmovdqu 32(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 80-32(%rax),%xmm6 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 80-32(%rcx),%xmm5 + vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm2,%xmm5 + vpslldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm0 + + vmovdqa poly(%rip),%xmm3 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpxor %xmm5,%xmm0,%xmm0 + +L$128_dec_loop2: + + + + cmpq $16,%r9 + jb L$128_dec_out + subq $16,%r9 + + vmovdqa %xmm15,%xmm2 + vpaddd one(%rip),%xmm15,%xmm15 + + vpxor 0(%r8),%xmm2,%xmm2 + vaesenc 16(%r8),%xmm2,%xmm2 + vaesenc 32(%r8),%xmm2,%xmm2 + vaesenc 48(%r8),%xmm2,%xmm2 + vaesenc 64(%r8),%xmm2,%xmm2 + vaesenc 80(%r8),%xmm2,%xmm2 + vaesenc 96(%r8),%xmm2,%xmm2 + vaesenc 112(%r8),%xmm2,%xmm2 + vaesenc 128(%r8),%xmm2,%xmm2 + vaesenc 144(%r8),%xmm2,%xmm2 + vaesenclast 160(%r8),%xmm2,%xmm2 + vpxor (%rdi),%xmm2,%xmm2 + vmovdqu %xmm2,(%rsi) + addq $16,%rdi + addq $16,%rsi + + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa -32(%rcx),%xmm1 + call GFMUL + + jmp L$128_dec_loop2 + +L$128_dec_out: + vmovdqu %xmm0,(%rdx) + ret + + +.globl _aes128gcmsiv_ecb_enc_block +.private_extern _aes128gcmsiv_ecb_enc_block + +.p2align 4 +_aes128gcmsiv_ecb_enc_block: + +_CET_ENDBR + vmovdqa (%rdi),%xmm1 + + vpxor (%rdx),%xmm1,%xmm1 + vaesenc 16(%rdx),%xmm1,%xmm1 + vaesenc 32(%rdx),%xmm1,%xmm1 + vaesenc 48(%rdx),%xmm1,%xmm1 + vaesenc 64(%rdx),%xmm1,%xmm1 + vaesenc 80(%rdx),%xmm1,%xmm1 + vaesenc 96(%rdx),%xmm1,%xmm1 + vaesenc 112(%rdx),%xmm1,%xmm1 + vaesenc 128(%rdx),%xmm1,%xmm1 + vaesenc 144(%rdx),%xmm1,%xmm1 + vaesenclast 160(%rdx),%xmm1,%xmm1 + + vmovdqa %xmm1,(%rsi) + + ret + + +.globl _aes256gcmsiv_aes_ks_enc_x1 +.private_extern _aes256gcmsiv_aes_ks_enc_x1 + +.p2align 4 +_aes256gcmsiv_aes_ks_enc_x1: + +_CET_ENDBR + vmovdqa con1(%rip),%xmm0 + vmovdqa mask(%rip),%xmm15 + vmovdqa (%rdi),%xmm8 + vmovdqa (%rcx),%xmm1 + vmovdqa 16(%rcx),%xmm3 + vpxor %xmm1,%xmm8,%xmm8 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm1,(%rdx) + vmovdqu %xmm3,16(%rdx) + vpxor %xmm14,%xmm14,%xmm14 + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,32(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,48(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,64(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,80(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,96(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,112(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,128(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,144(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,160(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,176(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,192(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,208(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenclast %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,224(%rdx) + + vmovdqa %xmm8,(%rsi) + ret + + +.globl _aes256gcmsiv_ecb_enc_block +.private_extern _aes256gcmsiv_ecb_enc_block + +.p2align 4 +_aes256gcmsiv_ecb_enc_block: + +_CET_ENDBR + vmovdqa (%rdi),%xmm1 + vpxor (%rdx),%xmm1,%xmm1 + vaesenc 16(%rdx),%xmm1,%xmm1 + vaesenc 32(%rdx),%xmm1,%xmm1 + vaesenc 48(%rdx),%xmm1,%xmm1 + vaesenc 64(%rdx),%xmm1,%xmm1 + vaesenc 80(%rdx),%xmm1,%xmm1 + vaesenc 96(%rdx),%xmm1,%xmm1 + vaesenc 112(%rdx),%xmm1,%xmm1 + vaesenc 128(%rdx),%xmm1,%xmm1 + vaesenc 144(%rdx),%xmm1,%xmm1 + vaesenc 160(%rdx),%xmm1,%xmm1 + vaesenc 176(%rdx),%xmm1,%xmm1 + vaesenc 192(%rdx),%xmm1,%xmm1 + vaesenc 208(%rdx),%xmm1,%xmm1 + vaesenclast 224(%rdx),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsi) + ret + + +.globl _aes256gcmsiv_enc_msg_x4 +.private_extern _aes256gcmsiv_enc_msg_x4 + +.p2align 4 +_aes256gcmsiv_enc_msg_x4: + +_CET_ENDBR + testq %r8,%r8 + jnz L$256_enc_msg_x4_start + ret + +L$256_enc_msg_x4_start: + movq %r8,%r10 + shrq $4,%r8 + shlq $60,%r10 + jz L$256_enc_msg_x4_start2 + addq $1,%r8 + +L$256_enc_msg_x4_start2: + movq %r8,%r10 + shlq $62,%r10 + shrq $62,%r10 + + + vmovdqa (%rdx),%xmm15 + vpor OR_MASK(%rip),%xmm15,%xmm15 + + vmovdqa four(%rip),%xmm4 + vmovdqa %xmm15,%xmm0 + vpaddd one(%rip),%xmm15,%xmm1 + vpaddd two(%rip),%xmm15,%xmm2 + vpaddd three(%rip),%xmm15,%xmm3 + + shrq $2,%r8 + je L$256_enc_msg_x4_check_remainder + + subq $64,%rsi + subq $64,%rdi + +L$256_enc_msg_x4_loop1: + addq $64,%rsi + addq $64,%rdi + + vmovdqa %xmm0,%xmm5 + vmovdqa %xmm1,%xmm6 + vmovdqa %xmm2,%xmm7 + vmovdqa %xmm3,%xmm8 + + vpxor (%rcx),%xmm5,%xmm5 + vpxor (%rcx),%xmm6,%xmm6 + vpxor (%rcx),%xmm7,%xmm7 + vpxor (%rcx),%xmm8,%xmm8 + + vmovdqu 16(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqu 32(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm3,%xmm3 + + vmovdqu 80(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 96(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 112(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 128(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 144(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 160(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 176(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 192(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 208(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 224(%rcx),%xmm12 + vaesenclast %xmm12,%xmm5,%xmm5 + vaesenclast %xmm12,%xmm6,%xmm6 + vaesenclast %xmm12,%xmm7,%xmm7 + vaesenclast %xmm12,%xmm8,%xmm8 + + + + vpxor 0(%rdi),%xmm5,%xmm5 + vpxor 16(%rdi),%xmm6,%xmm6 + vpxor 32(%rdi),%xmm7,%xmm7 + vpxor 48(%rdi),%xmm8,%xmm8 + + subq $1,%r8 + + vmovdqu %xmm5,0(%rsi) + vmovdqu %xmm6,16(%rsi) + vmovdqu %xmm7,32(%rsi) + vmovdqu %xmm8,48(%rsi) + + jne L$256_enc_msg_x4_loop1 + + addq $64,%rsi + addq $64,%rdi + +L$256_enc_msg_x4_check_remainder: + cmpq $0,%r10 + je L$256_enc_msg_x4_out + +L$256_enc_msg_x4_loop2: + + + + vmovdqa %xmm0,%xmm5 + vpaddd one(%rip),%xmm0,%xmm0 + vpxor (%rcx),%xmm5,%xmm5 + vaesenc 16(%rcx),%xmm5,%xmm5 + vaesenc 32(%rcx),%xmm5,%xmm5 + vaesenc 48(%rcx),%xmm5,%xmm5 + vaesenc 64(%rcx),%xmm5,%xmm5 + vaesenc 80(%rcx),%xmm5,%xmm5 + vaesenc 96(%rcx),%xmm5,%xmm5 + vaesenc 112(%rcx),%xmm5,%xmm5 + vaesenc 128(%rcx),%xmm5,%xmm5 + vaesenc 144(%rcx),%xmm5,%xmm5 + vaesenc 160(%rcx),%xmm5,%xmm5 + vaesenc 176(%rcx),%xmm5,%xmm5 + vaesenc 192(%rcx),%xmm5,%xmm5 + vaesenc 208(%rcx),%xmm5,%xmm5 + vaesenclast 224(%rcx),%xmm5,%xmm5 + + + vpxor (%rdi),%xmm5,%xmm5 + + vmovdqu %xmm5,(%rsi) + + addq $16,%rdi + addq $16,%rsi + + subq $1,%r10 + jne L$256_enc_msg_x4_loop2 + +L$256_enc_msg_x4_out: + ret + + +.globl _aes256gcmsiv_enc_msg_x8 +.private_extern _aes256gcmsiv_enc_msg_x8 + +.p2align 4 +_aes256gcmsiv_enc_msg_x8: + +_CET_ENDBR + testq %r8,%r8 + jnz L$256_enc_msg_x8_start + ret + +L$256_enc_msg_x8_start: + + movq %rsp,%r11 + subq $16,%r11 + andq $-64,%r11 + + movq %r8,%r10 + shrq $4,%r8 + shlq $60,%r10 + jz L$256_enc_msg_x8_start2 + addq $1,%r8 + +L$256_enc_msg_x8_start2: + movq %r8,%r10 + shlq $61,%r10 + shrq $61,%r10 + + + vmovdqa (%rdx),%xmm1 + vpor OR_MASK(%rip),%xmm1,%xmm1 + + + vpaddd seven(%rip),%xmm1,%xmm0 + vmovdqa %xmm0,(%r11) + vpaddd one(%rip),%xmm1,%xmm9 + vpaddd two(%rip),%xmm1,%xmm10 + vpaddd three(%rip),%xmm1,%xmm11 + vpaddd four(%rip),%xmm1,%xmm12 + vpaddd five(%rip),%xmm1,%xmm13 + vpaddd six(%rip),%xmm1,%xmm14 + vmovdqa %xmm1,%xmm0 + + shrq $3,%r8 + jz L$256_enc_msg_x8_check_remainder + + subq $128,%rsi + subq $128,%rdi + +L$256_enc_msg_x8_loop1: + addq $128,%rsi + addq $128,%rdi + + vmovdqa %xmm0,%xmm1 + vmovdqa %xmm9,%xmm2 + vmovdqa %xmm10,%xmm3 + vmovdqa %xmm11,%xmm4 + vmovdqa %xmm12,%xmm5 + vmovdqa %xmm13,%xmm6 + vmovdqa %xmm14,%xmm7 + + vmovdqa (%r11),%xmm8 + + vpxor (%rcx),%xmm1,%xmm1 + vpxor (%rcx),%xmm2,%xmm2 + vpxor (%rcx),%xmm3,%xmm3 + vpxor (%rcx),%xmm4,%xmm4 + vpxor (%rcx),%xmm5,%xmm5 + vpxor (%rcx),%xmm6,%xmm6 + vpxor (%rcx),%xmm7,%xmm7 + vpxor (%rcx),%xmm8,%xmm8 + + vmovdqu 16(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqa (%r11),%xmm14 + vpaddd eight(%rip),%xmm14,%xmm14 + vmovdqa %xmm14,(%r11) + vmovdqu 32(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpsubd one(%rip),%xmm14,%xmm14 + vmovdqu 48(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm0,%xmm0 + vmovdqu 64(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm9,%xmm9 + vmovdqu 80(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm10,%xmm10 + vmovdqu 96(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm11,%xmm11 + vmovdqu 112(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm12,%xmm12 + vmovdqu 128(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm13,%xmm13 + vmovdqu 144(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 160(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 176(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 192(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 208(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 224(%rcx),%xmm15 + vaesenclast %xmm15,%xmm1,%xmm1 + vaesenclast %xmm15,%xmm2,%xmm2 + vaesenclast %xmm15,%xmm3,%xmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vaesenclast %xmm15,%xmm6,%xmm6 + vaesenclast %xmm15,%xmm7,%xmm7 + vaesenclast %xmm15,%xmm8,%xmm8 + + + + vpxor 0(%rdi),%xmm1,%xmm1 + vpxor 16(%rdi),%xmm2,%xmm2 + vpxor 32(%rdi),%xmm3,%xmm3 + vpxor 48(%rdi),%xmm4,%xmm4 + vpxor 64(%rdi),%xmm5,%xmm5 + vpxor 80(%rdi),%xmm6,%xmm6 + vpxor 96(%rdi),%xmm7,%xmm7 + vpxor 112(%rdi),%xmm8,%xmm8 + + subq $1,%r8 + + vmovdqu %xmm1,0(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + vmovdqu %xmm7,96(%rsi) + vmovdqu %xmm8,112(%rsi) + + jne L$256_enc_msg_x8_loop1 + + addq $128,%rsi + addq $128,%rdi + +L$256_enc_msg_x8_check_remainder: + cmpq $0,%r10 + je L$256_enc_msg_x8_out + +L$256_enc_msg_x8_loop2: + + + vmovdqa %xmm0,%xmm1 + vpaddd one(%rip),%xmm0,%xmm0 + + vpxor (%rcx),%xmm1,%xmm1 + vaesenc 16(%rcx),%xmm1,%xmm1 + vaesenc 32(%rcx),%xmm1,%xmm1 + vaesenc 48(%rcx),%xmm1,%xmm1 + vaesenc 64(%rcx),%xmm1,%xmm1 + vaesenc 80(%rcx),%xmm1,%xmm1 + vaesenc 96(%rcx),%xmm1,%xmm1 + vaesenc 112(%rcx),%xmm1,%xmm1 + vaesenc 128(%rcx),%xmm1,%xmm1 + vaesenc 144(%rcx),%xmm1,%xmm1 + vaesenc 160(%rcx),%xmm1,%xmm1 + vaesenc 176(%rcx),%xmm1,%xmm1 + vaesenc 192(%rcx),%xmm1,%xmm1 + vaesenc 208(%rcx),%xmm1,%xmm1 + vaesenclast 224(%rcx),%xmm1,%xmm1 + + + vpxor (%rdi),%xmm1,%xmm1 + + vmovdqu %xmm1,(%rsi) + + addq $16,%rdi + addq $16,%rsi + subq $1,%r10 + jnz L$256_enc_msg_x8_loop2 + +L$256_enc_msg_x8_out: + ret + + + +.globl _aes256gcmsiv_dec +.private_extern _aes256gcmsiv_dec + +.p2align 4 +_aes256gcmsiv_dec: + +_CET_ENDBR + testq $~15,%r9 + jnz L$256_dec_start + ret + +L$256_dec_start: + vzeroupper + vmovdqa (%rdx),%xmm0 + + + vmovdqu 16(%rdx),%xmm15 + vpor OR_MASK(%rip),%xmm15,%xmm15 + movq %rdx,%rax + + leaq 32(%rax),%rax + leaq 32(%rcx),%rcx + + andq $~15,%r9 + + + cmpq $96,%r9 + jb L$256_dec_loop2 + + + subq $96,%r9 + vmovdqa %xmm15,%xmm7 + vpaddd one(%rip),%xmm7,%xmm8 + vpaddd two(%rip),%xmm7,%xmm9 + vpaddd one(%rip),%xmm9,%xmm10 + vpaddd two(%rip),%xmm9,%xmm11 + vpaddd one(%rip),%xmm11,%xmm12 + vpaddd two(%rip),%xmm11,%xmm15 + + vpxor (%r8),%xmm7,%xmm7 + vpxor (%r8),%xmm8,%xmm8 + vpxor (%r8),%xmm9,%xmm9 + vpxor (%r8),%xmm10,%xmm10 + vpxor (%r8),%xmm11,%xmm11 + vpxor (%r8),%xmm12,%xmm12 + + vmovdqu 16(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 32(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 48(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 64(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 80(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 96(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 112(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 128(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 144(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 160(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 176(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 192(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 208(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 224(%r8),%xmm4 + vaesenclast %xmm4,%xmm7,%xmm7 + vaesenclast %xmm4,%xmm8,%xmm8 + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm4,%xmm10,%xmm10 + vaesenclast %xmm4,%xmm11,%xmm11 + vaesenclast %xmm4,%xmm12,%xmm12 + + + vpxor 0(%rdi),%xmm7,%xmm7 + vpxor 16(%rdi),%xmm8,%xmm8 + vpxor 32(%rdi),%xmm9,%xmm9 + vpxor 48(%rdi),%xmm10,%xmm10 + vpxor 64(%rdi),%xmm11,%xmm11 + vpxor 80(%rdi),%xmm12,%xmm12 + + vmovdqu %xmm7,0(%rsi) + vmovdqu %xmm8,16(%rsi) + vmovdqu %xmm9,32(%rsi) + vmovdqu %xmm10,48(%rsi) + vmovdqu %xmm11,64(%rsi) + vmovdqu %xmm12,80(%rsi) + + addq $96,%rdi + addq $96,%rsi + jmp L$256_dec_loop1 + + +.p2align 6 +L$256_dec_loop1: + cmpq $96,%r9 + jb L$256_dec_finish_96 + subq $96,%r9 + + vmovdqa %xmm12,%xmm6 + vmovdqa %xmm11,16-32(%rax) + vmovdqa %xmm10,32-32(%rax) + vmovdqa %xmm9,48-32(%rax) + vmovdqa %xmm8,64-32(%rax) + vmovdqa %xmm7,80-32(%rax) + + vmovdqa %xmm15,%xmm7 + vpaddd one(%rip),%xmm7,%xmm8 + vpaddd two(%rip),%xmm7,%xmm9 + vpaddd one(%rip),%xmm9,%xmm10 + vpaddd two(%rip),%xmm9,%xmm11 + vpaddd one(%rip),%xmm11,%xmm12 + vpaddd two(%rip),%xmm11,%xmm15 + + vmovdqa (%r8),%xmm4 + vpxor %xmm4,%xmm7,%xmm7 + vpxor %xmm4,%xmm8,%xmm8 + vpxor %xmm4,%xmm9,%xmm9 + vpxor %xmm4,%xmm10,%xmm10 + vpxor %xmm4,%xmm11,%xmm11 + vpxor %xmm4,%xmm12,%xmm12 + + vmovdqu 0-32(%rcx),%xmm4 + vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 + vpclmulqdq $0x01,%xmm4,%xmm6,%xmm1 + vpclmulqdq $0x10,%xmm4,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 16(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu -16(%rax),%xmm6 + vmovdqu -16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 32(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 0(%rax),%xmm6 + vmovdqu 0(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 48(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 16(%rax),%xmm6 + vmovdqu 16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 64(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 32(%rax),%xmm6 + vmovdqu 32(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 80(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 96(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 112(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + + vmovdqa 80-32(%rax),%xmm6 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 80-32(%rcx),%xmm5 + + vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 128(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + + vpsrldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm2,%xmm5 + vpslldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm0 + + vmovdqa poly(%rip),%xmm3 + + vmovdqu 144(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 160(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 176(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 192(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 208(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 224(%r8),%xmm6 + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpxor 0(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm7,%xmm7 + vpxor 16(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm8,%xmm8 + vpxor 32(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm9,%xmm9 + vpxor 48(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm10,%xmm10 + vpxor 64(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm11,%xmm11 + vpxor 80(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm12,%xmm12 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vmovdqu %xmm7,0(%rsi) + vmovdqu %xmm8,16(%rsi) + vmovdqu %xmm9,32(%rsi) + vmovdqu %xmm10,48(%rsi) + vmovdqu %xmm11,64(%rsi) + vmovdqu %xmm12,80(%rsi) + + vpxor %xmm5,%xmm0,%xmm0 + + leaq 96(%rdi),%rdi + leaq 96(%rsi),%rsi + jmp L$256_dec_loop1 + +L$256_dec_finish_96: + vmovdqa %xmm12,%xmm6 + vmovdqa %xmm11,16-32(%rax) + vmovdqa %xmm10,32-32(%rax) + vmovdqa %xmm9,48-32(%rax) + vmovdqa %xmm8,64-32(%rax) + vmovdqa %xmm7,80-32(%rax) + + vmovdqu 0-32(%rcx),%xmm4 + vpclmulqdq $0x10,%xmm4,%xmm6,%xmm1 + vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 + vpclmulqdq $0x01,%xmm4,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu -16(%rax),%xmm6 + vmovdqu -16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 0(%rax),%xmm6 + vmovdqu 0(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 16(%rax),%xmm6 + vmovdqu 16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 32(%rax),%xmm6 + vmovdqu 32(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 80-32(%rax),%xmm6 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 80-32(%rcx),%xmm5 + vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm2,%xmm5 + vpslldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm0 + + vmovdqa poly(%rip),%xmm3 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpxor %xmm5,%xmm0,%xmm0 + +L$256_dec_loop2: + + + + cmpq $16,%r9 + jb L$256_dec_out + subq $16,%r9 + + vmovdqa %xmm15,%xmm2 + vpaddd one(%rip),%xmm15,%xmm15 + + vpxor 0(%r8),%xmm2,%xmm2 + vaesenc 16(%r8),%xmm2,%xmm2 + vaesenc 32(%r8),%xmm2,%xmm2 + vaesenc 48(%r8),%xmm2,%xmm2 + vaesenc 64(%r8),%xmm2,%xmm2 + vaesenc 80(%r8),%xmm2,%xmm2 + vaesenc 96(%r8),%xmm2,%xmm2 + vaesenc 112(%r8),%xmm2,%xmm2 + vaesenc 128(%r8),%xmm2,%xmm2 + vaesenc 144(%r8),%xmm2,%xmm2 + vaesenc 160(%r8),%xmm2,%xmm2 + vaesenc 176(%r8),%xmm2,%xmm2 + vaesenc 192(%r8),%xmm2,%xmm2 + vaesenc 208(%r8),%xmm2,%xmm2 + vaesenclast 224(%r8),%xmm2,%xmm2 + vpxor (%rdi),%xmm2,%xmm2 + vmovdqu %xmm2,(%rsi) + addq $16,%rdi + addq $16,%rsi + + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa -32(%rcx),%xmm1 + call GFMUL + + jmp L$256_dec_loop2 + +L$256_dec_out: + vmovdqu %xmm0,(%rdx) + ret + + +.globl _aes256gcmsiv_kdf +.private_extern _aes256gcmsiv_kdf + +.p2align 4 +_aes256gcmsiv_kdf: + +_CET_ENDBR + + + + + vmovdqa (%rdx),%xmm1 + vmovdqa 0(%rdi),%xmm4 + vmovdqa and_mask(%rip),%xmm11 + vmovdqa one(%rip),%xmm8 + vpshufd $0x90,%xmm4,%xmm4 + vpand %xmm11,%xmm4,%xmm4 + vpaddd %xmm8,%xmm4,%xmm6 + vpaddd %xmm8,%xmm6,%xmm7 + vpaddd %xmm8,%xmm7,%xmm11 + vpaddd %xmm8,%xmm11,%xmm12 + vpaddd %xmm8,%xmm12,%xmm13 + + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm1,%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm1,%xmm11,%xmm11 + vpxor %xmm1,%xmm12,%xmm12 + vpxor %xmm1,%xmm13,%xmm13 + + vmovdqa 16(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 32(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 48(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 64(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 80(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 96(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 112(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 128(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 144(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 160(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 176(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 192(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 208(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 224(%rdx),%xmm2 + vaesenclast %xmm2,%xmm4,%xmm4 + vaesenclast %xmm2,%xmm6,%xmm6 + vaesenclast %xmm2,%xmm7,%xmm7 + vaesenclast %xmm2,%xmm11,%xmm11 + vaesenclast %xmm2,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + + + vmovdqa %xmm4,0(%rsi) + vmovdqa %xmm6,16(%rsi) + vmovdqa %xmm7,32(%rsi) + vmovdqa %xmm11,48(%rsi) + vmovdqa %xmm12,64(%rsi) + vmovdqa %xmm13,80(%rsi) + ret + + +#endif diff --git a/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-linux.S b/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-linux.S new file mode 100644 index 0000000000..a8de4a9ab8 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-linux.S @@ -0,0 +1,3091 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.section .rodata + +.align 16 +one: +.quad 1,0 +two: +.quad 2,0 +three: +.quad 3,0 +four: +.quad 4,0 +five: +.quad 5,0 +six: +.quad 6,0 +seven: +.quad 7,0 +eight: +.quad 8,0 + +OR_MASK: +.long 0x00000000,0x00000000,0x00000000,0x80000000 +poly: +.quad 0x1, 0xc200000000000000 +mask: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +con1: +.long 1,1,1,1 +con2: +.long 0x1b,0x1b,0x1b,0x1b +con3: +.byte -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7 +and_mask: +.long 0,0xffffffff, 0xffffffff, 0xffffffff +.text +.type GFMUL,@function +.align 16 +GFMUL: +.cfi_startproc + vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 + vpclmulqdq $0x11,%xmm1,%xmm0,%xmm5 + vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 + vpclmulqdq $0x01,%xmm1,%xmm0,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm2,%xmm2 + vpxor %xmm3,%xmm5,%xmm5 + + vpclmulqdq $0x10,poly(%rip),%xmm2,%xmm3 + vpshufd $78,%xmm2,%xmm4 + vpxor %xmm4,%xmm3,%xmm2 + + vpclmulqdq $0x10,poly(%rip),%xmm2,%xmm3 + vpshufd $78,%xmm2,%xmm4 + vpxor %xmm4,%xmm3,%xmm2 + + vpxor %xmm5,%xmm2,%xmm0 + ret +.cfi_endproc +.size GFMUL, .-GFMUL +.globl aesgcmsiv_htable_init +.hidden aesgcmsiv_htable_init +.type aesgcmsiv_htable_init,@function +.align 16 +aesgcmsiv_htable_init: +.cfi_startproc +_CET_ENDBR + vmovdqa (%rsi),%xmm0 + vmovdqa %xmm0,%xmm1 + vmovdqa %xmm0,(%rdi) + call GFMUL + vmovdqa %xmm0,16(%rdi) + call GFMUL + vmovdqa %xmm0,32(%rdi) + call GFMUL + vmovdqa %xmm0,48(%rdi) + call GFMUL + vmovdqa %xmm0,64(%rdi) + call GFMUL + vmovdqa %xmm0,80(%rdi) + call GFMUL + vmovdqa %xmm0,96(%rdi) + call GFMUL + vmovdqa %xmm0,112(%rdi) + ret +.cfi_endproc +.size aesgcmsiv_htable_init, .-aesgcmsiv_htable_init +.globl aesgcmsiv_htable6_init +.hidden aesgcmsiv_htable6_init +.type aesgcmsiv_htable6_init,@function +.align 16 +aesgcmsiv_htable6_init: +.cfi_startproc +_CET_ENDBR + vmovdqa (%rsi),%xmm0 + vmovdqa %xmm0,%xmm1 + vmovdqa %xmm0,(%rdi) + call GFMUL + vmovdqa %xmm0,16(%rdi) + call GFMUL + vmovdqa %xmm0,32(%rdi) + call GFMUL + vmovdqa %xmm0,48(%rdi) + call GFMUL + vmovdqa %xmm0,64(%rdi) + call GFMUL + vmovdqa %xmm0,80(%rdi) + ret +.cfi_endproc +.size aesgcmsiv_htable6_init, .-aesgcmsiv_htable6_init +.globl aesgcmsiv_htable_polyval +.hidden aesgcmsiv_htable_polyval +.type aesgcmsiv_htable_polyval,@function +.align 16 +aesgcmsiv_htable_polyval: +.cfi_startproc +_CET_ENDBR + testq %rdx,%rdx + jnz .Lhtable_polyval_start + ret + +.Lhtable_polyval_start: + vzeroall + + + + movq %rdx,%r11 + andq $127,%r11 + + jz .Lhtable_polyval_no_prefix + + vpxor %xmm9,%xmm9,%xmm9 + vmovdqa (%rcx),%xmm1 + subq %r11,%rdx + + subq $16,%r11 + + + vmovdqu (%rsi),%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + + vpclmulqdq $0x01,(%rdi,%r11,1),%xmm0,%xmm5 + vpclmulqdq $0x00,(%rdi,%r11,1),%xmm0,%xmm3 + vpclmulqdq $0x11,(%rdi,%r11,1),%xmm0,%xmm4 + vpclmulqdq $0x10,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + leaq 16(%rsi),%rsi + testq %r11,%r11 + jnz .Lhtable_polyval_prefix_loop + jmp .Lhtable_polyval_prefix_complete + + +.align 64 +.Lhtable_polyval_prefix_loop: + subq $16,%r11 + + vmovdqu (%rsi),%xmm0 + + vpclmulqdq $0x00,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x01,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x10,(%rdi,%r11,1),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + testq %r11,%r11 + + leaq 16(%rsi),%rsi + + jnz .Lhtable_polyval_prefix_loop + +.Lhtable_polyval_prefix_complete: + vpsrldq $8,%xmm5,%xmm6 + vpslldq $8,%xmm5,%xmm5 + + vpxor %xmm6,%xmm4,%xmm9 + vpxor %xmm5,%xmm3,%xmm1 + + jmp .Lhtable_polyval_main_loop + +.Lhtable_polyval_no_prefix: + + + + + vpxor %xmm1,%xmm1,%xmm1 + vmovdqa (%rcx),%xmm9 + +.align 64 +.Lhtable_polyval_main_loop: + subq $0x80,%rdx + jb .Lhtable_polyval_out + + vmovdqu 112(%rsi),%xmm0 + + vpclmulqdq $0x01,(%rdi),%xmm0,%xmm5 + vpclmulqdq $0x00,(%rdi),%xmm0,%xmm3 + vpclmulqdq $0x11,(%rdi),%xmm0,%xmm4 + vpclmulqdq $0x10,(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vmovdqu 96(%rsi),%xmm0 + vpclmulqdq $0x01,16(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,16(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,16(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,16(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + + vmovdqu 80(%rsi),%xmm0 + + vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm7 + vpalignr $8,%xmm1,%xmm1,%xmm1 + + vpclmulqdq $0x01,32(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,32(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,32(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,32(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vpxor %xmm7,%xmm1,%xmm1 + + vmovdqu 64(%rsi),%xmm0 + + vpclmulqdq $0x01,48(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,48(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,48(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,48(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vmovdqu 48(%rsi),%xmm0 + + vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm7 + vpalignr $8,%xmm1,%xmm1,%xmm1 + + vpclmulqdq $0x01,64(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,64(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,64(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,64(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vpxor %xmm7,%xmm1,%xmm1 + + vmovdqu 32(%rsi),%xmm0 + + vpclmulqdq $0x01,80(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,80(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,80(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,80(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vpxor %xmm9,%xmm1,%xmm1 + + vmovdqu 16(%rsi),%xmm0 + + vpclmulqdq $0x01,96(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,96(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,96(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,96(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vmovdqu 0(%rsi),%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + + vpclmulqdq $0x01,112(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x00,112(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm3,%xmm3 + vpclmulqdq $0x11,112(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm4,%xmm4 + vpclmulqdq $0x10,112(%rdi),%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + + + vpsrldq $8,%xmm5,%xmm6 + vpslldq $8,%xmm5,%xmm5 + + vpxor %xmm6,%xmm4,%xmm9 + vpxor %xmm5,%xmm3,%xmm1 + + leaq 128(%rsi),%rsi + jmp .Lhtable_polyval_main_loop + + + +.Lhtable_polyval_out: + vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm6 + vpalignr $8,%xmm1,%xmm1,%xmm1 + vpxor %xmm6,%xmm1,%xmm1 + + vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm6 + vpalignr $8,%xmm1,%xmm1,%xmm1 + vpxor %xmm6,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + + vmovdqu %xmm1,(%rcx) + vzeroupper + ret +.cfi_endproc +.size aesgcmsiv_htable_polyval,.-aesgcmsiv_htable_polyval +.globl aesgcmsiv_polyval_horner +.hidden aesgcmsiv_polyval_horner +.type aesgcmsiv_polyval_horner,@function +.align 16 +aesgcmsiv_polyval_horner: +.cfi_startproc +_CET_ENDBR + testq %rcx,%rcx + jnz .Lpolyval_horner_start + ret + +.Lpolyval_horner_start: + + + + xorq %r10,%r10 + shlq $4,%rcx + + vmovdqa (%rsi),%xmm1 + vmovdqa (%rdi),%xmm0 + +.Lpolyval_horner_loop: + vpxor (%rdx,%r10,1),%xmm0,%xmm0 + call GFMUL + + addq $16,%r10 + cmpq %r10,%rcx + jne .Lpolyval_horner_loop + + + vmovdqa %xmm0,(%rdi) + ret +.cfi_endproc +.size aesgcmsiv_polyval_horner,.-aesgcmsiv_polyval_horner +.globl aes128gcmsiv_aes_ks +.hidden aes128gcmsiv_aes_ks +.type aes128gcmsiv_aes_ks,@function +.align 16 +aes128gcmsiv_aes_ks: +.cfi_startproc +_CET_ENDBR + vmovdqu (%rdi),%xmm1 + vmovdqa %xmm1,(%rsi) + + vmovdqa con1(%rip),%xmm0 + vmovdqa mask(%rip),%xmm15 + + movq $8,%rax + +.Lks128_loop: + addq $16,%rsi + subq $1,%rax + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,(%rsi) + jne .Lks128_loop + + vmovdqa con2(%rip),%xmm0 + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,16(%rsi) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslldq $4,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpslldq $4,%xmm3,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,32(%rsi) + ret +.cfi_endproc +.size aes128gcmsiv_aes_ks,.-aes128gcmsiv_aes_ks +.globl aes256gcmsiv_aes_ks +.hidden aes256gcmsiv_aes_ks +.type aes256gcmsiv_aes_ks,@function +.align 16 +aes256gcmsiv_aes_ks: +.cfi_startproc +_CET_ENDBR + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm3 + vmovdqa %xmm1,(%rsi) + vmovdqa %xmm3,16(%rsi) + vmovdqa con1(%rip),%xmm0 + vmovdqa mask(%rip),%xmm15 + vpxor %xmm14,%xmm14,%xmm14 + movq $6,%rax + +.Lks256_loop: + addq $32,%rsi + subq $1,%rax + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,(%rsi) + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpsllq $32,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpshufb con3(%rip),%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vmovdqa %xmm3,16(%rsi) + jne .Lks256_loop + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpsllq $32,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm1,32(%rsi) + ret +.cfi_endproc +.globl aes128gcmsiv_aes_ks_enc_x1 +.hidden aes128gcmsiv_aes_ks_enc_x1 +.type aes128gcmsiv_aes_ks_enc_x1,@function +.align 16 +aes128gcmsiv_aes_ks_enc_x1: +.cfi_startproc +_CET_ENDBR + vmovdqa (%rcx),%xmm1 + vmovdqa 0(%rdi),%xmm4 + + vmovdqa %xmm1,(%rdx) + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqa con1(%rip),%xmm0 + vmovdqa mask(%rip),%xmm15 + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,16(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,32(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,48(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,64(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,80(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,96(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,112(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,128(%rdx) + + + vmovdqa con2(%rip),%xmm0 + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenc %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,144(%rdx) + + vpshufb %xmm15,%xmm1,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpsllq $32,%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpshufb con3(%rip),%xmm1,%xmm3 + vpxor %xmm3,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + + vaesenclast %xmm1,%xmm4,%xmm4 + vmovdqa %xmm1,160(%rdx) + + + vmovdqa %xmm4,0(%rsi) + ret +.cfi_endproc +.size aes128gcmsiv_aes_ks_enc_x1,.-aes128gcmsiv_aes_ks_enc_x1 +.globl aes128gcmsiv_kdf +.hidden aes128gcmsiv_kdf +.type aes128gcmsiv_kdf,@function +.align 16 +aes128gcmsiv_kdf: +.cfi_startproc +_CET_ENDBR + + + + + vmovdqa (%rdx),%xmm1 + vmovdqa 0(%rdi),%xmm9 + vmovdqa and_mask(%rip),%xmm12 + vmovdqa one(%rip),%xmm13 + vpshufd $0x90,%xmm9,%xmm9 + vpand %xmm12,%xmm9,%xmm9 + vpaddd %xmm13,%xmm9,%xmm10 + vpaddd %xmm13,%xmm10,%xmm11 + vpaddd %xmm13,%xmm11,%xmm12 + + vpxor %xmm1,%xmm9,%xmm9 + vpxor %xmm1,%xmm10,%xmm10 + vpxor %xmm1,%xmm11,%xmm11 + vpxor %xmm1,%xmm12,%xmm12 + + vmovdqa 16(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 32(%rdx),%xmm2 + vaesenc %xmm2,%xmm9,%xmm9 + vaesenc %xmm2,%xmm10,%xmm10 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + + vmovdqa 48(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 64(%rdx),%xmm2 + vaesenc %xmm2,%xmm9,%xmm9 + vaesenc %xmm2,%xmm10,%xmm10 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + + vmovdqa 80(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 96(%rdx),%xmm2 + vaesenc %xmm2,%xmm9,%xmm9 + vaesenc %xmm2,%xmm10,%xmm10 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + + vmovdqa 112(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 128(%rdx),%xmm2 + vaesenc %xmm2,%xmm9,%xmm9 + vaesenc %xmm2,%xmm10,%xmm10 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + + vmovdqa 144(%rdx),%xmm1 + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + + vmovdqa 160(%rdx),%xmm2 + vaesenclast %xmm2,%xmm9,%xmm9 + vaesenclast %xmm2,%xmm10,%xmm10 + vaesenclast %xmm2,%xmm11,%xmm11 + vaesenclast %xmm2,%xmm12,%xmm12 + + + vmovdqa %xmm9,0(%rsi) + vmovdqa %xmm10,16(%rsi) + vmovdqa %xmm11,32(%rsi) + vmovdqa %xmm12,48(%rsi) + ret +.cfi_endproc +.size aes128gcmsiv_kdf,.-aes128gcmsiv_kdf +.globl aes128gcmsiv_enc_msg_x4 +.hidden aes128gcmsiv_enc_msg_x4 +.type aes128gcmsiv_enc_msg_x4,@function +.align 16 +aes128gcmsiv_enc_msg_x4: +.cfi_startproc +_CET_ENDBR + testq %r8,%r8 + jnz .L128_enc_msg_x4_start + ret + +.L128_enc_msg_x4_start: + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 + + shrq $4,%r8 + movq %r8,%r10 + shlq $62,%r10 + shrq $62,%r10 + + + vmovdqa (%rdx),%xmm15 + vpor OR_MASK(%rip),%xmm15,%xmm15 + + vmovdqu four(%rip),%xmm4 + vmovdqa %xmm15,%xmm0 + vpaddd one(%rip),%xmm15,%xmm1 + vpaddd two(%rip),%xmm15,%xmm2 + vpaddd three(%rip),%xmm15,%xmm3 + + shrq $2,%r8 + je .L128_enc_msg_x4_check_remainder + + subq $64,%rsi + subq $64,%rdi + +.L128_enc_msg_x4_loop1: + addq $64,%rsi + addq $64,%rdi + + vmovdqa %xmm0,%xmm5 + vmovdqa %xmm1,%xmm6 + vmovdqa %xmm2,%xmm7 + vmovdqa %xmm3,%xmm8 + + vpxor (%rcx),%xmm5,%xmm5 + vpxor (%rcx),%xmm6,%xmm6 + vpxor (%rcx),%xmm7,%xmm7 + vpxor (%rcx),%xmm8,%xmm8 + + vmovdqu 16(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqu 32(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm3,%xmm3 + + vmovdqu 80(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 96(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 112(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 128(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 144(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 160(%rcx),%xmm12 + vaesenclast %xmm12,%xmm5,%xmm5 + vaesenclast %xmm12,%xmm6,%xmm6 + vaesenclast %xmm12,%xmm7,%xmm7 + vaesenclast %xmm12,%xmm8,%xmm8 + + + + vpxor 0(%rdi),%xmm5,%xmm5 + vpxor 16(%rdi),%xmm6,%xmm6 + vpxor 32(%rdi),%xmm7,%xmm7 + vpxor 48(%rdi),%xmm8,%xmm8 + + subq $1,%r8 + + vmovdqu %xmm5,0(%rsi) + vmovdqu %xmm6,16(%rsi) + vmovdqu %xmm7,32(%rsi) + vmovdqu %xmm8,48(%rsi) + + jne .L128_enc_msg_x4_loop1 + + addq $64,%rsi + addq $64,%rdi + +.L128_enc_msg_x4_check_remainder: + cmpq $0,%r10 + je .L128_enc_msg_x4_out + +.L128_enc_msg_x4_loop2: + + + vmovdqa %xmm0,%xmm5 + vpaddd one(%rip),%xmm0,%xmm0 + + vpxor (%rcx),%xmm5,%xmm5 + vaesenc 16(%rcx),%xmm5,%xmm5 + vaesenc 32(%rcx),%xmm5,%xmm5 + vaesenc 48(%rcx),%xmm5,%xmm5 + vaesenc 64(%rcx),%xmm5,%xmm5 + vaesenc 80(%rcx),%xmm5,%xmm5 + vaesenc 96(%rcx),%xmm5,%xmm5 + vaesenc 112(%rcx),%xmm5,%xmm5 + vaesenc 128(%rcx),%xmm5,%xmm5 + vaesenc 144(%rcx),%xmm5,%xmm5 + vaesenclast 160(%rcx),%xmm5,%xmm5 + + + vpxor (%rdi),%xmm5,%xmm5 + vmovdqu %xmm5,(%rsi) + + addq $16,%rdi + addq $16,%rsi + + subq $1,%r10 + jne .L128_enc_msg_x4_loop2 + +.L128_enc_msg_x4_out: + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + ret +.cfi_endproc +.size aes128gcmsiv_enc_msg_x4,.-aes128gcmsiv_enc_msg_x4 +.globl aes128gcmsiv_enc_msg_x8 +.hidden aes128gcmsiv_enc_msg_x8 +.type aes128gcmsiv_enc_msg_x8,@function +.align 16 +aes128gcmsiv_enc_msg_x8: +.cfi_startproc +_CET_ENDBR + testq %r8,%r8 + jnz .L128_enc_msg_x8_start + ret + +.L128_enc_msg_x8_start: + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-32 + movq %rsp,%rbp +.cfi_def_cfa_register rbp + + + subq $128,%rsp + andq $-64,%rsp + + shrq $4,%r8 + movq %r8,%r10 + shlq $61,%r10 + shrq $61,%r10 + + + vmovdqu (%rdx),%xmm1 + vpor OR_MASK(%rip),%xmm1,%xmm1 + + + vpaddd seven(%rip),%xmm1,%xmm0 + vmovdqu %xmm0,(%rsp) + vpaddd one(%rip),%xmm1,%xmm9 + vpaddd two(%rip),%xmm1,%xmm10 + vpaddd three(%rip),%xmm1,%xmm11 + vpaddd four(%rip),%xmm1,%xmm12 + vpaddd five(%rip),%xmm1,%xmm13 + vpaddd six(%rip),%xmm1,%xmm14 + vmovdqa %xmm1,%xmm0 + + shrq $3,%r8 + je .L128_enc_msg_x8_check_remainder + + subq $128,%rsi + subq $128,%rdi + +.L128_enc_msg_x8_loop1: + addq $128,%rsi + addq $128,%rdi + + vmovdqa %xmm0,%xmm1 + vmovdqa %xmm9,%xmm2 + vmovdqa %xmm10,%xmm3 + vmovdqa %xmm11,%xmm4 + vmovdqa %xmm12,%xmm5 + vmovdqa %xmm13,%xmm6 + vmovdqa %xmm14,%xmm7 + + vmovdqu (%rsp),%xmm8 + + vpxor (%rcx),%xmm1,%xmm1 + vpxor (%rcx),%xmm2,%xmm2 + vpxor (%rcx),%xmm3,%xmm3 + vpxor (%rcx),%xmm4,%xmm4 + vpxor (%rcx),%xmm5,%xmm5 + vpxor (%rcx),%xmm6,%xmm6 + vpxor (%rcx),%xmm7,%xmm7 + vpxor (%rcx),%xmm8,%xmm8 + + vmovdqu 16(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu (%rsp),%xmm14 + vpaddd eight(%rip),%xmm14,%xmm14 + vmovdqu %xmm14,(%rsp) + vmovdqu 32(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpsubd one(%rip),%xmm14,%xmm14 + vmovdqu 48(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm0,%xmm0 + vmovdqu 64(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm9,%xmm9 + vmovdqu 80(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm10,%xmm10 + vmovdqu 96(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm11,%xmm11 + vmovdqu 112(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm12,%xmm12 + vmovdqu 128(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm13,%xmm13 + vmovdqu 144(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 160(%rcx),%xmm15 + vaesenclast %xmm15,%xmm1,%xmm1 + vaesenclast %xmm15,%xmm2,%xmm2 + vaesenclast %xmm15,%xmm3,%xmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vaesenclast %xmm15,%xmm6,%xmm6 + vaesenclast %xmm15,%xmm7,%xmm7 + vaesenclast %xmm15,%xmm8,%xmm8 + + + + vpxor 0(%rdi),%xmm1,%xmm1 + vpxor 16(%rdi),%xmm2,%xmm2 + vpxor 32(%rdi),%xmm3,%xmm3 + vpxor 48(%rdi),%xmm4,%xmm4 + vpxor 64(%rdi),%xmm5,%xmm5 + vpxor 80(%rdi),%xmm6,%xmm6 + vpxor 96(%rdi),%xmm7,%xmm7 + vpxor 112(%rdi),%xmm8,%xmm8 + + decq %r8 + + vmovdqu %xmm1,0(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + vmovdqu %xmm7,96(%rsi) + vmovdqu %xmm8,112(%rsi) + + jne .L128_enc_msg_x8_loop1 + + addq $128,%rsi + addq $128,%rdi + +.L128_enc_msg_x8_check_remainder: + cmpq $0,%r10 + je .L128_enc_msg_x8_out + +.L128_enc_msg_x8_loop2: + + + vmovdqa %xmm0,%xmm1 + vpaddd one(%rip),%xmm0,%xmm0 + + vpxor (%rcx),%xmm1,%xmm1 + vaesenc 16(%rcx),%xmm1,%xmm1 + vaesenc 32(%rcx),%xmm1,%xmm1 + vaesenc 48(%rcx),%xmm1,%xmm1 + vaesenc 64(%rcx),%xmm1,%xmm1 + vaesenc 80(%rcx),%xmm1,%xmm1 + vaesenc 96(%rcx),%xmm1,%xmm1 + vaesenc 112(%rcx),%xmm1,%xmm1 + vaesenc 128(%rcx),%xmm1,%xmm1 + vaesenc 144(%rcx),%xmm1,%xmm1 + vaesenclast 160(%rcx),%xmm1,%xmm1 + + + vpxor (%rdi),%xmm1,%xmm1 + + vmovdqu %xmm1,(%rsi) + + addq $16,%rdi + addq $16,%rsi + + decq %r10 + jne .L128_enc_msg_x8_loop2 + +.L128_enc_msg_x8_out: + movq %rbp,%rsp +.cfi_def_cfa_register %rsp + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + ret +.cfi_endproc +.size aes128gcmsiv_enc_msg_x8,.-aes128gcmsiv_enc_msg_x8 +.globl aes128gcmsiv_dec +.hidden aes128gcmsiv_dec +.type aes128gcmsiv_dec,@function +.align 16 +aes128gcmsiv_dec: +.cfi_startproc +_CET_ENDBR + testq $~15,%r9 + jnz .L128_dec_start + ret + +.L128_dec_start: + vzeroupper + vmovdqa (%rdx),%xmm0 + + + vmovdqu 16(%rdx),%xmm15 + vpor OR_MASK(%rip),%xmm15,%xmm15 + movq %rdx,%rax + + leaq 32(%rax),%rax + leaq 32(%rcx),%rcx + + andq $~15,%r9 + + + cmpq $96,%r9 + jb .L128_dec_loop2 + + + subq $96,%r9 + vmovdqa %xmm15,%xmm7 + vpaddd one(%rip),%xmm7,%xmm8 + vpaddd two(%rip),%xmm7,%xmm9 + vpaddd one(%rip),%xmm9,%xmm10 + vpaddd two(%rip),%xmm9,%xmm11 + vpaddd one(%rip),%xmm11,%xmm12 + vpaddd two(%rip),%xmm11,%xmm15 + + vpxor (%r8),%xmm7,%xmm7 + vpxor (%r8),%xmm8,%xmm8 + vpxor (%r8),%xmm9,%xmm9 + vpxor (%r8),%xmm10,%xmm10 + vpxor (%r8),%xmm11,%xmm11 + vpxor (%r8),%xmm12,%xmm12 + + vmovdqu 16(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 32(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 48(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 64(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 80(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 96(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 112(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 128(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 144(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 160(%r8),%xmm4 + vaesenclast %xmm4,%xmm7,%xmm7 + vaesenclast %xmm4,%xmm8,%xmm8 + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm4,%xmm10,%xmm10 + vaesenclast %xmm4,%xmm11,%xmm11 + vaesenclast %xmm4,%xmm12,%xmm12 + + + vpxor 0(%rdi),%xmm7,%xmm7 + vpxor 16(%rdi),%xmm8,%xmm8 + vpxor 32(%rdi),%xmm9,%xmm9 + vpxor 48(%rdi),%xmm10,%xmm10 + vpxor 64(%rdi),%xmm11,%xmm11 + vpxor 80(%rdi),%xmm12,%xmm12 + + vmovdqu %xmm7,0(%rsi) + vmovdqu %xmm8,16(%rsi) + vmovdqu %xmm9,32(%rsi) + vmovdqu %xmm10,48(%rsi) + vmovdqu %xmm11,64(%rsi) + vmovdqu %xmm12,80(%rsi) + + addq $96,%rdi + addq $96,%rsi + jmp .L128_dec_loop1 + + +.align 64 +.L128_dec_loop1: + cmpq $96,%r9 + jb .L128_dec_finish_96 + subq $96,%r9 + + vmovdqa %xmm12,%xmm6 + vmovdqa %xmm11,16-32(%rax) + vmovdqa %xmm10,32-32(%rax) + vmovdqa %xmm9,48-32(%rax) + vmovdqa %xmm8,64-32(%rax) + vmovdqa %xmm7,80-32(%rax) + + vmovdqa %xmm15,%xmm7 + vpaddd one(%rip),%xmm7,%xmm8 + vpaddd two(%rip),%xmm7,%xmm9 + vpaddd one(%rip),%xmm9,%xmm10 + vpaddd two(%rip),%xmm9,%xmm11 + vpaddd one(%rip),%xmm11,%xmm12 + vpaddd two(%rip),%xmm11,%xmm15 + + vmovdqa (%r8),%xmm4 + vpxor %xmm4,%xmm7,%xmm7 + vpxor %xmm4,%xmm8,%xmm8 + vpxor %xmm4,%xmm9,%xmm9 + vpxor %xmm4,%xmm10,%xmm10 + vpxor %xmm4,%xmm11,%xmm11 + vpxor %xmm4,%xmm12,%xmm12 + + vmovdqu 0-32(%rcx),%xmm4 + vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 + vpclmulqdq $0x01,%xmm4,%xmm6,%xmm1 + vpclmulqdq $0x10,%xmm4,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 16(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu -16(%rax),%xmm6 + vmovdqu -16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 32(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 0(%rax),%xmm6 + vmovdqu 0(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 48(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 16(%rax),%xmm6 + vmovdqu 16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 64(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 32(%rax),%xmm6 + vmovdqu 32(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 80(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 96(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 112(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + + vmovdqa 80-32(%rax),%xmm6 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 80-32(%rcx),%xmm5 + + vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 128(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + + vpsrldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm2,%xmm5 + vpslldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm0 + + vmovdqa poly(%rip),%xmm3 + + vmovdqu 144(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 160(%r8),%xmm6 + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpxor 0(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm7,%xmm7 + vpxor 16(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm8,%xmm8 + vpxor 32(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm9,%xmm9 + vpxor 48(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm10,%xmm10 + vpxor 64(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm11,%xmm11 + vpxor 80(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm12,%xmm12 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vmovdqu %xmm7,0(%rsi) + vmovdqu %xmm8,16(%rsi) + vmovdqu %xmm9,32(%rsi) + vmovdqu %xmm10,48(%rsi) + vmovdqu %xmm11,64(%rsi) + vmovdqu %xmm12,80(%rsi) + + vpxor %xmm5,%xmm0,%xmm0 + + leaq 96(%rdi),%rdi + leaq 96(%rsi),%rsi + jmp .L128_dec_loop1 + +.L128_dec_finish_96: + vmovdqa %xmm12,%xmm6 + vmovdqa %xmm11,16-32(%rax) + vmovdqa %xmm10,32-32(%rax) + vmovdqa %xmm9,48-32(%rax) + vmovdqa %xmm8,64-32(%rax) + vmovdqa %xmm7,80-32(%rax) + + vmovdqu 0-32(%rcx),%xmm4 + vpclmulqdq $0x10,%xmm4,%xmm6,%xmm1 + vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 + vpclmulqdq $0x01,%xmm4,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu -16(%rax),%xmm6 + vmovdqu -16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 0(%rax),%xmm6 + vmovdqu 0(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 16(%rax),%xmm6 + vmovdqu 16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 32(%rax),%xmm6 + vmovdqu 32(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 80-32(%rax),%xmm6 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 80-32(%rcx),%xmm5 + vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm2,%xmm5 + vpslldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm0 + + vmovdqa poly(%rip),%xmm3 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpxor %xmm5,%xmm0,%xmm0 + +.L128_dec_loop2: + + + + cmpq $16,%r9 + jb .L128_dec_out + subq $16,%r9 + + vmovdqa %xmm15,%xmm2 + vpaddd one(%rip),%xmm15,%xmm15 + + vpxor 0(%r8),%xmm2,%xmm2 + vaesenc 16(%r8),%xmm2,%xmm2 + vaesenc 32(%r8),%xmm2,%xmm2 + vaesenc 48(%r8),%xmm2,%xmm2 + vaesenc 64(%r8),%xmm2,%xmm2 + vaesenc 80(%r8),%xmm2,%xmm2 + vaesenc 96(%r8),%xmm2,%xmm2 + vaesenc 112(%r8),%xmm2,%xmm2 + vaesenc 128(%r8),%xmm2,%xmm2 + vaesenc 144(%r8),%xmm2,%xmm2 + vaesenclast 160(%r8),%xmm2,%xmm2 + vpxor (%rdi),%xmm2,%xmm2 + vmovdqu %xmm2,(%rsi) + addq $16,%rdi + addq $16,%rsi + + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa -32(%rcx),%xmm1 + call GFMUL + + jmp .L128_dec_loop2 + +.L128_dec_out: + vmovdqu %xmm0,(%rdx) + ret +.cfi_endproc +.size aes128gcmsiv_dec, .-aes128gcmsiv_dec +.globl aes128gcmsiv_ecb_enc_block +.hidden aes128gcmsiv_ecb_enc_block +.type aes128gcmsiv_ecb_enc_block,@function +.align 16 +aes128gcmsiv_ecb_enc_block: +.cfi_startproc +_CET_ENDBR + vmovdqa (%rdi),%xmm1 + + vpxor (%rdx),%xmm1,%xmm1 + vaesenc 16(%rdx),%xmm1,%xmm1 + vaesenc 32(%rdx),%xmm1,%xmm1 + vaesenc 48(%rdx),%xmm1,%xmm1 + vaesenc 64(%rdx),%xmm1,%xmm1 + vaesenc 80(%rdx),%xmm1,%xmm1 + vaesenc 96(%rdx),%xmm1,%xmm1 + vaesenc 112(%rdx),%xmm1,%xmm1 + vaesenc 128(%rdx),%xmm1,%xmm1 + vaesenc 144(%rdx),%xmm1,%xmm1 + vaesenclast 160(%rdx),%xmm1,%xmm1 + + vmovdqa %xmm1,(%rsi) + + ret +.cfi_endproc +.size aes128gcmsiv_ecb_enc_block,.-aes128gcmsiv_ecb_enc_block +.globl aes256gcmsiv_aes_ks_enc_x1 +.hidden aes256gcmsiv_aes_ks_enc_x1 +.type aes256gcmsiv_aes_ks_enc_x1,@function +.align 16 +aes256gcmsiv_aes_ks_enc_x1: +.cfi_startproc +_CET_ENDBR + vmovdqa con1(%rip),%xmm0 + vmovdqa mask(%rip),%xmm15 + vmovdqa (%rdi),%xmm8 + vmovdqa (%rcx),%xmm1 + vmovdqa 16(%rcx),%xmm3 + vpxor %xmm1,%xmm8,%xmm8 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm1,(%rdx) + vmovdqu %xmm3,16(%rdx) + vpxor %xmm14,%xmm14,%xmm14 + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,32(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,48(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,64(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,80(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,96(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,112(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,128(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,144(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,160(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,176(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslld $1,%xmm0,%xmm0 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenc %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,192(%rdx) + + vpshufd $0xff,%xmm1,%xmm2 + vaesenclast %xmm14,%xmm2,%xmm2 + vpslldq $4,%xmm3,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpxor %xmm2,%xmm3,%xmm3 + vaesenc %xmm3,%xmm8,%xmm8 + vmovdqu %xmm3,208(%rdx) + + vpshufb %xmm15,%xmm3,%xmm2 + vaesenclast %xmm0,%xmm2,%xmm2 + vpslldq $4,%xmm1,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpslldq $4,%xmm4,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vaesenclast %xmm1,%xmm8,%xmm8 + vmovdqu %xmm1,224(%rdx) + + vmovdqa %xmm8,(%rsi) + ret +.cfi_endproc +.size aes256gcmsiv_aes_ks_enc_x1,.-aes256gcmsiv_aes_ks_enc_x1 +.globl aes256gcmsiv_ecb_enc_block +.hidden aes256gcmsiv_ecb_enc_block +.type aes256gcmsiv_ecb_enc_block,@function +.align 16 +aes256gcmsiv_ecb_enc_block: +.cfi_startproc +_CET_ENDBR + vmovdqa (%rdi),%xmm1 + vpxor (%rdx),%xmm1,%xmm1 + vaesenc 16(%rdx),%xmm1,%xmm1 + vaesenc 32(%rdx),%xmm1,%xmm1 + vaesenc 48(%rdx),%xmm1,%xmm1 + vaesenc 64(%rdx),%xmm1,%xmm1 + vaesenc 80(%rdx),%xmm1,%xmm1 + vaesenc 96(%rdx),%xmm1,%xmm1 + vaesenc 112(%rdx),%xmm1,%xmm1 + vaesenc 128(%rdx),%xmm1,%xmm1 + vaesenc 144(%rdx),%xmm1,%xmm1 + vaesenc 160(%rdx),%xmm1,%xmm1 + vaesenc 176(%rdx),%xmm1,%xmm1 + vaesenc 192(%rdx),%xmm1,%xmm1 + vaesenc 208(%rdx),%xmm1,%xmm1 + vaesenclast 224(%rdx),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsi) + ret +.cfi_endproc +.size aes256gcmsiv_ecb_enc_block,.-aes256gcmsiv_ecb_enc_block +.globl aes256gcmsiv_enc_msg_x4 +.hidden aes256gcmsiv_enc_msg_x4 +.type aes256gcmsiv_enc_msg_x4,@function +.align 16 +aes256gcmsiv_enc_msg_x4: +.cfi_startproc +_CET_ENDBR + testq %r8,%r8 + jnz .L256_enc_msg_x4_start + ret + +.L256_enc_msg_x4_start: + movq %r8,%r10 + shrq $4,%r8 + shlq $60,%r10 + jz .L256_enc_msg_x4_start2 + addq $1,%r8 + +.L256_enc_msg_x4_start2: + movq %r8,%r10 + shlq $62,%r10 + shrq $62,%r10 + + + vmovdqa (%rdx),%xmm15 + vpor OR_MASK(%rip),%xmm15,%xmm15 + + vmovdqa four(%rip),%xmm4 + vmovdqa %xmm15,%xmm0 + vpaddd one(%rip),%xmm15,%xmm1 + vpaddd two(%rip),%xmm15,%xmm2 + vpaddd three(%rip),%xmm15,%xmm3 + + shrq $2,%r8 + je .L256_enc_msg_x4_check_remainder + + subq $64,%rsi + subq $64,%rdi + +.L256_enc_msg_x4_loop1: + addq $64,%rsi + addq $64,%rdi + + vmovdqa %xmm0,%xmm5 + vmovdqa %xmm1,%xmm6 + vmovdqa %xmm2,%xmm7 + vmovdqa %xmm3,%xmm8 + + vpxor (%rcx),%xmm5,%xmm5 + vpxor (%rcx),%xmm6,%xmm6 + vpxor (%rcx),%xmm7,%xmm7 + vpxor (%rcx),%xmm8,%xmm8 + + vmovdqu 16(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqu 32(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vpaddd %xmm4,%xmm3,%xmm3 + + vmovdqu 80(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 96(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 112(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 128(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 144(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 160(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 176(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 192(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 208(%rcx),%xmm12 + vaesenc %xmm12,%xmm5,%xmm5 + vaesenc %xmm12,%xmm6,%xmm6 + vaesenc %xmm12,%xmm7,%xmm7 + vaesenc %xmm12,%xmm8,%xmm8 + + vmovdqu 224(%rcx),%xmm12 + vaesenclast %xmm12,%xmm5,%xmm5 + vaesenclast %xmm12,%xmm6,%xmm6 + vaesenclast %xmm12,%xmm7,%xmm7 + vaesenclast %xmm12,%xmm8,%xmm8 + + + + vpxor 0(%rdi),%xmm5,%xmm5 + vpxor 16(%rdi),%xmm6,%xmm6 + vpxor 32(%rdi),%xmm7,%xmm7 + vpxor 48(%rdi),%xmm8,%xmm8 + + subq $1,%r8 + + vmovdqu %xmm5,0(%rsi) + vmovdqu %xmm6,16(%rsi) + vmovdqu %xmm7,32(%rsi) + vmovdqu %xmm8,48(%rsi) + + jne .L256_enc_msg_x4_loop1 + + addq $64,%rsi + addq $64,%rdi + +.L256_enc_msg_x4_check_remainder: + cmpq $0,%r10 + je .L256_enc_msg_x4_out + +.L256_enc_msg_x4_loop2: + + + + vmovdqa %xmm0,%xmm5 + vpaddd one(%rip),%xmm0,%xmm0 + vpxor (%rcx),%xmm5,%xmm5 + vaesenc 16(%rcx),%xmm5,%xmm5 + vaesenc 32(%rcx),%xmm5,%xmm5 + vaesenc 48(%rcx),%xmm5,%xmm5 + vaesenc 64(%rcx),%xmm5,%xmm5 + vaesenc 80(%rcx),%xmm5,%xmm5 + vaesenc 96(%rcx),%xmm5,%xmm5 + vaesenc 112(%rcx),%xmm5,%xmm5 + vaesenc 128(%rcx),%xmm5,%xmm5 + vaesenc 144(%rcx),%xmm5,%xmm5 + vaesenc 160(%rcx),%xmm5,%xmm5 + vaesenc 176(%rcx),%xmm5,%xmm5 + vaesenc 192(%rcx),%xmm5,%xmm5 + vaesenc 208(%rcx),%xmm5,%xmm5 + vaesenclast 224(%rcx),%xmm5,%xmm5 + + + vpxor (%rdi),%xmm5,%xmm5 + + vmovdqu %xmm5,(%rsi) + + addq $16,%rdi + addq $16,%rsi + + subq $1,%r10 + jne .L256_enc_msg_x4_loop2 + +.L256_enc_msg_x4_out: + ret +.cfi_endproc +.size aes256gcmsiv_enc_msg_x4,.-aes256gcmsiv_enc_msg_x4 +.globl aes256gcmsiv_enc_msg_x8 +.hidden aes256gcmsiv_enc_msg_x8 +.type aes256gcmsiv_enc_msg_x8,@function +.align 16 +aes256gcmsiv_enc_msg_x8: +.cfi_startproc +_CET_ENDBR + testq %r8,%r8 + jnz .L256_enc_msg_x8_start + ret + +.L256_enc_msg_x8_start: + + movq %rsp,%r11 + subq $16,%r11 + andq $-64,%r11 + + movq %r8,%r10 + shrq $4,%r8 + shlq $60,%r10 + jz .L256_enc_msg_x8_start2 + addq $1,%r8 + +.L256_enc_msg_x8_start2: + movq %r8,%r10 + shlq $61,%r10 + shrq $61,%r10 + + + vmovdqa (%rdx),%xmm1 + vpor OR_MASK(%rip),%xmm1,%xmm1 + + + vpaddd seven(%rip),%xmm1,%xmm0 + vmovdqa %xmm0,(%r11) + vpaddd one(%rip),%xmm1,%xmm9 + vpaddd two(%rip),%xmm1,%xmm10 + vpaddd three(%rip),%xmm1,%xmm11 + vpaddd four(%rip),%xmm1,%xmm12 + vpaddd five(%rip),%xmm1,%xmm13 + vpaddd six(%rip),%xmm1,%xmm14 + vmovdqa %xmm1,%xmm0 + + shrq $3,%r8 + jz .L256_enc_msg_x8_check_remainder + + subq $128,%rsi + subq $128,%rdi + +.L256_enc_msg_x8_loop1: + addq $128,%rsi + addq $128,%rdi + + vmovdqa %xmm0,%xmm1 + vmovdqa %xmm9,%xmm2 + vmovdqa %xmm10,%xmm3 + vmovdqa %xmm11,%xmm4 + vmovdqa %xmm12,%xmm5 + vmovdqa %xmm13,%xmm6 + vmovdqa %xmm14,%xmm7 + + vmovdqa (%r11),%xmm8 + + vpxor (%rcx),%xmm1,%xmm1 + vpxor (%rcx),%xmm2,%xmm2 + vpxor (%rcx),%xmm3,%xmm3 + vpxor (%rcx),%xmm4,%xmm4 + vpxor (%rcx),%xmm5,%xmm5 + vpxor (%rcx),%xmm6,%xmm6 + vpxor (%rcx),%xmm7,%xmm7 + vpxor (%rcx),%xmm8,%xmm8 + + vmovdqu 16(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqa (%r11),%xmm14 + vpaddd eight(%rip),%xmm14,%xmm14 + vmovdqa %xmm14,(%r11) + vmovdqu 32(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpsubd one(%rip),%xmm14,%xmm14 + vmovdqu 48(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm0,%xmm0 + vmovdqu 64(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm9,%xmm9 + vmovdqu 80(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm10,%xmm10 + vmovdqu 96(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm11,%xmm11 + vmovdqu 112(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm12,%xmm12 + vmovdqu 128(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vpaddd eight(%rip),%xmm13,%xmm13 + vmovdqu 144(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 160(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 176(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 192(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 208(%rcx),%xmm15 + vaesenc %xmm15,%xmm1,%xmm1 + vaesenc %xmm15,%xmm2,%xmm2 + vaesenc %xmm15,%xmm3,%xmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vaesenc %xmm15,%xmm6,%xmm6 + vaesenc %xmm15,%xmm7,%xmm7 + vaesenc %xmm15,%xmm8,%xmm8 + + vmovdqu 224(%rcx),%xmm15 + vaesenclast %xmm15,%xmm1,%xmm1 + vaesenclast %xmm15,%xmm2,%xmm2 + vaesenclast %xmm15,%xmm3,%xmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vaesenclast %xmm15,%xmm6,%xmm6 + vaesenclast %xmm15,%xmm7,%xmm7 + vaesenclast %xmm15,%xmm8,%xmm8 + + + + vpxor 0(%rdi),%xmm1,%xmm1 + vpxor 16(%rdi),%xmm2,%xmm2 + vpxor 32(%rdi),%xmm3,%xmm3 + vpxor 48(%rdi),%xmm4,%xmm4 + vpxor 64(%rdi),%xmm5,%xmm5 + vpxor 80(%rdi),%xmm6,%xmm6 + vpxor 96(%rdi),%xmm7,%xmm7 + vpxor 112(%rdi),%xmm8,%xmm8 + + subq $1,%r8 + + vmovdqu %xmm1,0(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + vmovdqu %xmm7,96(%rsi) + vmovdqu %xmm8,112(%rsi) + + jne .L256_enc_msg_x8_loop1 + + addq $128,%rsi + addq $128,%rdi + +.L256_enc_msg_x8_check_remainder: + cmpq $0,%r10 + je .L256_enc_msg_x8_out + +.L256_enc_msg_x8_loop2: + + + vmovdqa %xmm0,%xmm1 + vpaddd one(%rip),%xmm0,%xmm0 + + vpxor (%rcx),%xmm1,%xmm1 + vaesenc 16(%rcx),%xmm1,%xmm1 + vaesenc 32(%rcx),%xmm1,%xmm1 + vaesenc 48(%rcx),%xmm1,%xmm1 + vaesenc 64(%rcx),%xmm1,%xmm1 + vaesenc 80(%rcx),%xmm1,%xmm1 + vaesenc 96(%rcx),%xmm1,%xmm1 + vaesenc 112(%rcx),%xmm1,%xmm1 + vaesenc 128(%rcx),%xmm1,%xmm1 + vaesenc 144(%rcx),%xmm1,%xmm1 + vaesenc 160(%rcx),%xmm1,%xmm1 + vaesenc 176(%rcx),%xmm1,%xmm1 + vaesenc 192(%rcx),%xmm1,%xmm1 + vaesenc 208(%rcx),%xmm1,%xmm1 + vaesenclast 224(%rcx),%xmm1,%xmm1 + + + vpxor (%rdi),%xmm1,%xmm1 + + vmovdqu %xmm1,(%rsi) + + addq $16,%rdi + addq $16,%rsi + subq $1,%r10 + jnz .L256_enc_msg_x8_loop2 + +.L256_enc_msg_x8_out: + ret + +.cfi_endproc +.size aes256gcmsiv_enc_msg_x8,.-aes256gcmsiv_enc_msg_x8 +.globl aes256gcmsiv_dec +.hidden aes256gcmsiv_dec +.type aes256gcmsiv_dec,@function +.align 16 +aes256gcmsiv_dec: +.cfi_startproc +_CET_ENDBR + testq $~15,%r9 + jnz .L256_dec_start + ret + +.L256_dec_start: + vzeroupper + vmovdqa (%rdx),%xmm0 + + + vmovdqu 16(%rdx),%xmm15 + vpor OR_MASK(%rip),%xmm15,%xmm15 + movq %rdx,%rax + + leaq 32(%rax),%rax + leaq 32(%rcx),%rcx + + andq $~15,%r9 + + + cmpq $96,%r9 + jb .L256_dec_loop2 + + + subq $96,%r9 + vmovdqa %xmm15,%xmm7 + vpaddd one(%rip),%xmm7,%xmm8 + vpaddd two(%rip),%xmm7,%xmm9 + vpaddd one(%rip),%xmm9,%xmm10 + vpaddd two(%rip),%xmm9,%xmm11 + vpaddd one(%rip),%xmm11,%xmm12 + vpaddd two(%rip),%xmm11,%xmm15 + + vpxor (%r8),%xmm7,%xmm7 + vpxor (%r8),%xmm8,%xmm8 + vpxor (%r8),%xmm9,%xmm9 + vpxor (%r8),%xmm10,%xmm10 + vpxor (%r8),%xmm11,%xmm11 + vpxor (%r8),%xmm12,%xmm12 + + vmovdqu 16(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 32(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 48(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 64(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 80(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 96(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 112(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 128(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 144(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 160(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 176(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 192(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 208(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 224(%r8),%xmm4 + vaesenclast %xmm4,%xmm7,%xmm7 + vaesenclast %xmm4,%xmm8,%xmm8 + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm4,%xmm10,%xmm10 + vaesenclast %xmm4,%xmm11,%xmm11 + vaesenclast %xmm4,%xmm12,%xmm12 + + + vpxor 0(%rdi),%xmm7,%xmm7 + vpxor 16(%rdi),%xmm8,%xmm8 + vpxor 32(%rdi),%xmm9,%xmm9 + vpxor 48(%rdi),%xmm10,%xmm10 + vpxor 64(%rdi),%xmm11,%xmm11 + vpxor 80(%rdi),%xmm12,%xmm12 + + vmovdqu %xmm7,0(%rsi) + vmovdqu %xmm8,16(%rsi) + vmovdqu %xmm9,32(%rsi) + vmovdqu %xmm10,48(%rsi) + vmovdqu %xmm11,64(%rsi) + vmovdqu %xmm12,80(%rsi) + + addq $96,%rdi + addq $96,%rsi + jmp .L256_dec_loop1 + + +.align 64 +.L256_dec_loop1: + cmpq $96,%r9 + jb .L256_dec_finish_96 + subq $96,%r9 + + vmovdqa %xmm12,%xmm6 + vmovdqa %xmm11,16-32(%rax) + vmovdqa %xmm10,32-32(%rax) + vmovdqa %xmm9,48-32(%rax) + vmovdqa %xmm8,64-32(%rax) + vmovdqa %xmm7,80-32(%rax) + + vmovdqa %xmm15,%xmm7 + vpaddd one(%rip),%xmm7,%xmm8 + vpaddd two(%rip),%xmm7,%xmm9 + vpaddd one(%rip),%xmm9,%xmm10 + vpaddd two(%rip),%xmm9,%xmm11 + vpaddd one(%rip),%xmm11,%xmm12 + vpaddd two(%rip),%xmm11,%xmm15 + + vmovdqa (%r8),%xmm4 + vpxor %xmm4,%xmm7,%xmm7 + vpxor %xmm4,%xmm8,%xmm8 + vpxor %xmm4,%xmm9,%xmm9 + vpxor %xmm4,%xmm10,%xmm10 + vpxor %xmm4,%xmm11,%xmm11 + vpxor %xmm4,%xmm12,%xmm12 + + vmovdqu 0-32(%rcx),%xmm4 + vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 + vpclmulqdq $0x01,%xmm4,%xmm6,%xmm1 + vpclmulqdq $0x10,%xmm4,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 16(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu -16(%rax),%xmm6 + vmovdqu -16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 32(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 0(%rax),%xmm6 + vmovdqu 0(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 48(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 16(%rax),%xmm6 + vmovdqu 16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 64(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 32(%rax),%xmm6 + vmovdqu 32(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 80(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 96(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 112(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + + vmovdqa 80-32(%rax),%xmm6 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 80-32(%rcx),%xmm5 + + vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 128(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + + vpsrldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm2,%xmm5 + vpslldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm0 + + vmovdqa poly(%rip),%xmm3 + + vmovdqu 144(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 160(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 176(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 192(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 208(%r8),%xmm4 + vaesenc %xmm4,%xmm7,%xmm7 + vaesenc %xmm4,%xmm8,%xmm8 + vaesenc %xmm4,%xmm9,%xmm9 + vaesenc %xmm4,%xmm10,%xmm10 + vaesenc %xmm4,%xmm11,%xmm11 + vaesenc %xmm4,%xmm12,%xmm12 + + vmovdqu 224(%r8),%xmm6 + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpxor 0(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm7,%xmm7 + vpxor 16(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm8,%xmm8 + vpxor 32(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm9,%xmm9 + vpxor 48(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm10,%xmm10 + vpxor 64(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm11,%xmm11 + vpxor 80(%rdi),%xmm6,%xmm4 + vaesenclast %xmm4,%xmm12,%xmm12 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vmovdqu %xmm7,0(%rsi) + vmovdqu %xmm8,16(%rsi) + vmovdqu %xmm9,32(%rsi) + vmovdqu %xmm10,48(%rsi) + vmovdqu %xmm11,64(%rsi) + vmovdqu %xmm12,80(%rsi) + + vpxor %xmm5,%xmm0,%xmm0 + + leaq 96(%rdi),%rdi + leaq 96(%rsi),%rsi + jmp .L256_dec_loop1 + +.L256_dec_finish_96: + vmovdqa %xmm12,%xmm6 + vmovdqa %xmm11,16-32(%rax) + vmovdqa %xmm10,32-32(%rax) + vmovdqa %xmm9,48-32(%rax) + vmovdqa %xmm8,64-32(%rax) + vmovdqa %xmm7,80-32(%rax) + + vmovdqu 0-32(%rcx),%xmm4 + vpclmulqdq $0x10,%xmm4,%xmm6,%xmm1 + vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 + vpclmulqdq $0x01,%xmm4,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu -16(%rax),%xmm6 + vmovdqu -16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 0(%rax),%xmm6 + vmovdqu 0(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 16(%rax),%xmm6 + vmovdqu 16(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vmovdqu 32(%rax),%xmm6 + vmovdqu 32(%rcx),%xmm13 + + vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + + vmovdqu 80-32(%rax),%xmm6 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 80-32(%rcx),%xmm5 + vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm2,%xmm5 + vpslldq $8,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm0 + + vmovdqa poly(%rip),%xmm3 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpalignr $8,%xmm0,%xmm0,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 + vpxor %xmm0,%xmm2,%xmm0 + + vpxor %xmm5,%xmm0,%xmm0 + +.L256_dec_loop2: + + + + cmpq $16,%r9 + jb .L256_dec_out + subq $16,%r9 + + vmovdqa %xmm15,%xmm2 + vpaddd one(%rip),%xmm15,%xmm15 + + vpxor 0(%r8),%xmm2,%xmm2 + vaesenc 16(%r8),%xmm2,%xmm2 + vaesenc 32(%r8),%xmm2,%xmm2 + vaesenc 48(%r8),%xmm2,%xmm2 + vaesenc 64(%r8),%xmm2,%xmm2 + vaesenc 80(%r8),%xmm2,%xmm2 + vaesenc 96(%r8),%xmm2,%xmm2 + vaesenc 112(%r8),%xmm2,%xmm2 + vaesenc 128(%r8),%xmm2,%xmm2 + vaesenc 144(%r8),%xmm2,%xmm2 + vaesenc 160(%r8),%xmm2,%xmm2 + vaesenc 176(%r8),%xmm2,%xmm2 + vaesenc 192(%r8),%xmm2,%xmm2 + vaesenc 208(%r8),%xmm2,%xmm2 + vaesenclast 224(%r8),%xmm2,%xmm2 + vpxor (%rdi),%xmm2,%xmm2 + vmovdqu %xmm2,(%rsi) + addq $16,%rdi + addq $16,%rsi + + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa -32(%rcx),%xmm1 + call GFMUL + + jmp .L256_dec_loop2 + +.L256_dec_out: + vmovdqu %xmm0,(%rdx) + ret +.cfi_endproc +.size aes256gcmsiv_dec, .-aes256gcmsiv_dec +.globl aes256gcmsiv_kdf +.hidden aes256gcmsiv_kdf +.type aes256gcmsiv_kdf,@function +.align 16 +aes256gcmsiv_kdf: +.cfi_startproc +_CET_ENDBR + + + + + vmovdqa (%rdx),%xmm1 + vmovdqa 0(%rdi),%xmm4 + vmovdqa and_mask(%rip),%xmm11 + vmovdqa one(%rip),%xmm8 + vpshufd $0x90,%xmm4,%xmm4 + vpand %xmm11,%xmm4,%xmm4 + vpaddd %xmm8,%xmm4,%xmm6 + vpaddd %xmm8,%xmm6,%xmm7 + vpaddd %xmm8,%xmm7,%xmm11 + vpaddd %xmm8,%xmm11,%xmm12 + vpaddd %xmm8,%xmm12,%xmm13 + + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm1,%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm1,%xmm11,%xmm11 + vpxor %xmm1,%xmm12,%xmm12 + vpxor %xmm1,%xmm13,%xmm13 + + vmovdqa 16(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 32(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 48(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 64(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 80(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 96(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 112(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 128(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 144(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 160(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 176(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 192(%rdx),%xmm2 + vaesenc %xmm2,%xmm4,%xmm4 + vaesenc %xmm2,%xmm6,%xmm6 + vaesenc %xmm2,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vaesenc %xmm2,%xmm12,%xmm12 + vaesenc %xmm2,%xmm13,%xmm13 + + vmovdqa 208(%rdx),%xmm1 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + + vmovdqa 224(%rdx),%xmm2 + vaesenclast %xmm2,%xmm4,%xmm4 + vaesenclast %xmm2,%xmm6,%xmm6 + vaesenclast %xmm2,%xmm7,%xmm7 + vaesenclast %xmm2,%xmm11,%xmm11 + vaesenclast %xmm2,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + + + vmovdqa %xmm4,0(%rsi) + vmovdqa %xmm6,16(%rsi) + vmovdqa %xmm7,32(%rsi) + vmovdqa %xmm11,48(%rsi) + vmovdqa %xmm12,64(%rsi) + vmovdqa %xmm13,80(%rsi) + ret +.cfi_endproc +.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf +#endif diff --git a/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-win.asm b/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-win.asm new file mode 100644 index 0000000000..6691a2ddfb --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/aes128gcmsiv-x86_64-win.asm @@ -0,0 +1,3302 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .rdata rdata align=8 + +ALIGN 16 +one: + DQ 1,0 +two: + DQ 2,0 +three: + DQ 3,0 +four: + DQ 4,0 +five: + DQ 5,0 +six: + DQ 6,0 +seven: + DQ 7,0 +eight: + DQ 8,0 + +OR_MASK: + DD 0x00000000,0x00000000,0x00000000,0x80000000 +poly: + DQ 0x1,0xc200000000000000 +mask: + DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +con1: + DD 1,1,1,1 +con2: + DD 0x1b,0x1b,0x1b,0x1b +con3: + DB -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7 +and_mask: + DD 0,0xffffffff,0xffffffff,0xffffffff +section .text code align=64 + + +ALIGN 16 +GFMUL: + + vpclmulqdq xmm2,xmm0,xmm1,0x00 + vpclmulqdq xmm5,xmm0,xmm1,0x11 + vpclmulqdq xmm3,xmm0,xmm1,0x10 + vpclmulqdq xmm4,xmm0,xmm1,0x01 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm3,8 + vpsrldq xmm3,xmm3,8 + vpxor xmm2,xmm2,xmm4 + vpxor xmm5,xmm5,xmm3 + + vpclmulqdq xmm3,xmm2,XMMWORD[poly],0x10 + vpshufd xmm4,xmm2,78 + vpxor xmm2,xmm3,xmm4 + + vpclmulqdq xmm3,xmm2,XMMWORD[poly],0x10 + vpshufd xmm4,xmm2,78 + vpxor xmm2,xmm3,xmm4 + + vpxor xmm0,xmm2,xmm5 + ret + + +global aesgcmsiv_htable_init + +ALIGN 16 +aesgcmsiv_htable_init: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesgcmsiv_htable_init: + mov rdi,rcx + mov rsi,rdx + + + +_CET_ENDBR + vmovdqa xmm0,XMMWORD[rsi] + vmovdqa xmm1,xmm0 + vmovdqa XMMWORD[rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[16+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[32+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[48+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[64+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[80+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[96+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[112+rdi],xmm0 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aesgcmsiv_htable_init: +global aesgcmsiv_htable6_init + +ALIGN 16 +aesgcmsiv_htable6_init: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesgcmsiv_htable6_init: + mov rdi,rcx + mov rsi,rdx + + + +_CET_ENDBR + vmovdqa xmm0,XMMWORD[rsi] + vmovdqa xmm1,xmm0 + vmovdqa XMMWORD[rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[16+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[32+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[48+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[64+rdi],xmm0 + call GFMUL + vmovdqa XMMWORD[80+rdi],xmm0 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aesgcmsiv_htable6_init: +global aesgcmsiv_htable_polyval + +ALIGN 16 +aesgcmsiv_htable_polyval: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesgcmsiv_htable_polyval: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + +_CET_ENDBR + test rdx,rdx + jnz NEAR $L$htable_polyval_start + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$htable_polyval_start: + vzeroall + + + + mov r11,rdx + and r11,127 + + jz NEAR $L$htable_polyval_no_prefix + + vpxor xmm9,xmm9,xmm9 + vmovdqa xmm1,XMMWORD[rcx] + sub rdx,r11 + + sub r11,16 + + + vmovdqu xmm0,XMMWORD[rsi] + vpxor xmm0,xmm0,xmm1 + + vpclmulqdq xmm5,xmm0,XMMWORD[r11*1+rdi],0x01 + vpclmulqdq xmm3,xmm0,XMMWORD[r11*1+rdi],0x00 + vpclmulqdq xmm4,xmm0,XMMWORD[r11*1+rdi],0x11 + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + lea rsi,[16+rsi] + test r11,r11 + jnz NEAR $L$htable_polyval_prefix_loop + jmp NEAR $L$htable_polyval_prefix_complete + + +ALIGN 64 +$L$htable_polyval_prefix_loop: + sub r11,16 + + vmovdqu xmm0,XMMWORD[rsi] + + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x00 + vpxor xmm3,xmm3,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x11 + vpxor xmm4,xmm4,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x01 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + test r11,r11 + + lea rsi,[16+rsi] + + jnz NEAR $L$htable_polyval_prefix_loop + +$L$htable_polyval_prefix_complete: + vpsrldq xmm6,xmm5,8 + vpslldq xmm5,xmm5,8 + + vpxor xmm9,xmm4,xmm6 + vpxor xmm1,xmm3,xmm5 + + jmp NEAR $L$htable_polyval_main_loop + +$L$htable_polyval_no_prefix: + + + + + vpxor xmm1,xmm1,xmm1 + vmovdqa xmm9,XMMWORD[rcx] + +ALIGN 64 +$L$htable_polyval_main_loop: + sub rdx,0x80 + jb NEAR $L$htable_polyval_out + + vmovdqu xmm0,XMMWORD[112+rsi] + + vpclmulqdq xmm5,xmm0,XMMWORD[rdi],0x01 + vpclmulqdq xmm3,xmm0,XMMWORD[rdi],0x00 + vpclmulqdq xmm4,xmm0,XMMWORD[rdi],0x11 + vpclmulqdq xmm6,xmm0,XMMWORD[rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + + vmovdqu xmm0,XMMWORD[96+rsi] + vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x01 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x00 + vpxor xmm3,xmm3,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x11 + vpxor xmm4,xmm4,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + + + vmovdqu xmm0,XMMWORD[80+rsi] + + vpclmulqdq xmm7,xmm1,XMMWORD[poly],0x10 + vpalignr xmm1,xmm1,xmm1,8 + + vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x01 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x00 + vpxor xmm3,xmm3,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x11 + vpxor xmm4,xmm4,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + + vpxor xmm1,xmm1,xmm7 + + vmovdqu xmm0,XMMWORD[64+rsi] + + vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x01 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x00 + vpxor xmm3,xmm3,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x11 + vpxor xmm4,xmm4,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + + vmovdqu xmm0,XMMWORD[48+rsi] + + vpclmulqdq xmm7,xmm1,XMMWORD[poly],0x10 + vpalignr xmm1,xmm1,xmm1,8 + + vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x01 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x00 + vpxor xmm3,xmm3,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x11 + vpxor xmm4,xmm4,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + + vpxor xmm1,xmm1,xmm7 + + vmovdqu xmm0,XMMWORD[32+rsi] + + vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x01 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x00 + vpxor xmm3,xmm3,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x11 + vpxor xmm4,xmm4,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + + vpxor xmm1,xmm1,xmm9 + + vmovdqu xmm0,XMMWORD[16+rsi] + + vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x01 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x00 + vpxor xmm3,xmm3,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x11 + vpxor xmm4,xmm4,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + + vmovdqu xmm0,XMMWORD[rsi] + vpxor xmm0,xmm0,xmm1 + + vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x01 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x00 + vpxor xmm3,xmm3,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x11 + vpxor xmm4,xmm4,xmm6 + vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x10 + vpxor xmm5,xmm5,xmm6 + + + vpsrldq xmm6,xmm5,8 + vpslldq xmm5,xmm5,8 + + vpxor xmm9,xmm4,xmm6 + vpxor xmm1,xmm3,xmm5 + + lea rsi,[128+rsi] + jmp NEAR $L$htable_polyval_main_loop + + + +$L$htable_polyval_out: + vpclmulqdq xmm6,xmm1,XMMWORD[poly],0x10 + vpalignr xmm1,xmm1,xmm1,8 + vpxor xmm1,xmm1,xmm6 + + vpclmulqdq xmm6,xmm1,XMMWORD[poly],0x10 + vpalignr xmm1,xmm1,xmm1,8 + vpxor xmm1,xmm1,xmm6 + vpxor xmm1,xmm1,xmm9 + + vmovdqu XMMWORD[rcx],xmm1 + vzeroupper + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aesgcmsiv_htable_polyval: +global aesgcmsiv_polyval_horner + +ALIGN 16 +aesgcmsiv_polyval_horner: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesgcmsiv_polyval_horner: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + +_CET_ENDBR + test rcx,rcx + jnz NEAR $L$polyval_horner_start + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$polyval_horner_start: + + + + xor r10,r10 + shl rcx,4 + + vmovdqa xmm1,XMMWORD[rsi] + vmovdqa xmm0,XMMWORD[rdi] + +$L$polyval_horner_loop: + vpxor xmm0,xmm0,XMMWORD[r10*1+rdx] + call GFMUL + + add r10,16 + cmp rcx,r10 + jne NEAR $L$polyval_horner_loop + + + vmovdqa XMMWORD[rdi],xmm0 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aesgcmsiv_polyval_horner: +global aes128gcmsiv_aes_ks + +ALIGN 16 +aes128gcmsiv_aes_ks: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes128gcmsiv_aes_ks: + mov rdi,rcx + mov rsi,rdx + + + +_CET_ENDBR + vmovdqu xmm1,XMMWORD[rdi] + vmovdqa XMMWORD[rsi],xmm1 + + vmovdqa xmm0,XMMWORD[con1] + vmovdqa xmm15,XMMWORD[mask] + + mov rax,8 + +$L$ks128_loop: + add rsi,16 + sub rax,1 + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpslldq xmm3,xmm1,4 + vpxor xmm1,xmm1,xmm3 + vpslldq xmm3,xmm3,4 + vpxor xmm1,xmm1,xmm3 + vpslldq xmm3,xmm3,4 + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + vmovdqa XMMWORD[rsi],xmm1 + jne NEAR $L$ks128_loop + + vmovdqa xmm0,XMMWORD[con2] + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpslldq xmm3,xmm1,4 + vpxor xmm1,xmm1,xmm3 + vpslldq xmm3,xmm3,4 + vpxor xmm1,xmm1,xmm3 + vpslldq xmm3,xmm3,4 + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + vmovdqa XMMWORD[16+rsi],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslldq xmm3,xmm1,4 + vpxor xmm1,xmm1,xmm3 + vpslldq xmm3,xmm3,4 + vpxor xmm1,xmm1,xmm3 + vpslldq xmm3,xmm3,4 + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + vmovdqa XMMWORD[32+rsi],xmm1 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes128gcmsiv_aes_ks: +global aes256gcmsiv_aes_ks + +ALIGN 16 +aes256gcmsiv_aes_ks: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes256gcmsiv_aes_ks: + mov rdi,rcx + mov rsi,rdx + + + +_CET_ENDBR + vmovdqu xmm1,XMMWORD[rdi] + vmovdqu xmm3,XMMWORD[16+rdi] + vmovdqa XMMWORD[rsi],xmm1 + vmovdqa XMMWORD[16+rsi],xmm3 + vmovdqa xmm0,XMMWORD[con1] + vmovdqa xmm15,XMMWORD[mask] + vpxor xmm14,xmm14,xmm14 + mov rax,6 + +$L$ks256_loop: + add rsi,32 + sub rax,1 + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm4,xmm1,32 + vpxor xmm1,xmm1,xmm4 + vpshufb xmm4,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vmovdqa XMMWORD[rsi],xmm1 + vpshufd xmm2,xmm1,0xff + vaesenclast xmm2,xmm2,xmm14 + vpsllq xmm4,xmm3,32 + vpxor xmm3,xmm3,xmm4 + vpshufb xmm4,xmm3,XMMWORD[con3] + vpxor xmm3,xmm3,xmm4 + vpxor xmm3,xmm3,xmm2 + vmovdqa XMMWORD[16+rsi],xmm3 + jne NEAR $L$ks256_loop + + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpsllq xmm4,xmm1,32 + vpxor xmm1,xmm1,xmm4 + vpshufb xmm4,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vmovdqa XMMWORD[32+rsi],xmm1 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +global aes128gcmsiv_aes_ks_enc_x1 + +ALIGN 16 +aes128gcmsiv_aes_ks_enc_x1: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + +_CET_ENDBR + vmovdqa xmm1,XMMWORD[rcx] + vmovdqa xmm4,XMMWORD[rdi] + + vmovdqa XMMWORD[rdx],xmm1 + vpxor xmm4,xmm4,xmm1 + + vmovdqa xmm0,XMMWORD[con1] + vmovdqa xmm15,XMMWORD[mask] + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[16+rdx],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[32+rdx],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[48+rdx],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[64+rdx],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[80+rdx],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[96+rdx],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[112+rdx],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[128+rdx],xmm1 + + + vmovdqa xmm0,XMMWORD[con2] + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenc xmm4,xmm4,xmm1 + vmovdqa XMMWORD[144+rdx],xmm1 + + vpshufb xmm2,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpsllq xmm3,xmm1,32 + vpxor xmm1,xmm1,xmm3 + vpshufb xmm3,xmm1,XMMWORD[con3] + vpxor xmm1,xmm1,xmm3 + vpxor xmm1,xmm1,xmm2 + + vaesenclast xmm4,xmm4,xmm1 + vmovdqa XMMWORD[160+rdx],xmm1 + + + vmovdqa XMMWORD[rsi],xmm4 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes128gcmsiv_aes_ks_enc_x1: +global aes128gcmsiv_kdf + +ALIGN 16 +aes128gcmsiv_kdf: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes128gcmsiv_kdf: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + + + + + vmovdqa xmm1,XMMWORD[rdx] + vmovdqa xmm9,XMMWORD[rdi] + vmovdqa xmm12,XMMWORD[and_mask] + vmovdqa xmm13,XMMWORD[one] + vpshufd xmm9,xmm9,0x90 + vpand xmm9,xmm9,xmm12 + vpaddd xmm10,xmm9,xmm13 + vpaddd xmm11,xmm10,xmm13 + vpaddd xmm12,xmm11,xmm13 + + vpxor xmm9,xmm9,xmm1 + vpxor xmm10,xmm10,xmm1 + vpxor xmm11,xmm11,xmm1 + vpxor xmm12,xmm12,xmm1 + + vmovdqa xmm1,XMMWORD[16+rdx] + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + + vmovdqa xmm2,XMMWORD[32+rdx] + vaesenc xmm9,xmm9,xmm2 + vaesenc xmm10,xmm10,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + + vmovdqa xmm1,XMMWORD[48+rdx] + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + + vmovdqa xmm2,XMMWORD[64+rdx] + vaesenc xmm9,xmm9,xmm2 + vaesenc xmm10,xmm10,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + + vmovdqa xmm1,XMMWORD[80+rdx] + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + + vmovdqa xmm2,XMMWORD[96+rdx] + vaesenc xmm9,xmm9,xmm2 + vaesenc xmm10,xmm10,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + + vmovdqa xmm1,XMMWORD[112+rdx] + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + + vmovdqa xmm2,XMMWORD[128+rdx] + vaesenc xmm9,xmm9,xmm2 + vaesenc xmm10,xmm10,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + + vmovdqa xmm1,XMMWORD[144+rdx] + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + + vmovdqa xmm2,XMMWORD[160+rdx] + vaesenclast xmm9,xmm9,xmm2 + vaesenclast xmm10,xmm10,xmm2 + vaesenclast xmm11,xmm11,xmm2 + vaesenclast xmm12,xmm12,xmm2 + + + vmovdqa XMMWORD[rsi],xmm9 + vmovdqa XMMWORD[16+rsi],xmm10 + vmovdqa XMMWORD[32+rsi],xmm11 + vmovdqa XMMWORD[48+rsi],xmm12 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes128gcmsiv_kdf: +global aes128gcmsiv_enc_msg_x4 + +ALIGN 16 +aes128gcmsiv_enc_msg_x4: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes128gcmsiv_enc_msg_x4: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + test r8,r8 + jnz NEAR $L$128_enc_msg_x4_start + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$128_enc_msg_x4_start: + push r12 + + push r13 + + + shr r8,4 + mov r10,r8 + shl r10,62 + shr r10,62 + + + vmovdqa xmm15,XMMWORD[rdx] + vpor xmm15,xmm15,XMMWORD[OR_MASK] + + vmovdqu xmm4,XMMWORD[four] + vmovdqa xmm0,xmm15 + vpaddd xmm1,xmm15,XMMWORD[one] + vpaddd xmm2,xmm15,XMMWORD[two] + vpaddd xmm3,xmm15,XMMWORD[three] + + shr r8,2 + je NEAR $L$128_enc_msg_x4_check_remainder + + sub rsi,64 + sub rdi,64 + +$L$128_enc_msg_x4_loop1: + add rsi,64 + add rdi,64 + + vmovdqa xmm5,xmm0 + vmovdqa xmm6,xmm1 + vmovdqa xmm7,xmm2 + vmovdqa xmm8,xmm3 + + vpxor xmm5,xmm5,XMMWORD[rcx] + vpxor xmm6,xmm6,XMMWORD[rcx] + vpxor xmm7,xmm7,XMMWORD[rcx] + vpxor xmm8,xmm8,XMMWORD[rcx] + + vmovdqu xmm12,XMMWORD[16+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vpaddd xmm0,xmm0,xmm4 + vmovdqu xmm12,XMMWORD[32+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vpaddd xmm1,xmm1,xmm4 + vmovdqu xmm12,XMMWORD[48+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vpaddd xmm2,xmm2,xmm4 + vmovdqu xmm12,XMMWORD[64+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vpaddd xmm3,xmm3,xmm4 + + vmovdqu xmm12,XMMWORD[80+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[96+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[112+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[128+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[144+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[160+rcx] + vaesenclast xmm5,xmm5,xmm12 + vaesenclast xmm6,xmm6,xmm12 + vaesenclast xmm7,xmm7,xmm12 + vaesenclast xmm8,xmm8,xmm12 + + + + vpxor xmm5,xmm5,XMMWORD[rdi] + vpxor xmm6,xmm6,XMMWORD[16+rdi] + vpxor xmm7,xmm7,XMMWORD[32+rdi] + vpxor xmm8,xmm8,XMMWORD[48+rdi] + + sub r8,1 + + vmovdqu XMMWORD[rsi],xmm5 + vmovdqu XMMWORD[16+rsi],xmm6 + vmovdqu XMMWORD[32+rsi],xmm7 + vmovdqu XMMWORD[48+rsi],xmm8 + + jne NEAR $L$128_enc_msg_x4_loop1 + + add rsi,64 + add rdi,64 + +$L$128_enc_msg_x4_check_remainder: + cmp r10,0 + je NEAR $L$128_enc_msg_x4_out + +$L$128_enc_msg_x4_loop2: + + + vmovdqa xmm5,xmm0 + vpaddd xmm0,xmm0,XMMWORD[one] + + vpxor xmm5,xmm5,XMMWORD[rcx] + vaesenc xmm5,xmm5,XMMWORD[16+rcx] + vaesenc xmm5,xmm5,XMMWORD[32+rcx] + vaesenc xmm5,xmm5,XMMWORD[48+rcx] + vaesenc xmm5,xmm5,XMMWORD[64+rcx] + vaesenc xmm5,xmm5,XMMWORD[80+rcx] + vaesenc xmm5,xmm5,XMMWORD[96+rcx] + vaesenc xmm5,xmm5,XMMWORD[112+rcx] + vaesenc xmm5,xmm5,XMMWORD[128+rcx] + vaesenc xmm5,xmm5,XMMWORD[144+rcx] + vaesenclast xmm5,xmm5,XMMWORD[160+rcx] + + + vpxor xmm5,xmm5,XMMWORD[rdi] + vmovdqu XMMWORD[rsi],xmm5 + + add rdi,16 + add rsi,16 + + sub r10,1 + jne NEAR $L$128_enc_msg_x4_loop2 + +$L$128_enc_msg_x4_out: + pop r13 + + pop r12 + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes128gcmsiv_enc_msg_x4: +global aes128gcmsiv_enc_msg_x8 + +ALIGN 16 +aes128gcmsiv_enc_msg_x8: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes128gcmsiv_enc_msg_x8: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + test r8,r8 + jnz NEAR $L$128_enc_msg_x8_start + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$128_enc_msg_x8_start: + push r12 + + push r13 + + push rbp + + mov rbp,rsp + + + + sub rsp,128 + and rsp,-64 + + shr r8,4 + mov r10,r8 + shl r10,61 + shr r10,61 + + + vmovdqu xmm1,XMMWORD[rdx] + vpor xmm1,xmm1,XMMWORD[OR_MASK] + + + vpaddd xmm0,xmm1,XMMWORD[seven] + vmovdqu XMMWORD[rsp],xmm0 + vpaddd xmm9,xmm1,XMMWORD[one] + vpaddd xmm10,xmm1,XMMWORD[two] + vpaddd xmm11,xmm1,XMMWORD[three] + vpaddd xmm12,xmm1,XMMWORD[four] + vpaddd xmm13,xmm1,XMMWORD[five] + vpaddd xmm14,xmm1,XMMWORD[six] + vmovdqa xmm0,xmm1 + + shr r8,3 + je NEAR $L$128_enc_msg_x8_check_remainder + + sub rsi,128 + sub rdi,128 + +$L$128_enc_msg_x8_loop1: + add rsi,128 + add rdi,128 + + vmovdqa xmm1,xmm0 + vmovdqa xmm2,xmm9 + vmovdqa xmm3,xmm10 + vmovdqa xmm4,xmm11 + vmovdqa xmm5,xmm12 + vmovdqa xmm6,xmm13 + vmovdqa xmm7,xmm14 + + vmovdqu xmm8,XMMWORD[rsp] + + vpxor xmm1,xmm1,XMMWORD[rcx] + vpxor xmm2,xmm2,XMMWORD[rcx] + vpxor xmm3,xmm3,XMMWORD[rcx] + vpxor xmm4,xmm4,XMMWORD[rcx] + vpxor xmm5,xmm5,XMMWORD[rcx] + vpxor xmm6,xmm6,XMMWORD[rcx] + vpxor xmm7,xmm7,XMMWORD[rcx] + vpxor xmm8,xmm8,XMMWORD[rcx] + + vmovdqu xmm15,XMMWORD[16+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vmovdqu xmm14,XMMWORD[rsp] + vpaddd xmm14,xmm14,XMMWORD[eight] + vmovdqu XMMWORD[rsp],xmm14 + vmovdqu xmm15,XMMWORD[32+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpsubd xmm14,xmm14,XMMWORD[one] + vmovdqu xmm15,XMMWORD[48+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm0,xmm0,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[64+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm9,xmm9,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[80+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm10,xmm10,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[96+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm11,xmm11,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[112+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm12,xmm12,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[128+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm13,xmm13,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[144+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vmovdqu xmm15,XMMWORD[160+rcx] + vaesenclast xmm1,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm15 + vaesenclast xmm3,xmm3,xmm15 + vaesenclast xmm4,xmm4,xmm15 + vaesenclast xmm5,xmm5,xmm15 + vaesenclast xmm6,xmm6,xmm15 + vaesenclast xmm7,xmm7,xmm15 + vaesenclast xmm8,xmm8,xmm15 + + + + vpxor xmm1,xmm1,XMMWORD[rdi] + vpxor xmm2,xmm2,XMMWORD[16+rdi] + vpxor xmm3,xmm3,XMMWORD[32+rdi] + vpxor xmm4,xmm4,XMMWORD[48+rdi] + vpxor xmm5,xmm5,XMMWORD[64+rdi] + vpxor xmm6,xmm6,XMMWORD[80+rdi] + vpxor xmm7,xmm7,XMMWORD[96+rdi] + vpxor xmm8,xmm8,XMMWORD[112+rdi] + + dec r8 + + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + vmovdqu XMMWORD[80+rsi],xmm6 + vmovdqu XMMWORD[96+rsi],xmm7 + vmovdqu XMMWORD[112+rsi],xmm8 + + jne NEAR $L$128_enc_msg_x8_loop1 + + add rsi,128 + add rdi,128 + +$L$128_enc_msg_x8_check_remainder: + cmp r10,0 + je NEAR $L$128_enc_msg_x8_out + +$L$128_enc_msg_x8_loop2: + + + vmovdqa xmm1,xmm0 + vpaddd xmm0,xmm0,XMMWORD[one] + + vpxor xmm1,xmm1,XMMWORD[rcx] + vaesenc xmm1,xmm1,XMMWORD[16+rcx] + vaesenc xmm1,xmm1,XMMWORD[32+rcx] + vaesenc xmm1,xmm1,XMMWORD[48+rcx] + vaesenc xmm1,xmm1,XMMWORD[64+rcx] + vaesenc xmm1,xmm1,XMMWORD[80+rcx] + vaesenc xmm1,xmm1,XMMWORD[96+rcx] + vaesenc xmm1,xmm1,XMMWORD[112+rcx] + vaesenc xmm1,xmm1,XMMWORD[128+rcx] + vaesenc xmm1,xmm1,XMMWORD[144+rcx] + vaesenclast xmm1,xmm1,XMMWORD[160+rcx] + + + vpxor xmm1,xmm1,XMMWORD[rdi] + + vmovdqu XMMWORD[rsi],xmm1 + + add rdi,16 + add rsi,16 + + dec r10 + jne NEAR $L$128_enc_msg_x8_loop2 + +$L$128_enc_msg_x8_out: + mov rsp,rbp + + pop rbp + + pop r13 + + pop r12 + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes128gcmsiv_enc_msg_x8: +global aes128gcmsiv_dec + +ALIGN 16 +aes128gcmsiv_dec: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes128gcmsiv_dec: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + test r9,~15 + jnz NEAR $L$128_dec_start + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$128_dec_start: + vzeroupper + vmovdqa xmm0,XMMWORD[rdx] + + + vmovdqu xmm15,XMMWORD[16+rdx] + vpor xmm15,xmm15,XMMWORD[OR_MASK] + mov rax,rdx + + lea rax,[32+rax] + lea rcx,[32+rcx] + + and r9,~15 + + + cmp r9,96 + jb NEAR $L$128_dec_loop2 + + + sub r9,96 + vmovdqa xmm7,xmm15 + vpaddd xmm8,xmm7,XMMWORD[one] + vpaddd xmm9,xmm7,XMMWORD[two] + vpaddd xmm10,xmm9,XMMWORD[one] + vpaddd xmm11,xmm9,XMMWORD[two] + vpaddd xmm12,xmm11,XMMWORD[one] + vpaddd xmm15,xmm11,XMMWORD[two] + + vpxor xmm7,xmm7,XMMWORD[r8] + vpxor xmm8,xmm8,XMMWORD[r8] + vpxor xmm9,xmm9,XMMWORD[r8] + vpxor xmm10,xmm10,XMMWORD[r8] + vpxor xmm11,xmm11,XMMWORD[r8] + vpxor xmm12,xmm12,XMMWORD[r8] + + vmovdqu xmm4,XMMWORD[16+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[32+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[48+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[64+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[80+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[96+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[112+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[128+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[144+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[160+r8] + vaesenclast xmm7,xmm7,xmm4 + vaesenclast xmm8,xmm8,xmm4 + vaesenclast xmm9,xmm9,xmm4 + vaesenclast xmm10,xmm10,xmm4 + vaesenclast xmm11,xmm11,xmm4 + vaesenclast xmm12,xmm12,xmm4 + + + vpxor xmm7,xmm7,XMMWORD[rdi] + vpxor xmm8,xmm8,XMMWORD[16+rdi] + vpxor xmm9,xmm9,XMMWORD[32+rdi] + vpxor xmm10,xmm10,XMMWORD[48+rdi] + vpxor xmm11,xmm11,XMMWORD[64+rdi] + vpxor xmm12,xmm12,XMMWORD[80+rdi] + + vmovdqu XMMWORD[rsi],xmm7 + vmovdqu XMMWORD[16+rsi],xmm8 + vmovdqu XMMWORD[32+rsi],xmm9 + vmovdqu XMMWORD[48+rsi],xmm10 + vmovdqu XMMWORD[64+rsi],xmm11 + vmovdqu XMMWORD[80+rsi],xmm12 + + add rdi,96 + add rsi,96 + jmp NEAR $L$128_dec_loop1 + + +ALIGN 64 +$L$128_dec_loop1: + cmp r9,96 + jb NEAR $L$128_dec_finish_96 + sub r9,96 + + vmovdqa xmm6,xmm12 + vmovdqa XMMWORD[(16-32)+rax],xmm11 + vmovdqa XMMWORD[(32-32)+rax],xmm10 + vmovdqa XMMWORD[(48-32)+rax],xmm9 + vmovdqa XMMWORD[(64-32)+rax],xmm8 + vmovdqa XMMWORD[(80-32)+rax],xmm7 + + vmovdqa xmm7,xmm15 + vpaddd xmm8,xmm7,XMMWORD[one] + vpaddd xmm9,xmm7,XMMWORD[two] + vpaddd xmm10,xmm9,XMMWORD[one] + vpaddd xmm11,xmm9,XMMWORD[two] + vpaddd xmm12,xmm11,XMMWORD[one] + vpaddd xmm15,xmm11,XMMWORD[two] + + vmovdqa xmm4,XMMWORD[r8] + vpxor xmm7,xmm7,xmm4 + vpxor xmm8,xmm8,xmm4 + vpxor xmm9,xmm9,xmm4 + vpxor xmm10,xmm10,xmm4 + vpxor xmm11,xmm11,xmm4 + vpxor xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[((0-32))+rcx] + vpclmulqdq xmm2,xmm6,xmm4,0x11 + vpclmulqdq xmm3,xmm6,xmm4,0x00 + vpclmulqdq xmm1,xmm6,xmm4,0x01 + vpclmulqdq xmm4,xmm6,xmm4,0x10 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm4,XMMWORD[16+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[((-16))+rax] + vmovdqu xmm13,XMMWORD[((-16))+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm4,XMMWORD[32+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[rax] + vmovdqu xmm13,XMMWORD[rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm4,XMMWORD[48+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[16+rax] + vmovdqu xmm13,XMMWORD[16+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm4,XMMWORD[64+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[32+rax] + vmovdqu xmm13,XMMWORD[32+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm4,XMMWORD[80+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[96+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[112+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + + vmovdqa xmm6,XMMWORD[((80-32))+rax] + vpxor xmm6,xmm6,xmm0 + vmovdqu xmm5,XMMWORD[((80-32))+rcx] + + vpclmulqdq xmm4,xmm6,xmm5,0x01 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x10 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm4,XMMWORD[128+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + + vpsrldq xmm4,xmm1,8 + vpxor xmm5,xmm2,xmm4 + vpslldq xmm4,xmm1,8 + vpxor xmm0,xmm3,xmm4 + + vmovdqa xmm3,XMMWORD[poly] + + vmovdqu xmm4,XMMWORD[144+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[160+r8] + vpalignr xmm2,xmm0,xmm0,8 + vpclmulqdq xmm0,xmm0,xmm3,0x10 + vpxor xmm0,xmm2,xmm0 + + vpxor xmm4,xmm6,XMMWORD[rdi] + vaesenclast xmm7,xmm7,xmm4 + vpxor xmm4,xmm6,XMMWORD[16+rdi] + vaesenclast xmm8,xmm8,xmm4 + vpxor xmm4,xmm6,XMMWORD[32+rdi] + vaesenclast xmm9,xmm9,xmm4 + vpxor xmm4,xmm6,XMMWORD[48+rdi] + vaesenclast xmm10,xmm10,xmm4 + vpxor xmm4,xmm6,XMMWORD[64+rdi] + vaesenclast xmm11,xmm11,xmm4 + vpxor xmm4,xmm6,XMMWORD[80+rdi] + vaesenclast xmm12,xmm12,xmm4 + + vpalignr xmm2,xmm0,xmm0,8 + vpclmulqdq xmm0,xmm0,xmm3,0x10 + vpxor xmm0,xmm2,xmm0 + + vmovdqu XMMWORD[rsi],xmm7 + vmovdqu XMMWORD[16+rsi],xmm8 + vmovdqu XMMWORD[32+rsi],xmm9 + vmovdqu XMMWORD[48+rsi],xmm10 + vmovdqu XMMWORD[64+rsi],xmm11 + vmovdqu XMMWORD[80+rsi],xmm12 + + vpxor xmm0,xmm0,xmm5 + + lea rdi,[96+rdi] + lea rsi,[96+rsi] + jmp NEAR $L$128_dec_loop1 + +$L$128_dec_finish_96: + vmovdqa xmm6,xmm12 + vmovdqa XMMWORD[(16-32)+rax],xmm11 + vmovdqa XMMWORD[(32-32)+rax],xmm10 + vmovdqa XMMWORD[(48-32)+rax],xmm9 + vmovdqa XMMWORD[(64-32)+rax],xmm8 + vmovdqa XMMWORD[(80-32)+rax],xmm7 + + vmovdqu xmm4,XMMWORD[((0-32))+rcx] + vpclmulqdq xmm1,xmm6,xmm4,0x10 + vpclmulqdq xmm2,xmm6,xmm4,0x11 + vpclmulqdq xmm3,xmm6,xmm4,0x00 + vpclmulqdq xmm4,xmm6,xmm4,0x01 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm6,XMMWORD[((-16))+rax] + vmovdqu xmm13,XMMWORD[((-16))+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm6,XMMWORD[rax] + vmovdqu xmm13,XMMWORD[rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm6,XMMWORD[16+rax] + vmovdqu xmm13,XMMWORD[16+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm6,XMMWORD[32+rax] + vmovdqu xmm13,XMMWORD[32+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm6,XMMWORD[((80-32))+rax] + vpxor xmm6,xmm6,xmm0 + vmovdqu xmm5,XMMWORD[((80-32))+rcx] + vpclmulqdq xmm4,xmm6,xmm5,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x01 + vpxor xmm1,xmm1,xmm4 + + vpsrldq xmm4,xmm1,8 + vpxor xmm5,xmm2,xmm4 + vpslldq xmm4,xmm1,8 + vpxor xmm0,xmm3,xmm4 + + vmovdqa xmm3,XMMWORD[poly] + + vpalignr xmm2,xmm0,xmm0,8 + vpclmulqdq xmm0,xmm0,xmm3,0x10 + vpxor xmm0,xmm2,xmm0 + + vpalignr xmm2,xmm0,xmm0,8 + vpclmulqdq xmm0,xmm0,xmm3,0x10 + vpxor xmm0,xmm2,xmm0 + + vpxor xmm0,xmm0,xmm5 + +$L$128_dec_loop2: + + + + cmp r9,16 + jb NEAR $L$128_dec_out + sub r9,16 + + vmovdqa xmm2,xmm15 + vpaddd xmm15,xmm15,XMMWORD[one] + + vpxor xmm2,xmm2,XMMWORD[r8] + vaesenc xmm2,xmm2,XMMWORD[16+r8] + vaesenc xmm2,xmm2,XMMWORD[32+r8] + vaesenc xmm2,xmm2,XMMWORD[48+r8] + vaesenc xmm2,xmm2,XMMWORD[64+r8] + vaesenc xmm2,xmm2,XMMWORD[80+r8] + vaesenc xmm2,xmm2,XMMWORD[96+r8] + vaesenc xmm2,xmm2,XMMWORD[112+r8] + vaesenc xmm2,xmm2,XMMWORD[128+r8] + vaesenc xmm2,xmm2,XMMWORD[144+r8] + vaesenclast xmm2,xmm2,XMMWORD[160+r8] + vpxor xmm2,xmm2,XMMWORD[rdi] + vmovdqu XMMWORD[rsi],xmm2 + add rdi,16 + add rsi,16 + + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm1,XMMWORD[((-32))+rcx] + call GFMUL + + jmp NEAR $L$128_dec_loop2 + +$L$128_dec_out: + vmovdqu XMMWORD[rdx],xmm0 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes128gcmsiv_dec: +global aes128gcmsiv_ecb_enc_block + +ALIGN 16 +aes128gcmsiv_ecb_enc_block: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes128gcmsiv_ecb_enc_block: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + vmovdqa xmm1,XMMWORD[rdi] + + vpxor xmm1,xmm1,XMMWORD[rdx] + vaesenc xmm1,xmm1,XMMWORD[16+rdx] + vaesenc xmm1,xmm1,XMMWORD[32+rdx] + vaesenc xmm1,xmm1,XMMWORD[48+rdx] + vaesenc xmm1,xmm1,XMMWORD[64+rdx] + vaesenc xmm1,xmm1,XMMWORD[80+rdx] + vaesenc xmm1,xmm1,XMMWORD[96+rdx] + vaesenc xmm1,xmm1,XMMWORD[112+rdx] + vaesenc xmm1,xmm1,XMMWORD[128+rdx] + vaesenc xmm1,xmm1,XMMWORD[144+rdx] + vaesenclast xmm1,xmm1,XMMWORD[160+rdx] + + vmovdqa XMMWORD[rsi],xmm1 + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes128gcmsiv_ecb_enc_block: +global aes256gcmsiv_aes_ks_enc_x1 + +ALIGN 16 +aes256gcmsiv_aes_ks_enc_x1: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + +_CET_ENDBR + vmovdqa xmm0,XMMWORD[con1] + vmovdqa xmm15,XMMWORD[mask] + vmovdqa xmm8,XMMWORD[rdi] + vmovdqa xmm1,XMMWORD[rcx] + vmovdqa xmm3,XMMWORD[16+rcx] + vpxor xmm8,xmm8,xmm1 + vaesenc xmm8,xmm8,xmm3 + vmovdqu XMMWORD[rdx],xmm1 + vmovdqu XMMWORD[16+rdx],xmm3 + vpxor xmm14,xmm14,xmm14 + + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpslldq xmm4,xmm1,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vaesenc xmm8,xmm8,xmm1 + vmovdqu XMMWORD[32+rdx],xmm1 + + vpshufd xmm2,xmm1,0xff + vaesenclast xmm2,xmm2,xmm14 + vpslldq xmm4,xmm3,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpxor xmm3,xmm3,xmm2 + vaesenc xmm8,xmm8,xmm3 + vmovdqu XMMWORD[48+rdx],xmm3 + + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpslldq xmm4,xmm1,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vaesenc xmm8,xmm8,xmm1 + vmovdqu XMMWORD[64+rdx],xmm1 + + vpshufd xmm2,xmm1,0xff + vaesenclast xmm2,xmm2,xmm14 + vpslldq xmm4,xmm3,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpxor xmm3,xmm3,xmm2 + vaesenc xmm8,xmm8,xmm3 + vmovdqu XMMWORD[80+rdx],xmm3 + + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpslldq xmm4,xmm1,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vaesenc xmm8,xmm8,xmm1 + vmovdqu XMMWORD[96+rdx],xmm1 + + vpshufd xmm2,xmm1,0xff + vaesenclast xmm2,xmm2,xmm14 + vpslldq xmm4,xmm3,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpxor xmm3,xmm3,xmm2 + vaesenc xmm8,xmm8,xmm3 + vmovdqu XMMWORD[112+rdx],xmm3 + + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpslldq xmm4,xmm1,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vaesenc xmm8,xmm8,xmm1 + vmovdqu XMMWORD[128+rdx],xmm1 + + vpshufd xmm2,xmm1,0xff + vaesenclast xmm2,xmm2,xmm14 + vpslldq xmm4,xmm3,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpxor xmm3,xmm3,xmm2 + vaesenc xmm8,xmm8,xmm3 + vmovdqu XMMWORD[144+rdx],xmm3 + + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpslldq xmm4,xmm1,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vaesenc xmm8,xmm8,xmm1 + vmovdqu XMMWORD[160+rdx],xmm1 + + vpshufd xmm2,xmm1,0xff + vaesenclast xmm2,xmm2,xmm14 + vpslldq xmm4,xmm3,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpxor xmm3,xmm3,xmm2 + vaesenc xmm8,xmm8,xmm3 + vmovdqu XMMWORD[176+rdx],xmm3 + + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslld xmm0,xmm0,1 + vpslldq xmm4,xmm1,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vaesenc xmm8,xmm8,xmm1 + vmovdqu XMMWORD[192+rdx],xmm1 + + vpshufd xmm2,xmm1,0xff + vaesenclast xmm2,xmm2,xmm14 + vpslldq xmm4,xmm3,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm3,xmm3,xmm4 + vpxor xmm3,xmm3,xmm2 + vaesenc xmm8,xmm8,xmm3 + vmovdqu XMMWORD[208+rdx],xmm3 + + vpshufb xmm2,xmm3,xmm15 + vaesenclast xmm2,xmm2,xmm0 + vpslldq xmm4,xmm1,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpslldq xmm4,xmm4,4 + vpxor xmm1,xmm1,xmm4 + vpxor xmm1,xmm1,xmm2 + vaesenclast xmm8,xmm8,xmm1 + vmovdqu XMMWORD[224+rdx],xmm1 + + vmovdqa XMMWORD[rsi],xmm8 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes256gcmsiv_aes_ks_enc_x1: +global aes256gcmsiv_ecb_enc_block + +ALIGN 16 +aes256gcmsiv_ecb_enc_block: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes256gcmsiv_ecb_enc_block: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + vmovdqa xmm1,XMMWORD[rdi] + vpxor xmm1,xmm1,XMMWORD[rdx] + vaesenc xmm1,xmm1,XMMWORD[16+rdx] + vaesenc xmm1,xmm1,XMMWORD[32+rdx] + vaesenc xmm1,xmm1,XMMWORD[48+rdx] + vaesenc xmm1,xmm1,XMMWORD[64+rdx] + vaesenc xmm1,xmm1,XMMWORD[80+rdx] + vaesenc xmm1,xmm1,XMMWORD[96+rdx] + vaesenc xmm1,xmm1,XMMWORD[112+rdx] + vaesenc xmm1,xmm1,XMMWORD[128+rdx] + vaesenc xmm1,xmm1,XMMWORD[144+rdx] + vaesenc xmm1,xmm1,XMMWORD[160+rdx] + vaesenc xmm1,xmm1,XMMWORD[176+rdx] + vaesenc xmm1,xmm1,XMMWORD[192+rdx] + vaesenc xmm1,xmm1,XMMWORD[208+rdx] + vaesenclast xmm1,xmm1,XMMWORD[224+rdx] + vmovdqa XMMWORD[rsi],xmm1 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes256gcmsiv_ecb_enc_block: +global aes256gcmsiv_enc_msg_x4 + +ALIGN 16 +aes256gcmsiv_enc_msg_x4: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes256gcmsiv_enc_msg_x4: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + test r8,r8 + jnz NEAR $L$256_enc_msg_x4_start + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$256_enc_msg_x4_start: + mov r10,r8 + shr r8,4 + shl r10,60 + jz NEAR $L$256_enc_msg_x4_start2 + add r8,1 + +$L$256_enc_msg_x4_start2: + mov r10,r8 + shl r10,62 + shr r10,62 + + + vmovdqa xmm15,XMMWORD[rdx] + vpor xmm15,xmm15,XMMWORD[OR_MASK] + + vmovdqa xmm4,XMMWORD[four] + vmovdqa xmm0,xmm15 + vpaddd xmm1,xmm15,XMMWORD[one] + vpaddd xmm2,xmm15,XMMWORD[two] + vpaddd xmm3,xmm15,XMMWORD[three] + + shr r8,2 + je NEAR $L$256_enc_msg_x4_check_remainder + + sub rsi,64 + sub rdi,64 + +$L$256_enc_msg_x4_loop1: + add rsi,64 + add rdi,64 + + vmovdqa xmm5,xmm0 + vmovdqa xmm6,xmm1 + vmovdqa xmm7,xmm2 + vmovdqa xmm8,xmm3 + + vpxor xmm5,xmm5,XMMWORD[rcx] + vpxor xmm6,xmm6,XMMWORD[rcx] + vpxor xmm7,xmm7,XMMWORD[rcx] + vpxor xmm8,xmm8,XMMWORD[rcx] + + vmovdqu xmm12,XMMWORD[16+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vpaddd xmm0,xmm0,xmm4 + vmovdqu xmm12,XMMWORD[32+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vpaddd xmm1,xmm1,xmm4 + vmovdqu xmm12,XMMWORD[48+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vpaddd xmm2,xmm2,xmm4 + vmovdqu xmm12,XMMWORD[64+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vpaddd xmm3,xmm3,xmm4 + + vmovdqu xmm12,XMMWORD[80+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[96+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[112+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[128+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[144+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[160+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[176+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[192+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[208+rcx] + vaesenc xmm5,xmm5,xmm12 + vaesenc xmm6,xmm6,xmm12 + vaesenc xmm7,xmm7,xmm12 + vaesenc xmm8,xmm8,xmm12 + + vmovdqu xmm12,XMMWORD[224+rcx] + vaesenclast xmm5,xmm5,xmm12 + vaesenclast xmm6,xmm6,xmm12 + vaesenclast xmm7,xmm7,xmm12 + vaesenclast xmm8,xmm8,xmm12 + + + + vpxor xmm5,xmm5,XMMWORD[rdi] + vpxor xmm6,xmm6,XMMWORD[16+rdi] + vpxor xmm7,xmm7,XMMWORD[32+rdi] + vpxor xmm8,xmm8,XMMWORD[48+rdi] + + sub r8,1 + + vmovdqu XMMWORD[rsi],xmm5 + vmovdqu XMMWORD[16+rsi],xmm6 + vmovdqu XMMWORD[32+rsi],xmm7 + vmovdqu XMMWORD[48+rsi],xmm8 + + jne NEAR $L$256_enc_msg_x4_loop1 + + add rsi,64 + add rdi,64 + +$L$256_enc_msg_x4_check_remainder: + cmp r10,0 + je NEAR $L$256_enc_msg_x4_out + +$L$256_enc_msg_x4_loop2: + + + + vmovdqa xmm5,xmm0 + vpaddd xmm0,xmm0,XMMWORD[one] + vpxor xmm5,xmm5,XMMWORD[rcx] + vaesenc xmm5,xmm5,XMMWORD[16+rcx] + vaesenc xmm5,xmm5,XMMWORD[32+rcx] + vaesenc xmm5,xmm5,XMMWORD[48+rcx] + vaesenc xmm5,xmm5,XMMWORD[64+rcx] + vaesenc xmm5,xmm5,XMMWORD[80+rcx] + vaesenc xmm5,xmm5,XMMWORD[96+rcx] + vaesenc xmm5,xmm5,XMMWORD[112+rcx] + vaesenc xmm5,xmm5,XMMWORD[128+rcx] + vaesenc xmm5,xmm5,XMMWORD[144+rcx] + vaesenc xmm5,xmm5,XMMWORD[160+rcx] + vaesenc xmm5,xmm5,XMMWORD[176+rcx] + vaesenc xmm5,xmm5,XMMWORD[192+rcx] + vaesenc xmm5,xmm5,XMMWORD[208+rcx] + vaesenclast xmm5,xmm5,XMMWORD[224+rcx] + + + vpxor xmm5,xmm5,XMMWORD[rdi] + + vmovdqu XMMWORD[rsi],xmm5 + + add rdi,16 + add rsi,16 + + sub r10,1 + jne NEAR $L$256_enc_msg_x4_loop2 + +$L$256_enc_msg_x4_out: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes256gcmsiv_enc_msg_x4: +global aes256gcmsiv_enc_msg_x8 + +ALIGN 16 +aes256gcmsiv_enc_msg_x8: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes256gcmsiv_enc_msg_x8: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + test r8,r8 + jnz NEAR $L$256_enc_msg_x8_start + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$256_enc_msg_x8_start: + + mov r11,rsp + sub r11,16 + and r11,-64 + + mov r10,r8 + shr r8,4 + shl r10,60 + jz NEAR $L$256_enc_msg_x8_start2 + add r8,1 + +$L$256_enc_msg_x8_start2: + mov r10,r8 + shl r10,61 + shr r10,61 + + + vmovdqa xmm1,XMMWORD[rdx] + vpor xmm1,xmm1,XMMWORD[OR_MASK] + + + vpaddd xmm0,xmm1,XMMWORD[seven] + vmovdqa XMMWORD[r11],xmm0 + vpaddd xmm9,xmm1,XMMWORD[one] + vpaddd xmm10,xmm1,XMMWORD[two] + vpaddd xmm11,xmm1,XMMWORD[three] + vpaddd xmm12,xmm1,XMMWORD[four] + vpaddd xmm13,xmm1,XMMWORD[five] + vpaddd xmm14,xmm1,XMMWORD[six] + vmovdqa xmm0,xmm1 + + shr r8,3 + jz NEAR $L$256_enc_msg_x8_check_remainder + + sub rsi,128 + sub rdi,128 + +$L$256_enc_msg_x8_loop1: + add rsi,128 + add rdi,128 + + vmovdqa xmm1,xmm0 + vmovdqa xmm2,xmm9 + vmovdqa xmm3,xmm10 + vmovdqa xmm4,xmm11 + vmovdqa xmm5,xmm12 + vmovdqa xmm6,xmm13 + vmovdqa xmm7,xmm14 + + vmovdqa xmm8,XMMWORD[r11] + + vpxor xmm1,xmm1,XMMWORD[rcx] + vpxor xmm2,xmm2,XMMWORD[rcx] + vpxor xmm3,xmm3,XMMWORD[rcx] + vpxor xmm4,xmm4,XMMWORD[rcx] + vpxor xmm5,xmm5,XMMWORD[rcx] + vpxor xmm6,xmm6,XMMWORD[rcx] + vpxor xmm7,xmm7,XMMWORD[rcx] + vpxor xmm8,xmm8,XMMWORD[rcx] + + vmovdqu xmm15,XMMWORD[16+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vmovdqa xmm14,XMMWORD[r11] + vpaddd xmm14,xmm14,XMMWORD[eight] + vmovdqa XMMWORD[r11],xmm14 + vmovdqu xmm15,XMMWORD[32+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpsubd xmm14,xmm14,XMMWORD[one] + vmovdqu xmm15,XMMWORD[48+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm0,xmm0,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[64+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm9,xmm9,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[80+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm10,xmm10,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[96+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm11,xmm11,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[112+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm12,xmm12,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[128+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vpaddd xmm13,xmm13,XMMWORD[eight] + vmovdqu xmm15,XMMWORD[144+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vmovdqu xmm15,XMMWORD[160+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vmovdqu xmm15,XMMWORD[176+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vmovdqu xmm15,XMMWORD[192+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vmovdqu xmm15,XMMWORD[208+rcx] + vaesenc xmm1,xmm1,xmm15 + vaesenc xmm2,xmm2,xmm15 + vaesenc xmm3,xmm3,xmm15 + vaesenc xmm4,xmm4,xmm15 + vaesenc xmm5,xmm5,xmm15 + vaesenc xmm6,xmm6,xmm15 + vaesenc xmm7,xmm7,xmm15 + vaesenc xmm8,xmm8,xmm15 + + vmovdqu xmm15,XMMWORD[224+rcx] + vaesenclast xmm1,xmm1,xmm15 + vaesenclast xmm2,xmm2,xmm15 + vaesenclast xmm3,xmm3,xmm15 + vaesenclast xmm4,xmm4,xmm15 + vaesenclast xmm5,xmm5,xmm15 + vaesenclast xmm6,xmm6,xmm15 + vaesenclast xmm7,xmm7,xmm15 + vaesenclast xmm8,xmm8,xmm15 + + + + vpxor xmm1,xmm1,XMMWORD[rdi] + vpxor xmm2,xmm2,XMMWORD[16+rdi] + vpxor xmm3,xmm3,XMMWORD[32+rdi] + vpxor xmm4,xmm4,XMMWORD[48+rdi] + vpxor xmm5,xmm5,XMMWORD[64+rdi] + vpxor xmm6,xmm6,XMMWORD[80+rdi] + vpxor xmm7,xmm7,XMMWORD[96+rdi] + vpxor xmm8,xmm8,XMMWORD[112+rdi] + + sub r8,1 + + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + vmovdqu XMMWORD[80+rsi],xmm6 + vmovdqu XMMWORD[96+rsi],xmm7 + vmovdqu XMMWORD[112+rsi],xmm8 + + jne NEAR $L$256_enc_msg_x8_loop1 + + add rsi,128 + add rdi,128 + +$L$256_enc_msg_x8_check_remainder: + cmp r10,0 + je NEAR $L$256_enc_msg_x8_out + +$L$256_enc_msg_x8_loop2: + + + vmovdqa xmm1,xmm0 + vpaddd xmm0,xmm0,XMMWORD[one] + + vpxor xmm1,xmm1,XMMWORD[rcx] + vaesenc xmm1,xmm1,XMMWORD[16+rcx] + vaesenc xmm1,xmm1,XMMWORD[32+rcx] + vaesenc xmm1,xmm1,XMMWORD[48+rcx] + vaesenc xmm1,xmm1,XMMWORD[64+rcx] + vaesenc xmm1,xmm1,XMMWORD[80+rcx] + vaesenc xmm1,xmm1,XMMWORD[96+rcx] + vaesenc xmm1,xmm1,XMMWORD[112+rcx] + vaesenc xmm1,xmm1,XMMWORD[128+rcx] + vaesenc xmm1,xmm1,XMMWORD[144+rcx] + vaesenc xmm1,xmm1,XMMWORD[160+rcx] + vaesenc xmm1,xmm1,XMMWORD[176+rcx] + vaesenc xmm1,xmm1,XMMWORD[192+rcx] + vaesenc xmm1,xmm1,XMMWORD[208+rcx] + vaesenclast xmm1,xmm1,XMMWORD[224+rcx] + + + vpxor xmm1,xmm1,XMMWORD[rdi] + + vmovdqu XMMWORD[rsi],xmm1 + + add rdi,16 + add rsi,16 + sub r10,1 + jnz NEAR $L$256_enc_msg_x8_loop2 + +$L$256_enc_msg_x8_out: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + + +$L$SEH_end_aes256gcmsiv_enc_msg_x8: +global aes256gcmsiv_dec + +ALIGN 16 +aes256gcmsiv_dec: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes256gcmsiv_dec: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + test r9,~15 + jnz NEAR $L$256_dec_start + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$256_dec_start: + vzeroupper + vmovdqa xmm0,XMMWORD[rdx] + + + vmovdqu xmm15,XMMWORD[16+rdx] + vpor xmm15,xmm15,XMMWORD[OR_MASK] + mov rax,rdx + + lea rax,[32+rax] + lea rcx,[32+rcx] + + and r9,~15 + + + cmp r9,96 + jb NEAR $L$256_dec_loop2 + + + sub r9,96 + vmovdqa xmm7,xmm15 + vpaddd xmm8,xmm7,XMMWORD[one] + vpaddd xmm9,xmm7,XMMWORD[two] + vpaddd xmm10,xmm9,XMMWORD[one] + vpaddd xmm11,xmm9,XMMWORD[two] + vpaddd xmm12,xmm11,XMMWORD[one] + vpaddd xmm15,xmm11,XMMWORD[two] + + vpxor xmm7,xmm7,XMMWORD[r8] + vpxor xmm8,xmm8,XMMWORD[r8] + vpxor xmm9,xmm9,XMMWORD[r8] + vpxor xmm10,xmm10,XMMWORD[r8] + vpxor xmm11,xmm11,XMMWORD[r8] + vpxor xmm12,xmm12,XMMWORD[r8] + + vmovdqu xmm4,XMMWORD[16+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[32+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[48+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[64+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[80+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[96+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[112+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[128+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[144+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[160+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[176+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[192+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[208+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[224+r8] + vaesenclast xmm7,xmm7,xmm4 + vaesenclast xmm8,xmm8,xmm4 + vaesenclast xmm9,xmm9,xmm4 + vaesenclast xmm10,xmm10,xmm4 + vaesenclast xmm11,xmm11,xmm4 + vaesenclast xmm12,xmm12,xmm4 + + + vpxor xmm7,xmm7,XMMWORD[rdi] + vpxor xmm8,xmm8,XMMWORD[16+rdi] + vpxor xmm9,xmm9,XMMWORD[32+rdi] + vpxor xmm10,xmm10,XMMWORD[48+rdi] + vpxor xmm11,xmm11,XMMWORD[64+rdi] + vpxor xmm12,xmm12,XMMWORD[80+rdi] + + vmovdqu XMMWORD[rsi],xmm7 + vmovdqu XMMWORD[16+rsi],xmm8 + vmovdqu XMMWORD[32+rsi],xmm9 + vmovdqu XMMWORD[48+rsi],xmm10 + vmovdqu XMMWORD[64+rsi],xmm11 + vmovdqu XMMWORD[80+rsi],xmm12 + + add rdi,96 + add rsi,96 + jmp NEAR $L$256_dec_loop1 + + +ALIGN 64 +$L$256_dec_loop1: + cmp r9,96 + jb NEAR $L$256_dec_finish_96 + sub r9,96 + + vmovdqa xmm6,xmm12 + vmovdqa XMMWORD[(16-32)+rax],xmm11 + vmovdqa XMMWORD[(32-32)+rax],xmm10 + vmovdqa XMMWORD[(48-32)+rax],xmm9 + vmovdqa XMMWORD[(64-32)+rax],xmm8 + vmovdqa XMMWORD[(80-32)+rax],xmm7 + + vmovdqa xmm7,xmm15 + vpaddd xmm8,xmm7,XMMWORD[one] + vpaddd xmm9,xmm7,XMMWORD[two] + vpaddd xmm10,xmm9,XMMWORD[one] + vpaddd xmm11,xmm9,XMMWORD[two] + vpaddd xmm12,xmm11,XMMWORD[one] + vpaddd xmm15,xmm11,XMMWORD[two] + + vmovdqa xmm4,XMMWORD[r8] + vpxor xmm7,xmm7,xmm4 + vpxor xmm8,xmm8,xmm4 + vpxor xmm9,xmm9,xmm4 + vpxor xmm10,xmm10,xmm4 + vpxor xmm11,xmm11,xmm4 + vpxor xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[((0-32))+rcx] + vpclmulqdq xmm2,xmm6,xmm4,0x11 + vpclmulqdq xmm3,xmm6,xmm4,0x00 + vpclmulqdq xmm1,xmm6,xmm4,0x01 + vpclmulqdq xmm4,xmm6,xmm4,0x10 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm4,XMMWORD[16+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[((-16))+rax] + vmovdqu xmm13,XMMWORD[((-16))+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm4,XMMWORD[32+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[rax] + vmovdqu xmm13,XMMWORD[rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm4,XMMWORD[48+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[16+rax] + vmovdqu xmm13,XMMWORD[16+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm4,XMMWORD[64+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[32+rax] + vmovdqu xmm13,XMMWORD[32+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm4,XMMWORD[80+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[96+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[112+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + + vmovdqa xmm6,XMMWORD[((80-32))+rax] + vpxor xmm6,xmm6,xmm0 + vmovdqu xmm5,XMMWORD[((80-32))+rcx] + + vpclmulqdq xmm4,xmm6,xmm5,0x01 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x10 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm4,XMMWORD[128+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + + vpsrldq xmm4,xmm1,8 + vpxor xmm5,xmm2,xmm4 + vpslldq xmm4,xmm1,8 + vpxor xmm0,xmm3,xmm4 + + vmovdqa xmm3,XMMWORD[poly] + + vmovdqu xmm4,XMMWORD[144+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[160+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[176+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[192+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm4,XMMWORD[208+r8] + vaesenc xmm7,xmm7,xmm4 + vaesenc xmm8,xmm8,xmm4 + vaesenc xmm9,xmm9,xmm4 + vaesenc xmm10,xmm10,xmm4 + vaesenc xmm11,xmm11,xmm4 + vaesenc xmm12,xmm12,xmm4 + + vmovdqu xmm6,XMMWORD[224+r8] + vpalignr xmm2,xmm0,xmm0,8 + vpclmulqdq xmm0,xmm0,xmm3,0x10 + vpxor xmm0,xmm2,xmm0 + + vpxor xmm4,xmm6,XMMWORD[rdi] + vaesenclast xmm7,xmm7,xmm4 + vpxor xmm4,xmm6,XMMWORD[16+rdi] + vaesenclast xmm8,xmm8,xmm4 + vpxor xmm4,xmm6,XMMWORD[32+rdi] + vaesenclast xmm9,xmm9,xmm4 + vpxor xmm4,xmm6,XMMWORD[48+rdi] + vaesenclast xmm10,xmm10,xmm4 + vpxor xmm4,xmm6,XMMWORD[64+rdi] + vaesenclast xmm11,xmm11,xmm4 + vpxor xmm4,xmm6,XMMWORD[80+rdi] + vaesenclast xmm12,xmm12,xmm4 + + vpalignr xmm2,xmm0,xmm0,8 + vpclmulqdq xmm0,xmm0,xmm3,0x10 + vpxor xmm0,xmm2,xmm0 + + vmovdqu XMMWORD[rsi],xmm7 + vmovdqu XMMWORD[16+rsi],xmm8 + vmovdqu XMMWORD[32+rsi],xmm9 + vmovdqu XMMWORD[48+rsi],xmm10 + vmovdqu XMMWORD[64+rsi],xmm11 + vmovdqu XMMWORD[80+rsi],xmm12 + + vpxor xmm0,xmm0,xmm5 + + lea rdi,[96+rdi] + lea rsi,[96+rsi] + jmp NEAR $L$256_dec_loop1 + +$L$256_dec_finish_96: + vmovdqa xmm6,xmm12 + vmovdqa XMMWORD[(16-32)+rax],xmm11 + vmovdqa XMMWORD[(32-32)+rax],xmm10 + vmovdqa XMMWORD[(48-32)+rax],xmm9 + vmovdqa XMMWORD[(64-32)+rax],xmm8 + vmovdqa XMMWORD[(80-32)+rax],xmm7 + + vmovdqu xmm4,XMMWORD[((0-32))+rcx] + vpclmulqdq xmm1,xmm6,xmm4,0x10 + vpclmulqdq xmm2,xmm6,xmm4,0x11 + vpclmulqdq xmm3,xmm6,xmm4,0x00 + vpclmulqdq xmm4,xmm6,xmm4,0x01 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm6,XMMWORD[((-16))+rax] + vmovdqu xmm13,XMMWORD[((-16))+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm6,XMMWORD[rax] + vmovdqu xmm13,XMMWORD[rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm6,XMMWORD[16+rax] + vmovdqu xmm13,XMMWORD[16+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + vmovdqu xmm6,XMMWORD[32+rax] + vmovdqu xmm13,XMMWORD[32+rcx] + + vpclmulqdq xmm4,xmm6,xmm13,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm13,0x01 + vpxor xmm1,xmm1,xmm4 + + + vmovdqu xmm6,XMMWORD[((80-32))+rax] + vpxor xmm6,xmm6,xmm0 + vmovdqu xmm5,XMMWORD[((80-32))+rcx] + vpclmulqdq xmm4,xmm6,xmm5,0x11 + vpxor xmm2,xmm2,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x00 + vpxor xmm3,xmm3,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x10 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm4,xmm6,xmm5,0x01 + vpxor xmm1,xmm1,xmm4 + + vpsrldq xmm4,xmm1,8 + vpxor xmm5,xmm2,xmm4 + vpslldq xmm4,xmm1,8 + vpxor xmm0,xmm3,xmm4 + + vmovdqa xmm3,XMMWORD[poly] + + vpalignr xmm2,xmm0,xmm0,8 + vpclmulqdq xmm0,xmm0,xmm3,0x10 + vpxor xmm0,xmm2,xmm0 + + vpalignr xmm2,xmm0,xmm0,8 + vpclmulqdq xmm0,xmm0,xmm3,0x10 + vpxor xmm0,xmm2,xmm0 + + vpxor xmm0,xmm0,xmm5 + +$L$256_dec_loop2: + + + + cmp r9,16 + jb NEAR $L$256_dec_out + sub r9,16 + + vmovdqa xmm2,xmm15 + vpaddd xmm15,xmm15,XMMWORD[one] + + vpxor xmm2,xmm2,XMMWORD[r8] + vaesenc xmm2,xmm2,XMMWORD[16+r8] + vaesenc xmm2,xmm2,XMMWORD[32+r8] + vaesenc xmm2,xmm2,XMMWORD[48+r8] + vaesenc xmm2,xmm2,XMMWORD[64+r8] + vaesenc xmm2,xmm2,XMMWORD[80+r8] + vaesenc xmm2,xmm2,XMMWORD[96+r8] + vaesenc xmm2,xmm2,XMMWORD[112+r8] + vaesenc xmm2,xmm2,XMMWORD[128+r8] + vaesenc xmm2,xmm2,XMMWORD[144+r8] + vaesenc xmm2,xmm2,XMMWORD[160+r8] + vaesenc xmm2,xmm2,XMMWORD[176+r8] + vaesenc xmm2,xmm2,XMMWORD[192+r8] + vaesenc xmm2,xmm2,XMMWORD[208+r8] + vaesenclast xmm2,xmm2,XMMWORD[224+r8] + vpxor xmm2,xmm2,XMMWORD[rdi] + vmovdqu XMMWORD[rsi],xmm2 + add rdi,16 + add rsi,16 + + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm1,XMMWORD[((-32))+rcx] + call GFMUL + + jmp NEAR $L$256_dec_loop2 + +$L$256_dec_out: + vmovdqu XMMWORD[rdx],xmm0 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes256gcmsiv_dec: +global aes256gcmsiv_kdf + +ALIGN 16 +aes256gcmsiv_kdf: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aes256gcmsiv_kdf: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_CET_ENDBR + + + + + vmovdqa xmm1,XMMWORD[rdx] + vmovdqa xmm4,XMMWORD[rdi] + vmovdqa xmm11,XMMWORD[and_mask] + vmovdqa xmm8,XMMWORD[one] + vpshufd xmm4,xmm4,0x90 + vpand xmm4,xmm4,xmm11 + vpaddd xmm6,xmm4,xmm8 + vpaddd xmm7,xmm6,xmm8 + vpaddd xmm11,xmm7,xmm8 + vpaddd xmm12,xmm11,xmm8 + vpaddd xmm13,xmm12,xmm8 + + vpxor xmm4,xmm4,xmm1 + vpxor xmm6,xmm6,xmm1 + vpxor xmm7,xmm7,xmm1 + vpxor xmm11,xmm11,xmm1 + vpxor xmm12,xmm12,xmm1 + vpxor xmm13,xmm13,xmm1 + + vmovdqa xmm1,XMMWORD[16+rdx] + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + + vmovdqa xmm2,XMMWORD[32+rdx] + vaesenc xmm4,xmm4,xmm2 + vaesenc xmm6,xmm6,xmm2 + vaesenc xmm7,xmm7,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + vaesenc xmm13,xmm13,xmm2 + + vmovdqa xmm1,XMMWORD[48+rdx] + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + + vmovdqa xmm2,XMMWORD[64+rdx] + vaesenc xmm4,xmm4,xmm2 + vaesenc xmm6,xmm6,xmm2 + vaesenc xmm7,xmm7,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + vaesenc xmm13,xmm13,xmm2 + + vmovdqa xmm1,XMMWORD[80+rdx] + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + + vmovdqa xmm2,XMMWORD[96+rdx] + vaesenc xmm4,xmm4,xmm2 + vaesenc xmm6,xmm6,xmm2 + vaesenc xmm7,xmm7,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + vaesenc xmm13,xmm13,xmm2 + + vmovdqa xmm1,XMMWORD[112+rdx] + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + + vmovdqa xmm2,XMMWORD[128+rdx] + vaesenc xmm4,xmm4,xmm2 + vaesenc xmm6,xmm6,xmm2 + vaesenc xmm7,xmm7,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + vaesenc xmm13,xmm13,xmm2 + + vmovdqa xmm1,XMMWORD[144+rdx] + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + + vmovdqa xmm2,XMMWORD[160+rdx] + vaesenc xmm4,xmm4,xmm2 + vaesenc xmm6,xmm6,xmm2 + vaesenc xmm7,xmm7,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + vaesenc xmm13,xmm13,xmm2 + + vmovdqa xmm1,XMMWORD[176+rdx] + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + + vmovdqa xmm2,XMMWORD[192+rdx] + vaesenc xmm4,xmm4,xmm2 + vaesenc xmm6,xmm6,xmm2 + vaesenc xmm7,xmm7,xmm2 + vaesenc xmm11,xmm11,xmm2 + vaesenc xmm12,xmm12,xmm2 + vaesenc xmm13,xmm13,xmm2 + + vmovdqa xmm1,XMMWORD[208+rdx] + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + + vmovdqa xmm2,XMMWORD[224+rdx] + vaesenclast xmm4,xmm4,xmm2 + vaesenclast xmm6,xmm6,xmm2 + vaesenclast xmm7,xmm7,xmm2 + vaesenclast xmm11,xmm11,xmm2 + vaesenclast xmm12,xmm12,xmm2 + vaesenclast xmm13,xmm13,xmm2 + + + vmovdqa XMMWORD[rsi],xmm4 + vmovdqa XMMWORD[16+rsi],xmm6 + vmovdqa XMMWORD[32+rsi],xmm7 + vmovdqa XMMWORD[48+rsi],xmm11 + vmovdqa XMMWORD[64+rsi],xmm12 + vmovdqa XMMWORD[80+rsi],xmm13 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_aes256gcmsiv_kdf: +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-armv4-linux.S b/yass/third_party/boringssl/src/gen/crypto/chacha-armv4-linux.S new file mode 100644 index 0000000000..2255dd2b4b --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-armv4-linux.S @@ -0,0 +1,1451 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +#include + +@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both +@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. +.arch armv7-a + +.text +#if defined(__thumb2__) || defined(__clang__) +.syntax unified +#endif +#if defined(__thumb2__) +.thumb +#else +.code 32 +#endif + +#if defined(__thumb2__) || defined(__clang__) +#define ldrhsb ldrbhs +#endif + +.align 5 +.Lsigma: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral +.Lone: +.long 1,0,0,0 + +.globl ChaCha20_ctr32_nohw +.hidden ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,%function +.align 5 +ChaCha20_ctr32_nohw: + ldr r12,[sp,#0] @ pull pointer to counter and nonce + stmdb sp!,{r0,r1,r2,r4-r11,lr} + adr r14,.Lsigma + ldmia r12,{r4,r5,r6,r7} @ load counter and nonce + sub sp,sp,#4*(16) @ off-load area + stmdb sp!,{r4,r5,r6,r7} @ copy counter and nonce + ldmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} @ load key + ldmia r14,{r0,r1,r2,r3} @ load sigma + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy key + stmdb sp!,{r0,r1,r2,r3} @ copy sigma + str r10,[sp,#4*(16+10)] @ off-load "rx" + str r11,[sp,#4*(16+11)] @ off-load "rx" + b .Loop_outer_enter + +.align 4 +.Loop_outer: + ldmia sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ load key material + str r11,[sp,#4*(32+2)] @ save len + str r12, [sp,#4*(32+1)] @ save inp + str r14, [sp,#4*(32+0)] @ save out +.Loop_outer_enter: + ldr r11, [sp,#4*(15)] + ldr r12,[sp,#4*(12)] @ modulo-scheduled load + ldr r10, [sp,#4*(13)] + ldr r14,[sp,#4*(14)] + str r11, [sp,#4*(16+15)] + mov r11,#10 + b .Loop + +.align 4 +.Loop: + subs r11,r11,#1 + add r0,r0,r4 + mov r12,r12,ror#16 + add r1,r1,r5 + mov r10,r10,ror#16 + eor r12,r12,r0,ror#16 + eor r10,r10,r1,ror#16 + add r8,r8,r12 + mov r4,r4,ror#20 + add r9,r9,r10 + mov r5,r5,ror#20 + eor r4,r4,r8,ror#20 + eor r5,r5,r9,ror#20 + add r0,r0,r4 + mov r12,r12,ror#24 + add r1,r1,r5 + mov r10,r10,ror#24 + eor r12,r12,r0,ror#24 + eor r10,r10,r1,ror#24 + add r8,r8,r12 + mov r4,r4,ror#25 + add r9,r9,r10 + mov r5,r5,ror#25 + str r10,[sp,#4*(16+13)] + ldr r10,[sp,#4*(16+15)] + eor r4,r4,r8,ror#25 + eor r5,r5,r9,ror#25 + str r8,[sp,#4*(16+8)] + ldr r8,[sp,#4*(16+10)] + add r2,r2,r6 + mov r14,r14,ror#16 + str r9,[sp,#4*(16+9)] + ldr r9,[sp,#4*(16+11)] + add r3,r3,r7 + mov r10,r10,ror#16 + eor r14,r14,r2,ror#16 + eor r10,r10,r3,ror#16 + add r8,r8,r14 + mov r6,r6,ror#20 + add r9,r9,r10 + mov r7,r7,ror#20 + eor r6,r6,r8,ror#20 + eor r7,r7,r9,ror#20 + add r2,r2,r6 + mov r14,r14,ror#24 + add r3,r3,r7 + mov r10,r10,ror#24 + eor r14,r14,r2,ror#24 + eor r10,r10,r3,ror#24 + add r8,r8,r14 + mov r6,r6,ror#25 + add r9,r9,r10 + mov r7,r7,ror#25 + eor r6,r6,r8,ror#25 + eor r7,r7,r9,ror#25 + add r0,r0,r5 + mov r10,r10,ror#16 + add r1,r1,r6 + mov r12,r12,ror#16 + eor r10,r10,r0,ror#16 + eor r12,r12,r1,ror#16 + add r8,r8,r10 + mov r5,r5,ror#20 + add r9,r9,r12 + mov r6,r6,ror#20 + eor r5,r5,r8,ror#20 + eor r6,r6,r9,ror#20 + add r0,r0,r5 + mov r10,r10,ror#24 + add r1,r1,r6 + mov r12,r12,ror#24 + eor r10,r10,r0,ror#24 + eor r12,r12,r1,ror#24 + add r8,r8,r10 + mov r5,r5,ror#25 + str r10,[sp,#4*(16+15)] + ldr r10,[sp,#4*(16+13)] + add r9,r9,r12 + mov r6,r6,ror#25 + eor r5,r5,r8,ror#25 + eor r6,r6,r9,ror#25 + str r8,[sp,#4*(16+10)] + ldr r8,[sp,#4*(16+8)] + add r2,r2,r7 + mov r10,r10,ror#16 + str r9,[sp,#4*(16+11)] + ldr r9,[sp,#4*(16+9)] + add r3,r3,r4 + mov r14,r14,ror#16 + eor r10,r10,r2,ror#16 + eor r14,r14,r3,ror#16 + add r8,r8,r10 + mov r7,r7,ror#20 + add r9,r9,r14 + mov r4,r4,ror#20 + eor r7,r7,r8,ror#20 + eor r4,r4,r9,ror#20 + add r2,r2,r7 + mov r10,r10,ror#24 + add r3,r3,r4 + mov r14,r14,ror#24 + eor r10,r10,r2,ror#24 + eor r14,r14,r3,ror#24 + add r8,r8,r10 + mov r7,r7,ror#25 + add r9,r9,r14 + mov r4,r4,ror#25 + eor r7,r7,r8,ror#25 + eor r4,r4,r9,ror#25 + bne .Loop + + ldr r11,[sp,#4*(32+2)] @ load len + + str r8, [sp,#4*(16+8)] @ modulo-scheduled store + str r9, [sp,#4*(16+9)] + str r12,[sp,#4*(16+12)] + str r10, [sp,#4*(16+13)] + str r14,[sp,#4*(16+14)] + + @ at this point we have first half of 512-bit result in + @ rx and second half at sp+4*(16+8) + + cmp r11,#64 @ done yet? +#ifdef __thumb2__ + itete lo +#endif + addlo r12,sp,#4*(0) @ shortcut or ... + ldrhs r12,[sp,#4*(32+1)] @ ... load inp + addlo r14,sp,#4*(0) @ shortcut or ... + ldrhs r14,[sp,#4*(32+0)] @ ... load out + + ldr r8,[sp,#4*(0)] @ load key material + ldr r9,[sp,#4*(1)] + +#if __ARM_ARCH>=6 || !defined(__ARMEB__) +# if __ARM_ARCH<7 + orr r10,r12,r14 + tst r10,#3 @ are input and output aligned? + ldr r10,[sp,#4*(2)] + bne .Lunaligned + cmp r11,#64 @ restore flags +# else + ldr r10,[sp,#4*(2)] +# endif + ldr r11,[sp,#4*(3)] + + add r0,r0,r8 @ accumulate key material + add r1,r1,r9 +# ifdef __thumb2__ + itt hs +# endif + ldrhs r8,[r12],#16 @ load input + ldrhs r9,[r12,#-12] + + add r2,r2,r10 + add r3,r3,r11 +# ifdef __thumb2__ + itt hs +# endif + ldrhs r10,[r12,#-8] + ldrhs r11,[r12,#-4] +# if __ARM_ARCH>=6 && defined(__ARMEB__) + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif +# ifdef __thumb2__ + itt hs +# endif + eorhs r0,r0,r8 @ xor with input + eorhs r1,r1,r9 + add r8,sp,#4*(4) + str r0,[r14],#16 @ store output +# ifdef __thumb2__ + itt hs +# endif + eorhs r2,r2,r10 + eorhs r3,r3,r11 + ldmia r8,{r8,r9,r10,r11} @ load key material + str r1,[r14,#-12] + str r2,[r14,#-8] + str r3,[r14,#-4] + + add r4,r4,r8 @ accumulate key material + add r5,r5,r9 +# ifdef __thumb2__ + itt hs +# endif + ldrhs r8,[r12],#16 @ load input + ldrhs r9,[r12,#-12] + add r6,r6,r10 + add r7,r7,r11 +# ifdef __thumb2__ + itt hs +# endif + ldrhs r10,[r12,#-8] + ldrhs r11,[r12,#-4] +# if __ARM_ARCH>=6 && defined(__ARMEB__) + rev r4,r4 + rev r5,r5 + rev r6,r6 + rev r7,r7 +# endif +# ifdef __thumb2__ + itt hs +# endif + eorhs r4,r4,r8 + eorhs r5,r5,r9 + add r8,sp,#4*(8) + str r4,[r14],#16 @ store output +# ifdef __thumb2__ + itt hs +# endif + eorhs r6,r6,r10 + eorhs r7,r7,r11 + str r5,[r14,#-12] + ldmia r8,{r8,r9,r10,r11} @ load key material + str r6,[r14,#-8] + add r0,sp,#4*(16+8) + str r7,[r14,#-4] + + ldmia r0,{r0,r1,r2,r3,r4,r5,r6,r7} @ load second half + + add r0,r0,r8 @ accumulate key material + add r1,r1,r9 +# ifdef __thumb2__ + itt hs +# endif + ldrhs r8,[r12],#16 @ load input + ldrhs r9,[r12,#-12] +# ifdef __thumb2__ + itt hi +# endif + strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it + strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it + add r2,r2,r10 + add r3,r3,r11 +# ifdef __thumb2__ + itt hs +# endif + ldrhs r10,[r12,#-8] + ldrhs r11,[r12,#-4] +# if __ARM_ARCH>=6 && defined(__ARMEB__) + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif +# ifdef __thumb2__ + itt hs +# endif + eorhs r0,r0,r8 + eorhs r1,r1,r9 + add r8,sp,#4*(12) + str r0,[r14],#16 @ store output +# ifdef __thumb2__ + itt hs +# endif + eorhs r2,r2,r10 + eorhs r3,r3,r11 + str r1,[r14,#-12] + ldmia r8,{r8,r9,r10,r11} @ load key material + str r2,[r14,#-8] + str r3,[r14,#-4] + + add r4,r4,r8 @ accumulate key material + add r5,r5,r9 +# ifdef __thumb2__ + itt hi +# endif + addhi r8,r8,#1 @ next counter value + strhi r8,[sp,#4*(12)] @ save next counter value +# ifdef __thumb2__ + itt hs +# endif + ldrhs r8,[r12],#16 @ load input + ldrhs r9,[r12,#-12] + add r6,r6,r10 + add r7,r7,r11 +# ifdef __thumb2__ + itt hs +# endif + ldrhs r10,[r12,#-8] + ldrhs r11,[r12,#-4] +# if __ARM_ARCH>=6 && defined(__ARMEB__) + rev r4,r4 + rev r5,r5 + rev r6,r6 + rev r7,r7 +# endif +# ifdef __thumb2__ + itt hs +# endif + eorhs r4,r4,r8 + eorhs r5,r5,r9 +# ifdef __thumb2__ + it ne +# endif + ldrne r8,[sp,#4*(32+2)] @ re-load len +# ifdef __thumb2__ + itt hs +# endif + eorhs r6,r6,r10 + eorhs r7,r7,r11 + str r4,[r14],#16 @ store output + str r5,[r14,#-12] +# ifdef __thumb2__ + it hs +# endif + subhs r11,r8,#64 @ len-=64 + str r6,[r14,#-8] + str r7,[r14,#-4] + bhi .Loop_outer + + beq .Ldone +# if __ARM_ARCH<7 + b .Ltail + +.align 4 +.Lunaligned:@ unaligned endian-neutral path + cmp r11,#64 @ restore flags +# endif +#endif +#if __ARM_ARCH<7 + ldr r11,[sp,#4*(3)] + add r0,r0,r8 @ accumulate key material + add r1,r1,r9 + add r2,r2,r10 +# ifdef __thumb2__ + itete lo +# endif + eorlo r8,r8,r8 @ zero or ... + ldrhsb r8,[r12],#16 @ ... load input + eorlo r9,r9,r9 + ldrhsb r9,[r12,#-12] + + add r3,r3,r11 +# ifdef __thumb2__ + itete lo +# endif + eorlo r10,r10,r10 + ldrhsb r10,[r12,#-8] + eorlo r11,r11,r11 + ldrhsb r11,[r12,#-4] + + eor r0,r8,r0 @ xor with input (or zero) + eor r1,r9,r1 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-15] @ load more input + ldrhsb r9,[r12,#-11] + eor r2,r10,r2 + strb r0,[r14],#16 @ store output + eor r3,r11,r3 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-7] + ldrhsb r11,[r12,#-3] + strb r1,[r14,#-12] + eor r0,r8,r0,lsr#8 + strb r2,[r14,#-8] + eor r1,r9,r1,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-14] @ load more input + ldrhsb r9,[r12,#-10] + strb r3,[r14,#-4] + eor r2,r10,r2,lsr#8 + strb r0,[r14,#-15] + eor r3,r11,r3,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-6] + ldrhsb r11,[r12,#-2] + strb r1,[r14,#-11] + eor r0,r8,r0,lsr#8 + strb r2,[r14,#-7] + eor r1,r9,r1,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-13] @ load more input + ldrhsb r9,[r12,#-9] + strb r3,[r14,#-3] + eor r2,r10,r2,lsr#8 + strb r0,[r14,#-14] + eor r3,r11,r3,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-5] + ldrhsb r11,[r12,#-1] + strb r1,[r14,#-10] + strb r2,[r14,#-6] + eor r0,r8,r0,lsr#8 + strb r3,[r14,#-2] + eor r1,r9,r1,lsr#8 + strb r0,[r14,#-13] + eor r2,r10,r2,lsr#8 + strb r1,[r14,#-9] + eor r3,r11,r3,lsr#8 + strb r2,[r14,#-5] + strb r3,[r14,#-1] + add r8,sp,#4*(4+0) + ldmia r8,{r8,r9,r10,r11} @ load key material + add r0,sp,#4*(16+8) + add r4,r4,r8 @ accumulate key material + add r5,r5,r9 + add r6,r6,r10 +# ifdef __thumb2__ + itete lo +# endif + eorlo r8,r8,r8 @ zero or ... + ldrhsb r8,[r12],#16 @ ... load input + eorlo r9,r9,r9 + ldrhsb r9,[r12,#-12] + + add r7,r7,r11 +# ifdef __thumb2__ + itete lo +# endif + eorlo r10,r10,r10 + ldrhsb r10,[r12,#-8] + eorlo r11,r11,r11 + ldrhsb r11,[r12,#-4] + + eor r4,r8,r4 @ xor with input (or zero) + eor r5,r9,r5 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-15] @ load more input + ldrhsb r9,[r12,#-11] + eor r6,r10,r6 + strb r4,[r14],#16 @ store output + eor r7,r11,r7 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-7] + ldrhsb r11,[r12,#-3] + strb r5,[r14,#-12] + eor r4,r8,r4,lsr#8 + strb r6,[r14,#-8] + eor r5,r9,r5,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-14] @ load more input + ldrhsb r9,[r12,#-10] + strb r7,[r14,#-4] + eor r6,r10,r6,lsr#8 + strb r4,[r14,#-15] + eor r7,r11,r7,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-6] + ldrhsb r11,[r12,#-2] + strb r5,[r14,#-11] + eor r4,r8,r4,lsr#8 + strb r6,[r14,#-7] + eor r5,r9,r5,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-13] @ load more input + ldrhsb r9,[r12,#-9] + strb r7,[r14,#-3] + eor r6,r10,r6,lsr#8 + strb r4,[r14,#-14] + eor r7,r11,r7,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-5] + ldrhsb r11,[r12,#-1] + strb r5,[r14,#-10] + strb r6,[r14,#-6] + eor r4,r8,r4,lsr#8 + strb r7,[r14,#-2] + eor r5,r9,r5,lsr#8 + strb r4,[r14,#-13] + eor r6,r10,r6,lsr#8 + strb r5,[r14,#-9] + eor r7,r11,r7,lsr#8 + strb r6,[r14,#-5] + strb r7,[r14,#-1] + add r8,sp,#4*(4+4) + ldmia r8,{r8,r9,r10,r11} @ load key material + ldmia r0,{r0,r1,r2,r3,r4,r5,r6,r7} @ load second half +# ifdef __thumb2__ + itt hi +# endif + strhi r10,[sp,#4*(16+10)] @ copy "rx" + strhi r11,[sp,#4*(16+11)] @ copy "rx" + add r0,r0,r8 @ accumulate key material + add r1,r1,r9 + add r2,r2,r10 +# ifdef __thumb2__ + itete lo +# endif + eorlo r8,r8,r8 @ zero or ... + ldrhsb r8,[r12],#16 @ ... load input + eorlo r9,r9,r9 + ldrhsb r9,[r12,#-12] + + add r3,r3,r11 +# ifdef __thumb2__ + itete lo +# endif + eorlo r10,r10,r10 + ldrhsb r10,[r12,#-8] + eorlo r11,r11,r11 + ldrhsb r11,[r12,#-4] + + eor r0,r8,r0 @ xor with input (or zero) + eor r1,r9,r1 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-15] @ load more input + ldrhsb r9,[r12,#-11] + eor r2,r10,r2 + strb r0,[r14],#16 @ store output + eor r3,r11,r3 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-7] + ldrhsb r11,[r12,#-3] + strb r1,[r14,#-12] + eor r0,r8,r0,lsr#8 + strb r2,[r14,#-8] + eor r1,r9,r1,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-14] @ load more input + ldrhsb r9,[r12,#-10] + strb r3,[r14,#-4] + eor r2,r10,r2,lsr#8 + strb r0,[r14,#-15] + eor r3,r11,r3,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-6] + ldrhsb r11,[r12,#-2] + strb r1,[r14,#-11] + eor r0,r8,r0,lsr#8 + strb r2,[r14,#-7] + eor r1,r9,r1,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-13] @ load more input + ldrhsb r9,[r12,#-9] + strb r3,[r14,#-3] + eor r2,r10,r2,lsr#8 + strb r0,[r14,#-14] + eor r3,r11,r3,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-5] + ldrhsb r11,[r12,#-1] + strb r1,[r14,#-10] + strb r2,[r14,#-6] + eor r0,r8,r0,lsr#8 + strb r3,[r14,#-2] + eor r1,r9,r1,lsr#8 + strb r0,[r14,#-13] + eor r2,r10,r2,lsr#8 + strb r1,[r14,#-9] + eor r3,r11,r3,lsr#8 + strb r2,[r14,#-5] + strb r3,[r14,#-1] + add r8,sp,#4*(4+8) + ldmia r8,{r8,r9,r10,r11} @ load key material + add r4,r4,r8 @ accumulate key material +# ifdef __thumb2__ + itt hi +# endif + addhi r8,r8,#1 @ next counter value + strhi r8,[sp,#4*(12)] @ save next counter value + add r5,r5,r9 + add r6,r6,r10 +# ifdef __thumb2__ + itete lo +# endif + eorlo r8,r8,r8 @ zero or ... + ldrhsb r8,[r12],#16 @ ... load input + eorlo r9,r9,r9 + ldrhsb r9,[r12,#-12] + + add r7,r7,r11 +# ifdef __thumb2__ + itete lo +# endif + eorlo r10,r10,r10 + ldrhsb r10,[r12,#-8] + eorlo r11,r11,r11 + ldrhsb r11,[r12,#-4] + + eor r4,r8,r4 @ xor with input (or zero) + eor r5,r9,r5 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-15] @ load more input + ldrhsb r9,[r12,#-11] + eor r6,r10,r6 + strb r4,[r14],#16 @ store output + eor r7,r11,r7 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-7] + ldrhsb r11,[r12,#-3] + strb r5,[r14,#-12] + eor r4,r8,r4,lsr#8 + strb r6,[r14,#-8] + eor r5,r9,r5,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-14] @ load more input + ldrhsb r9,[r12,#-10] + strb r7,[r14,#-4] + eor r6,r10,r6,lsr#8 + strb r4,[r14,#-15] + eor r7,r11,r7,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-6] + ldrhsb r11,[r12,#-2] + strb r5,[r14,#-11] + eor r4,r8,r4,lsr#8 + strb r6,[r14,#-7] + eor r5,r9,r5,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r8,[r12,#-13] @ load more input + ldrhsb r9,[r12,#-9] + strb r7,[r14,#-3] + eor r6,r10,r6,lsr#8 + strb r4,[r14,#-14] + eor r7,r11,r7,lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb r10,[r12,#-5] + ldrhsb r11,[r12,#-1] + strb r5,[r14,#-10] + strb r6,[r14,#-6] + eor r4,r8,r4,lsr#8 + strb r7,[r14,#-2] + eor r5,r9,r5,lsr#8 + strb r4,[r14,#-13] + eor r6,r10,r6,lsr#8 + strb r5,[r14,#-9] + eor r7,r11,r7,lsr#8 + strb r6,[r14,#-5] + strb r7,[r14,#-1] +# ifdef __thumb2__ + it ne +# endif + ldrne r8,[sp,#4*(32+2)] @ re-load len +# ifdef __thumb2__ + it hs +# endif + subhs r11,r8,#64 @ len-=64 + bhi .Loop_outer + + beq .Ldone +#endif + +.Ltail: + ldr r12,[sp,#4*(32+1)] @ load inp + add r9,sp,#4*(0) + ldr r14,[sp,#4*(32+0)] @ load out + +.Loop_tail: + ldrb r10,[r9],#1 @ read buffer on stack + ldrb r11,[r12],#1 @ read input + subs r8,r8,#1 + eor r11,r11,r10 + strb r11,[r14],#1 @ store output + bne .Loop_tail + +.Ldone: + add sp,sp,#4*(32+3) + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} +.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.globl ChaCha20_ctr32_neon +.hidden ChaCha20_ctr32_neon +.type ChaCha20_ctr32_neon,%function +.align 5 +ChaCha20_ctr32_neon: + ldr r12,[sp,#0] @ pull pointer to counter and nonce + stmdb sp!,{r0,r1,r2,r4-r11,lr} + adr r14,.Lsigma + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI spec says so + stmdb sp!,{r0,r1,r2,r3} + + vld1.32 {q1,q2},[r3] @ load key + ldmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} @ load key + + sub sp,sp,#4*(16+16) + vld1.32 {q3},[r12] @ load counter and nonce + add r12,sp,#4*8 + ldmia r14,{r0,r1,r2,r3} @ load sigma + vld1.32 {q0},[r14]! @ load sigma + vld1.32 {q12},[r14] @ one + vst1.32 {q2,q3},[r12] @ copy 1/2key|counter|nonce + vst1.32 {q0,q1},[sp] @ copy sigma|1/2key + + str r10,[sp,#4*(16+10)] @ off-load "rx" + str r11,[sp,#4*(16+11)] @ off-load "rx" + vshl.i32 d26,d24,#1 @ two + vstr d24,[sp,#4*(16+0)] + vshl.i32 d28,d24,#2 @ four + vstr d26,[sp,#4*(16+2)] + vmov q4,q0 + vstr d28,[sp,#4*(16+4)] + vmov q8,q0 + vmov q5,q1 + vmov q9,q1 + b .Loop_neon_enter + +.align 4 +.Loop_neon_outer: + ldmia sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ load key material + cmp r11,#64*2 @ if len<=64*2 + bls .Lbreak_neon @ switch to integer-only + vmov q4,q0 + str r11,[sp,#4*(32+2)] @ save len + vmov q8,q0 + str r12, [sp,#4*(32+1)] @ save inp + vmov q5,q1 + str r14, [sp,#4*(32+0)] @ save out + vmov q9,q1 +.Loop_neon_enter: + ldr r11, [sp,#4*(15)] + vadd.i32 q7,q3,q12 @ counter+1 + ldr r12,[sp,#4*(12)] @ modulo-scheduled load + vmov q6,q2 + ldr r10, [sp,#4*(13)] + vmov q10,q2 + ldr r14,[sp,#4*(14)] + vadd.i32 q11,q7,q12 @ counter+2 + str r11, [sp,#4*(16+15)] + mov r11,#10 + add r12,r12,#3 @ counter+3 + b .Loop_neon + +.align 4 +.Loop_neon: + subs r11,r11,#1 + vadd.i32 q0,q0,q1 + add r0,r0,r4 + vadd.i32 q4,q4,q5 + mov r12,r12,ror#16 + vadd.i32 q8,q8,q9 + add r1,r1,r5 + veor q3,q3,q0 + mov r10,r10,ror#16 + veor q7,q7,q4 + eor r12,r12,r0,ror#16 + veor q11,q11,q8 + eor r10,r10,r1,ror#16 + vrev32.16 q3,q3 + add r8,r8,r12 + vrev32.16 q7,q7 + mov r4,r4,ror#20 + vrev32.16 q11,q11 + add r9,r9,r10 + vadd.i32 q2,q2,q3 + mov r5,r5,ror#20 + vadd.i32 q6,q6,q7 + eor r4,r4,r8,ror#20 + vadd.i32 q10,q10,q11 + eor r5,r5,r9,ror#20 + veor q12,q1,q2 + add r0,r0,r4 + veor q13,q5,q6 + mov r12,r12,ror#24 + veor q14,q9,q10 + add r1,r1,r5 + vshr.u32 q1,q12,#20 + mov r10,r10,ror#24 + vshr.u32 q5,q13,#20 + eor r12,r12,r0,ror#24 + vshr.u32 q9,q14,#20 + eor r10,r10,r1,ror#24 + vsli.32 q1,q12,#12 + add r8,r8,r12 + vsli.32 q5,q13,#12 + mov r4,r4,ror#25 + vsli.32 q9,q14,#12 + add r9,r9,r10 + vadd.i32 q0,q0,q1 + mov r5,r5,ror#25 + vadd.i32 q4,q4,q5 + str r10,[sp,#4*(16+13)] + vadd.i32 q8,q8,q9 + ldr r10,[sp,#4*(16+15)] + veor q12,q3,q0 + eor r4,r4,r8,ror#25 + veor q13,q7,q4 + eor r5,r5,r9,ror#25 + veor q14,q11,q8 + str r8,[sp,#4*(16+8)] + vshr.u32 q3,q12,#24 + ldr r8,[sp,#4*(16+10)] + vshr.u32 q7,q13,#24 + add r2,r2,r6 + vshr.u32 q11,q14,#24 + mov r14,r14,ror#16 + vsli.32 q3,q12,#8 + str r9,[sp,#4*(16+9)] + vsli.32 q7,q13,#8 + ldr r9,[sp,#4*(16+11)] + vsli.32 q11,q14,#8 + add r3,r3,r7 + vadd.i32 q2,q2,q3 + mov r10,r10,ror#16 + vadd.i32 q6,q6,q7 + eor r14,r14,r2,ror#16 + vadd.i32 q10,q10,q11 + eor r10,r10,r3,ror#16 + veor q12,q1,q2 + add r8,r8,r14 + veor q13,q5,q6 + mov r6,r6,ror#20 + veor q14,q9,q10 + add r9,r9,r10 + vshr.u32 q1,q12,#25 + mov r7,r7,ror#20 + vshr.u32 q5,q13,#25 + eor r6,r6,r8,ror#20 + vshr.u32 q9,q14,#25 + eor r7,r7,r9,ror#20 + vsli.32 q1,q12,#7 + add r2,r2,r6 + vsli.32 q5,q13,#7 + mov r14,r14,ror#24 + vsli.32 q9,q14,#7 + add r3,r3,r7 + vext.8 q2,q2,q2,#8 + mov r10,r10,ror#24 + vext.8 q6,q6,q6,#8 + eor r14,r14,r2,ror#24 + vext.8 q10,q10,q10,#8 + eor r10,r10,r3,ror#24 + vext.8 q1,q1,q1,#4 + add r8,r8,r14 + vext.8 q5,q5,q5,#4 + mov r6,r6,ror#25 + vext.8 q9,q9,q9,#4 + add r9,r9,r10 + vext.8 q3,q3,q3,#12 + mov r7,r7,ror#25 + vext.8 q7,q7,q7,#12 + eor r6,r6,r8,ror#25 + vext.8 q11,q11,q11,#12 + eor r7,r7,r9,ror#25 + vadd.i32 q0,q0,q1 + add r0,r0,r5 + vadd.i32 q4,q4,q5 + mov r10,r10,ror#16 + vadd.i32 q8,q8,q9 + add r1,r1,r6 + veor q3,q3,q0 + mov r12,r12,ror#16 + veor q7,q7,q4 + eor r10,r10,r0,ror#16 + veor q11,q11,q8 + eor r12,r12,r1,ror#16 + vrev32.16 q3,q3 + add r8,r8,r10 + vrev32.16 q7,q7 + mov r5,r5,ror#20 + vrev32.16 q11,q11 + add r9,r9,r12 + vadd.i32 q2,q2,q3 + mov r6,r6,ror#20 + vadd.i32 q6,q6,q7 + eor r5,r5,r8,ror#20 + vadd.i32 q10,q10,q11 + eor r6,r6,r9,ror#20 + veor q12,q1,q2 + add r0,r0,r5 + veor q13,q5,q6 + mov r10,r10,ror#24 + veor q14,q9,q10 + add r1,r1,r6 + vshr.u32 q1,q12,#20 + mov r12,r12,ror#24 + vshr.u32 q5,q13,#20 + eor r10,r10,r0,ror#24 + vshr.u32 q9,q14,#20 + eor r12,r12,r1,ror#24 + vsli.32 q1,q12,#12 + add r8,r8,r10 + vsli.32 q5,q13,#12 + mov r5,r5,ror#25 + vsli.32 q9,q14,#12 + str r10,[sp,#4*(16+15)] + vadd.i32 q0,q0,q1 + ldr r10,[sp,#4*(16+13)] + vadd.i32 q4,q4,q5 + add r9,r9,r12 + vadd.i32 q8,q8,q9 + mov r6,r6,ror#25 + veor q12,q3,q0 + eor r5,r5,r8,ror#25 + veor q13,q7,q4 + eor r6,r6,r9,ror#25 + veor q14,q11,q8 + str r8,[sp,#4*(16+10)] + vshr.u32 q3,q12,#24 + ldr r8,[sp,#4*(16+8)] + vshr.u32 q7,q13,#24 + add r2,r2,r7 + vshr.u32 q11,q14,#24 + mov r10,r10,ror#16 + vsli.32 q3,q12,#8 + str r9,[sp,#4*(16+11)] + vsli.32 q7,q13,#8 + ldr r9,[sp,#4*(16+9)] + vsli.32 q11,q14,#8 + add r3,r3,r4 + vadd.i32 q2,q2,q3 + mov r14,r14,ror#16 + vadd.i32 q6,q6,q7 + eor r10,r10,r2,ror#16 + vadd.i32 q10,q10,q11 + eor r14,r14,r3,ror#16 + veor q12,q1,q2 + add r8,r8,r10 + veor q13,q5,q6 + mov r7,r7,ror#20 + veor q14,q9,q10 + add r9,r9,r14 + vshr.u32 q1,q12,#25 + mov r4,r4,ror#20 + vshr.u32 q5,q13,#25 + eor r7,r7,r8,ror#20 + vshr.u32 q9,q14,#25 + eor r4,r4,r9,ror#20 + vsli.32 q1,q12,#7 + add r2,r2,r7 + vsli.32 q5,q13,#7 + mov r10,r10,ror#24 + vsli.32 q9,q14,#7 + add r3,r3,r4 + vext.8 q2,q2,q2,#8 + mov r14,r14,ror#24 + vext.8 q6,q6,q6,#8 + eor r10,r10,r2,ror#24 + vext.8 q10,q10,q10,#8 + eor r14,r14,r3,ror#24 + vext.8 q1,q1,q1,#12 + add r8,r8,r10 + vext.8 q5,q5,q5,#12 + mov r7,r7,ror#25 + vext.8 q9,q9,q9,#12 + add r9,r9,r14 + vext.8 q3,q3,q3,#4 + mov r4,r4,ror#25 + vext.8 q7,q7,q7,#4 + eor r7,r7,r8,ror#25 + vext.8 q11,q11,q11,#4 + eor r4,r4,r9,ror#25 + bne .Loop_neon + + add r11,sp,#32 + vld1.32 {q12,q13},[sp] @ load key material + vld1.32 {q14,q15},[r11] + + ldr r11,[sp,#4*(32+2)] @ load len + + str r8, [sp,#4*(16+8)] @ modulo-scheduled store + str r9, [sp,#4*(16+9)] + str r12,[sp,#4*(16+12)] + str r10, [sp,#4*(16+13)] + str r14,[sp,#4*(16+14)] + + @ at this point we have first half of 512-bit result in + @ rx and second half at sp+4*(16+8) + + ldr r12,[sp,#4*(32+1)] @ load inp + ldr r14,[sp,#4*(32+0)] @ load out + + vadd.i32 q0,q0,q12 @ accumulate key material + vadd.i32 q4,q4,q12 + vadd.i32 q8,q8,q12 + vldr d24,[sp,#4*(16+0)] @ one + + vadd.i32 q1,q1,q13 + vadd.i32 q5,q5,q13 + vadd.i32 q9,q9,q13 + vldr d26,[sp,#4*(16+2)] @ two + + vadd.i32 q2,q2,q14 + vadd.i32 q6,q6,q14 + vadd.i32 q10,q10,q14 + vadd.i32 d14,d14,d24 @ counter+1 + vadd.i32 d22,d22,d26 @ counter+2 + + vadd.i32 q3,q3,q15 + vadd.i32 q7,q7,q15 + vadd.i32 q11,q11,q15 + + cmp r11,#64*4 + blo .Ltail_neon + + vld1.8 {q12,q13},[r12]! @ load input + mov r11,sp + vld1.8 {q14,q15},[r12]! + veor q0,q0,q12 @ xor with input + veor q1,q1,q13 + vld1.8 {q12,q13},[r12]! + veor q2,q2,q14 + veor q3,q3,q15 + vld1.8 {q14,q15},[r12]! + + veor q4,q4,q12 + vst1.8 {q0,q1},[r14]! @ store output + veor q5,q5,q13 + vld1.8 {q12,q13},[r12]! + veor q6,q6,q14 + vst1.8 {q2,q3},[r14]! + veor q7,q7,q15 + vld1.8 {q14,q15},[r12]! + + veor q8,q8,q12 + vld1.32 {q0,q1},[r11]! @ load for next iteration + veor d25,d25,d25 + vldr d24,[sp,#4*(16+4)] @ four + veor q9,q9,q13 + vld1.32 {q2,q3},[r11] + veor q10,q10,q14 + vst1.8 {q4,q5},[r14]! + veor q11,q11,q15 + vst1.8 {q6,q7},[r14]! + + vadd.i32 d6,d6,d24 @ next counter value + vldr d24,[sp,#4*(16+0)] @ one + + ldmia sp,{r8,r9,r10,r11} @ load key material + add r0,r0,r8 @ accumulate key material + ldr r8,[r12],#16 @ load input + vst1.8 {q8,q9},[r14]! + add r1,r1,r9 + ldr r9,[r12,#-12] + vst1.8 {q10,q11},[r14]! + add r2,r2,r10 + ldr r10,[r12,#-8] + add r3,r3,r11 + ldr r11,[r12,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + eor r0,r0,r8 @ xor with input + add r8,sp,#4*(4) + eor r1,r1,r9 + str r0,[r14],#16 @ store output + eor r2,r2,r10 + str r1,[r14,#-12] + eor r3,r3,r11 + ldmia r8,{r8,r9,r10,r11} @ load key material + str r2,[r14,#-8] + str r3,[r14,#-4] + + add r4,r4,r8 @ accumulate key material + ldr r8,[r12],#16 @ load input + add r5,r5,r9 + ldr r9,[r12,#-12] + add r6,r6,r10 + ldr r10,[r12,#-8] + add r7,r7,r11 + ldr r11,[r12,#-4] +# ifdef __ARMEB__ + rev r4,r4 + rev r5,r5 + rev r6,r6 + rev r7,r7 +# endif + eor r4,r4,r8 + add r8,sp,#4*(8) + eor r5,r5,r9 + str r4,[r14],#16 @ store output + eor r6,r6,r10 + str r5,[r14,#-12] + eor r7,r7,r11 + ldmia r8,{r8,r9,r10,r11} @ load key material + str r6,[r14,#-8] + add r0,sp,#4*(16+8) + str r7,[r14,#-4] + + ldmia r0,{r0,r1,r2,r3,r4,r5,r6,r7} @ load second half + + add r0,r0,r8 @ accumulate key material + ldr r8,[r12],#16 @ load input + add r1,r1,r9 + ldr r9,[r12,#-12] +# ifdef __thumb2__ + it hi +# endif + strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it + add r2,r2,r10 + ldr r10,[r12,#-8] +# ifdef __thumb2__ + it hi +# endif + strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it + add r3,r3,r11 + ldr r11,[r12,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + eor r0,r0,r8 + add r8,sp,#4*(12) + eor r1,r1,r9 + str r0,[r14],#16 @ store output + eor r2,r2,r10 + str r1,[r14,#-12] + eor r3,r3,r11 + ldmia r8,{r8,r9,r10,r11} @ load key material + str r2,[r14,#-8] + str r3,[r14,#-4] + + add r4,r4,r8 @ accumulate key material + add r8,r8,#4 @ next counter value + add r5,r5,r9 + str r8,[sp,#4*(12)] @ save next counter value + ldr r8,[r12],#16 @ load input + add r6,r6,r10 + add r4,r4,#3 @ counter+3 + ldr r9,[r12,#-12] + add r7,r7,r11 + ldr r10,[r12,#-8] + ldr r11,[r12,#-4] +# ifdef __ARMEB__ + rev r4,r4 + rev r5,r5 + rev r6,r6 + rev r7,r7 +# endif + eor r4,r4,r8 +# ifdef __thumb2__ + it hi +# endif + ldrhi r8,[sp,#4*(32+2)] @ re-load len + eor r5,r5,r9 + eor r6,r6,r10 + str r4,[r14],#16 @ store output + eor r7,r7,r11 + str r5,[r14,#-12] + sub r11,r8,#64*4 @ len-=64*4 + str r6,[r14,#-8] + str r7,[r14,#-4] + bhi .Loop_neon_outer + + b .Ldone_neon + +.align 4 +.Lbreak_neon: + @ harmonize NEON and integer-only stack frames: load data + @ from NEON frame, but save to integer-only one; distance + @ between the two is 4*(32+4+16-32)=4*(20). + + str r11, [sp,#4*(20+32+2)] @ save len + add r11,sp,#4*(32+4) + str r12, [sp,#4*(20+32+1)] @ save inp + str r14, [sp,#4*(20+32+0)] @ save out + + ldr r12,[sp,#4*(16+10)] + ldr r14,[sp,#4*(16+11)] + vldmia r11,{d8,d9,d10,d11,d12,d13,d14,d15} @ fulfill ABI requirement + str r12,[sp,#4*(20+16+10)] @ copy "rx" + str r14,[sp,#4*(20+16+11)] @ copy "rx" + + ldr r11, [sp,#4*(15)] + ldr r12,[sp,#4*(12)] @ modulo-scheduled load + ldr r10, [sp,#4*(13)] + ldr r14,[sp,#4*(14)] + str r11, [sp,#4*(20+16+15)] + add r11,sp,#4*(20) + vst1.32 {q0,q1},[r11]! @ copy key + add sp,sp,#4*(20) @ switch frame + vst1.32 {q2,q3},[r11] + mov r11,#10 + b .Loop @ go integer-only + +.align 4 +.Ltail_neon: + cmp r11,#64*3 + bhs .L192_or_more_neon + cmp r11,#64*2 + bhs .L128_or_more_neon + cmp r11,#64*1 + bhs .L64_or_more_neon + + add r8,sp,#4*(8) + vst1.8 {q0,q1},[sp] + add r10,sp,#4*(0) + vst1.8 {q2,q3},[r8] + b .Loop_tail_neon + +.align 4 +.L64_or_more_neon: + vld1.8 {q12,q13},[r12]! + vld1.8 {q14,q15},[r12]! + veor q0,q0,q12 + veor q1,q1,q13 + veor q2,q2,q14 + veor q3,q3,q15 + vst1.8 {q0,q1},[r14]! + vst1.8 {q2,q3},[r14]! + + beq .Ldone_neon + + add r8,sp,#4*(8) + vst1.8 {q4,q5},[sp] + add r10,sp,#4*(0) + vst1.8 {q6,q7},[r8] + sub r11,r11,#64*1 @ len-=64*1 + b .Loop_tail_neon + +.align 4 +.L128_or_more_neon: + vld1.8 {q12,q13},[r12]! + vld1.8 {q14,q15},[r12]! + veor q0,q0,q12 + veor q1,q1,q13 + vld1.8 {q12,q13},[r12]! + veor q2,q2,q14 + veor q3,q3,q15 + vld1.8 {q14,q15},[r12]! + + veor q4,q4,q12 + veor q5,q5,q13 + vst1.8 {q0,q1},[r14]! + veor q6,q6,q14 + vst1.8 {q2,q3},[r14]! + veor q7,q7,q15 + vst1.8 {q4,q5},[r14]! + vst1.8 {q6,q7},[r14]! + + beq .Ldone_neon + + add r8,sp,#4*(8) + vst1.8 {q8,q9},[sp] + add r10,sp,#4*(0) + vst1.8 {q10,q11},[r8] + sub r11,r11,#64*2 @ len-=64*2 + b .Loop_tail_neon + +.align 4 +.L192_or_more_neon: + vld1.8 {q12,q13},[r12]! + vld1.8 {q14,q15},[r12]! + veor q0,q0,q12 + veor q1,q1,q13 + vld1.8 {q12,q13},[r12]! + veor q2,q2,q14 + veor q3,q3,q15 + vld1.8 {q14,q15},[r12]! + + veor q4,q4,q12 + veor q5,q5,q13 + vld1.8 {q12,q13},[r12]! + veor q6,q6,q14 + vst1.8 {q0,q1},[r14]! + veor q7,q7,q15 + vld1.8 {q14,q15},[r12]! + + veor q8,q8,q12 + vst1.8 {q2,q3},[r14]! + veor q9,q9,q13 + vst1.8 {q4,q5},[r14]! + veor q10,q10,q14 + vst1.8 {q6,q7},[r14]! + veor q11,q11,q15 + vst1.8 {q8,q9},[r14]! + vst1.8 {q10,q11},[r14]! + + beq .Ldone_neon + + ldmia sp,{r8,r9,r10,r11} @ load key material + add r0,r0,r8 @ accumulate key material + add r8,sp,#4*(4) + add r1,r1,r9 + add r2,r2,r10 + add r3,r3,r11 + ldmia r8,{r8,r9,r10,r11} @ load key material + + add r4,r4,r8 @ accumulate key material + add r8,sp,#4*(8) + add r5,r5,r9 + add r6,r6,r10 + add r7,r7,r11 + ldmia r8,{r8,r9,r10,r11} @ load key material +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 + rev r4,r4 + rev r5,r5 + rev r6,r6 + rev r7,r7 +# endif + stmia sp,{r0,r1,r2,r3,r4,r5,r6,r7} + add r0,sp,#4*(16+8) + + ldmia r0,{r0,r1,r2,r3,r4,r5,r6,r7} @ load second half + + add r0,r0,r8 @ accumulate key material + add r8,sp,#4*(12) + add r1,r1,r9 + add r2,r2,r10 + add r3,r3,r11 + ldmia r8,{r8,r9,r10,r11} @ load key material + + add r4,r4,r8 @ accumulate key material + add r8,sp,#4*(8) + add r5,r5,r9 + add r4,r4,#3 @ counter+3 + add r6,r6,r10 + add r7,r7,r11 + ldr r11,[sp,#4*(32+2)] @ re-load len +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 + rev r4,r4 + rev r5,r5 + rev r6,r6 + rev r7,r7 +# endif + stmia r8,{r0,r1,r2,r3,r4,r5,r6,r7} + add r10,sp,#4*(0) + sub r11,r11,#64*3 @ len-=64*3 + +.Loop_tail_neon: + ldrb r8,[r10],#1 @ read buffer on stack + ldrb r9,[r12],#1 @ read input + subs r11,r11,#1 + eor r8,r8,r9 + strb r8,[r14],#1 @ store output + bne .Loop_tail_neon + +.Ldone_neon: + add sp,sp,#4*(32+4) + vldmia sp,{d8,d9,d10,d11,d12,d13,d14,d15} + add sp,sp,#4*(16+3) + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} +.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon +#endif +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-apple.S b/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-apple.S new file mode 100644 index 0000000000..3807631e44 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-apple.S @@ -0,0 +1,1968 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +.section __TEXT,__const + +.align 5 +Lsigma: +.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral +Lone: +.long 1,0,0,0 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 + +.text + +.globl _ChaCha20_ctr32_nohw +.private_extern _ChaCha20_ctr32_nohw + +.align 5 +_ChaCha20_ctr32_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,Lsigma@PAGE + add x5,x5,Lsigma@PAGEOFF + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#64 + + ldp x22,x23,[x5] // load sigma + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ldp x28,x30,[x4] // load counter +#ifdef __AARCH64EB__ + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + +Loop_outer: + mov w5,w22 // unpack key block + lsr x6,x22,#32 + mov w7,w23 + lsr x8,x23,#32 + mov w9,w24 + lsr x10,x24,#32 + mov w11,w25 + lsr x12,x25,#32 + mov w13,w26 + lsr x14,x26,#32 + mov w15,w27 + lsr x16,x27,#32 + mov w17,w28 + lsr x19,x28,#32 + mov w20,w30 + lsr x21,x30,#32 + + mov x4,#10 + subs x2,x2,#64 +Loop: + sub x4,x4,#1 + add w5,w5,w9 + add w6,w6,w10 + add w7,w7,w11 + add w8,w8,w12 + eor w17,w17,w5 + eor w19,w19,w6 + eor w20,w20,w7 + eor w21,w21,w8 + ror w17,w17,#16 + ror w19,w19,#16 + ror w20,w20,#16 + ror w21,w21,#16 + add w13,w13,w17 + add w14,w14,w19 + add w15,w15,w20 + add w16,w16,w21 + eor w9,w9,w13 + eor w10,w10,w14 + eor w11,w11,w15 + eor w12,w12,w16 + ror w9,w9,#20 + ror w10,w10,#20 + ror w11,w11,#20 + ror w12,w12,#20 + add w5,w5,w9 + add w6,w6,w10 + add w7,w7,w11 + add w8,w8,w12 + eor w17,w17,w5 + eor w19,w19,w6 + eor w20,w20,w7 + eor w21,w21,w8 + ror w17,w17,#24 + ror w19,w19,#24 + ror w20,w20,#24 + ror w21,w21,#24 + add w13,w13,w17 + add w14,w14,w19 + add w15,w15,w20 + add w16,w16,w21 + eor w9,w9,w13 + eor w10,w10,w14 + eor w11,w11,w15 + eor w12,w12,w16 + ror w9,w9,#25 + ror w10,w10,#25 + ror w11,w11,#25 + ror w12,w12,#25 + add w5,w5,w10 + add w6,w6,w11 + add w7,w7,w12 + add w8,w8,w9 + eor w21,w21,w5 + eor w17,w17,w6 + eor w19,w19,w7 + eor w20,w20,w8 + ror w21,w21,#16 + ror w17,w17,#16 + ror w19,w19,#16 + ror w20,w20,#16 + add w15,w15,w21 + add w16,w16,w17 + add w13,w13,w19 + add w14,w14,w20 + eor w10,w10,w15 + eor w11,w11,w16 + eor w12,w12,w13 + eor w9,w9,w14 + ror w10,w10,#20 + ror w11,w11,#20 + ror w12,w12,#20 + ror w9,w9,#20 + add w5,w5,w10 + add w6,w6,w11 + add w7,w7,w12 + add w8,w8,w9 + eor w21,w21,w5 + eor w17,w17,w6 + eor w19,w19,w7 + eor w20,w20,w8 + ror w21,w21,#24 + ror w17,w17,#24 + ror w19,w19,#24 + ror w20,w20,#24 + add w15,w15,w21 + add w16,w16,w17 + add w13,w13,w19 + add w14,w14,w20 + eor w10,w10,w15 + eor w11,w11,w16 + eor w12,w12,w13 + eor w9,w9,w14 + ror w10,w10,#25 + ror w11,w11,#25 + ror w12,w12,#25 + ror w9,w9,#25 + cbnz x4,Loop + + add w5,w5,w22 // accumulate key block + add x6,x6,x22,lsr#32 + add w7,w7,w23 + add x8,x8,x23,lsr#32 + add w9,w9,w24 + add x10,x10,x24,lsr#32 + add w11,w11,w25 + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add x21,x21,x30,lsr#32 + + b.lo Ltail + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#1 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + + b.hi Loop_outer + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.align 4 +Ltail: + add x2,x2,#64 +Less_than_64: + sub x0,x0,#1 + add x1,x1,x2 + add x0,x0,x2 + add x4,sp,x2 + neg x2,x2 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + stp x5,x7,[sp,#0] + stp x9,x11,[sp,#16] + stp x13,x15,[sp,#32] + stp x17,x20,[sp,#48] + +Loop_tail: + ldrb w10,[x1,x2] + ldrb w11,[x4,x2] + add x2,x2,#1 + eor w10,w10,w11 + strb w10,[x0,x2] + cbnz x2,Loop_tail + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.globl _ChaCha20_ctr32_neon +.private_extern _ChaCha20_ctr32_neon + +.align 5 +_ChaCha20_ctr32_neon: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,Lsigma@PAGE + add x5,x5,Lsigma@PAGEOFF + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + cmp x2,#512 + b.hs L512_or_more_neon + + sub sp,sp,#64 + + ldp x22,x23,[x5] // load sigma + ld1 {v24.4s},[x5],#16 + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ld1 {v25.4s,v26.4s},[x3] + ldp x28,x30,[x4] // load counter + ld1 {v27.4s},[x4] + ld1 {v31.4s},[x5] +#ifdef __AARCH64EB__ + rev64 v24.4s,v24.4s + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + add v27.4s,v27.4s,v31.4s // += 1 + add v28.4s,v27.4s,v31.4s + add v29.4s,v28.4s,v31.4s + shl v31.4s,v31.4s,#2 // 1 -> 4 + +Loop_outer_neon: + mov w5,w22 // unpack key block + lsr x6,x22,#32 + mov v0.16b,v24.16b + mov w7,w23 + lsr x8,x23,#32 + mov v4.16b,v24.16b + mov w9,w24 + lsr x10,x24,#32 + mov v16.16b,v24.16b + mov w11,w25 + mov v1.16b,v25.16b + lsr x12,x25,#32 + mov v5.16b,v25.16b + mov w13,w26 + mov v17.16b,v25.16b + lsr x14,x26,#32 + mov v3.16b,v27.16b + mov w15,w27 + mov v7.16b,v28.16b + lsr x16,x27,#32 + mov v19.16b,v29.16b + mov w17,w28 + mov v2.16b,v26.16b + lsr x19,x28,#32 + mov v6.16b,v26.16b + mov w20,w30 + mov v18.16b,v26.16b + lsr x21,x30,#32 + + mov x4,#10 + subs x2,x2,#256 +Loop_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v16.4s,v16.4s,v17.4s + add w7,w7,w11 + eor v3.16b,v3.16b,v0.16b + add w8,w8,w12 + eor v7.16b,v7.16b,v4.16b + eor w17,w17,w5 + eor v19.16b,v19.16b,v16.16b + eor w19,w19,w6 + rev32 v3.8h,v3.8h + eor w20,w20,w7 + rev32 v7.8h,v7.8h + eor w21,w21,w8 + rev32 v19.8h,v19.8h + ror w17,w17,#16 + add v2.4s,v2.4s,v3.4s + ror w19,w19,#16 + add v6.4s,v6.4s,v7.4s + ror w20,w20,#16 + add v18.4s,v18.4s,v19.4s + ror w21,w21,#16 + eor v20.16b,v1.16b,v2.16b + add w13,w13,w17 + eor v21.16b,v5.16b,v6.16b + add w14,w14,w19 + eor v22.16b,v17.16b,v18.16b + add w15,w15,w20 + ushr v1.4s,v20.4s,#20 + add w16,w16,w21 + ushr v5.4s,v21.4s,#20 + eor w9,w9,w13 + ushr v17.4s,v22.4s,#20 + eor w10,w10,w14 + sli v1.4s,v20.4s,#12 + eor w11,w11,w15 + sli v5.4s,v21.4s,#12 + eor w12,w12,w16 + sli v17.4s,v22.4s,#12 + ror w9,w9,#20 + add v0.4s,v0.4s,v1.4s + ror w10,w10,#20 + add v4.4s,v4.4s,v5.4s + ror w11,w11,#20 + add v16.4s,v16.4s,v17.4s + ror w12,w12,#20 + eor v20.16b,v3.16b,v0.16b + add w5,w5,w9 + eor v21.16b,v7.16b,v4.16b + add w6,w6,w10 + eor v22.16b,v19.16b,v16.16b + add w7,w7,w11 + ushr v3.4s,v20.4s,#24 + add w8,w8,w12 + ushr v7.4s,v21.4s,#24 + eor w17,w17,w5 + ushr v19.4s,v22.4s,#24 + eor w19,w19,w6 + sli v3.4s,v20.4s,#8 + eor w20,w20,w7 + sli v7.4s,v21.4s,#8 + eor w21,w21,w8 + sli v19.4s,v22.4s,#8 + ror w17,w17,#24 + add v2.4s,v2.4s,v3.4s + ror w19,w19,#24 + add v6.4s,v6.4s,v7.4s + ror w20,w20,#24 + add v18.4s,v18.4s,v19.4s + ror w21,w21,#24 + eor v20.16b,v1.16b,v2.16b + add w13,w13,w17 + eor v21.16b,v5.16b,v6.16b + add w14,w14,w19 + eor v22.16b,v17.16b,v18.16b + add w15,w15,w20 + ushr v1.4s,v20.4s,#25 + add w16,w16,w21 + ushr v5.4s,v21.4s,#25 + eor w9,w9,w13 + ushr v17.4s,v22.4s,#25 + eor w10,w10,w14 + sli v1.4s,v20.4s,#7 + eor w11,w11,w15 + sli v5.4s,v21.4s,#7 + eor w12,w12,w16 + sli v17.4s,v22.4s,#7 + ror w9,w9,#25 + ext v2.16b,v2.16b,v2.16b,#8 + ror w10,w10,#25 + ext v6.16b,v6.16b,v6.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w10 + add v4.4s,v4.4s,v5.4s + add w6,w6,w11 + add v16.4s,v16.4s,v17.4s + add w7,w7,w12 + eor v3.16b,v3.16b,v0.16b + add w8,w8,w9 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w5 + eor v19.16b,v19.16b,v16.16b + eor w17,w17,w6 + rev32 v3.8h,v3.8h + eor w19,w19,w7 + rev32 v7.8h,v7.8h + eor w20,w20,w8 + rev32 v19.8h,v19.8h + ror w21,w21,#16 + add v2.4s,v2.4s,v3.4s + ror w17,w17,#16 + add v6.4s,v6.4s,v7.4s + ror w19,w19,#16 + add v18.4s,v18.4s,v19.4s + ror w20,w20,#16 + eor v20.16b,v1.16b,v2.16b + add w15,w15,w21 + eor v21.16b,v5.16b,v6.16b + add w16,w16,w17 + eor v22.16b,v17.16b,v18.16b + add w13,w13,w19 + ushr v1.4s,v20.4s,#20 + add w14,w14,w20 + ushr v5.4s,v21.4s,#20 + eor w10,w10,w15 + ushr v17.4s,v22.4s,#20 + eor w11,w11,w16 + sli v1.4s,v20.4s,#12 + eor w12,w12,w13 + sli v5.4s,v21.4s,#12 + eor w9,w9,w14 + sli v17.4s,v22.4s,#12 + ror w10,w10,#20 + add v0.4s,v0.4s,v1.4s + ror w11,w11,#20 + add v4.4s,v4.4s,v5.4s + ror w12,w12,#20 + add v16.4s,v16.4s,v17.4s + ror w9,w9,#20 + eor v20.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v21.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v22.16b,v19.16b,v16.16b + add w7,w7,w12 + ushr v3.4s,v20.4s,#24 + add w8,w8,w9 + ushr v7.4s,v21.4s,#24 + eor w21,w21,w5 + ushr v19.4s,v22.4s,#24 + eor w17,w17,w6 + sli v3.4s,v20.4s,#8 + eor w19,w19,w7 + sli v7.4s,v21.4s,#8 + eor w20,w20,w8 + sli v19.4s,v22.4s,#8 + ror w21,w21,#24 + add v2.4s,v2.4s,v3.4s + ror w17,w17,#24 + add v6.4s,v6.4s,v7.4s + ror w19,w19,#24 + add v18.4s,v18.4s,v19.4s + ror w20,w20,#24 + eor v20.16b,v1.16b,v2.16b + add w15,w15,w21 + eor v21.16b,v5.16b,v6.16b + add w16,w16,w17 + eor v22.16b,v17.16b,v18.16b + add w13,w13,w19 + ushr v1.4s,v20.4s,#25 + add w14,w14,w20 + ushr v5.4s,v21.4s,#25 + eor w10,w10,w15 + ushr v17.4s,v22.4s,#25 + eor w11,w11,w16 + sli v1.4s,v20.4s,#7 + eor w12,w12,w13 + sli v5.4s,v21.4s,#7 + eor w9,w9,w14 + sli v17.4s,v22.4s,#7 + ror w10,w10,#25 + ext v2.16b,v2.16b,v2.16b,#8 + ror w11,w11,#25 + ext v6.16b,v6.16b,v6.16b,#8 + ror w12,w12,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + cbnz x4,Loop_neon + + add w5,w5,w22 // accumulate key block + add v0.4s,v0.4s,v24.4s + add x6,x6,x22,lsr#32 + add v4.4s,v4.4s,v24.4s + add w7,w7,w23 + add v16.4s,v16.4s,v24.4s + add x8,x8,x23,lsr#32 + add v2.4s,v2.4s,v26.4s + add w9,w9,w24 + add v6.4s,v6.4s,v26.4s + add x10,x10,x24,lsr#32 + add v18.4s,v18.4s,v26.4s + add w11,w11,w25 + add v3.4s,v3.4s,v27.4s + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add v7.4s,v7.4s,v28.4s + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add v19.4s,v19.4s,v29.4s + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add v1.4s,v1.4s,v25.4s + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add v5.4s,v5.4s,v25.4s + add x21,x21,x30,lsr#32 + add v17.4s,v17.4s,v25.4s + + b.lo Ltail_neon + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor v0.16b,v0.16b,v20.16b + eor x15,x15,x16 + eor v1.16b,v1.16b,v21.16b + eor x17,x17,x19 + eor v2.16b,v2.16b,v22.16b + eor x20,x20,x21 + eor v3.16b,v3.16b,v23.16b + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#4 // increment counter + stp x9,x11,[x0,#16] + add v27.4s,v27.4s,v31.4s // += 4 + stp x13,x15,[x0,#32] + add v28.4s,v28.4s,v31.4s + stp x17,x20,[x0,#48] + add v29.4s,v29.4s,v31.4s + add x0,x0,#64 + + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 + + eor v4.16b,v4.16b,v20.16b + eor v5.16b,v5.16b,v21.16b + eor v6.16b,v6.16b,v22.16b + eor v7.16b,v7.16b,v23.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + + eor v16.16b,v16.16b,v0.16b + eor v17.16b,v17.16b,v1.16b + eor v18.16b,v18.16b,v2.16b + eor v19.16b,v19.16b,v3.16b + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + + b.hi Loop_outer_neon + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +Ltail_neon: + add x2,x2,#256 + cmp x2,#64 + b.lo Less_than_64 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#4 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + b.eq Ldone_neon + sub x2,x2,#64 + cmp x2,#64 + b.lo Less_than_128 + + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor v0.16b,v0.16b,v20.16b + eor v1.16b,v1.16b,v21.16b + eor v2.16b,v2.16b,v22.16b + eor v3.16b,v3.16b,v23.16b + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + b.eq Ldone_neon + sub x2,x2,#64 + cmp x2,#64 + b.lo Less_than_192 + + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor v4.16b,v4.16b,v20.16b + eor v5.16b,v5.16b,v21.16b + eor v6.16b,v6.16b,v22.16b + eor v7.16b,v7.16b,v23.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + b.eq Ldone_neon + sub x2,x2,#64 + + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[sp] + b Last_neon + +Less_than_128: + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[sp] + b Last_neon +Less_than_192: + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[sp] + b Last_neon + +.align 4 +Last_neon: + sub x0,x0,#1 + add x1,x1,x2 + add x0,x0,x2 + add x4,sp,x2 + neg x2,x2 + +Loop_tail_neon: + ldrb w10,[x1,x2] + ldrb w11,[x4,x2] + add x2,x2,#1 + eor w10,w10,w11 + strb w10,[x0,x2] + cbnz x2,Loop_tail_neon + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + +Ldone_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.align 5 +ChaCha20_512_neon: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,Lsigma@PAGE + add x5,x5,Lsigma@PAGEOFF + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + +L512_or_more_neon: + sub sp,sp,#128+64 + + ldp x22,x23,[x5] // load sigma + ld1 {v24.4s},[x5],#16 + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ld1 {v25.4s,v26.4s},[x3] + ldp x28,x30,[x4] // load counter + ld1 {v27.4s},[x4] + ld1 {v31.4s},[x5] +#ifdef __AARCH64EB__ + rev64 v24.4s,v24.4s + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + add v27.4s,v27.4s,v31.4s // += 1 + stp q24,q25,[sp,#0] // off-load key block, invariant part + add v27.4s,v27.4s,v31.4s // not typo + str q26,[sp,#32] + add v28.4s,v27.4s,v31.4s + add v29.4s,v28.4s,v31.4s + add v30.4s,v29.4s,v31.4s + shl v31.4s,v31.4s,#2 // 1 -> 4 + + stp d8,d9,[sp,#128+0] // meet ABI requirements + stp d10,d11,[sp,#128+16] + stp d12,d13,[sp,#128+32] + stp d14,d15,[sp,#128+48] + + sub x2,x2,#512 // not typo + +Loop_outer_512_neon: + mov v0.16b,v24.16b + mov v4.16b,v24.16b + mov v8.16b,v24.16b + mov v12.16b,v24.16b + mov v16.16b,v24.16b + mov v20.16b,v24.16b + mov v1.16b,v25.16b + mov w5,w22 // unpack key block + mov v5.16b,v25.16b + lsr x6,x22,#32 + mov v9.16b,v25.16b + mov w7,w23 + mov v13.16b,v25.16b + lsr x8,x23,#32 + mov v17.16b,v25.16b + mov w9,w24 + mov v21.16b,v25.16b + lsr x10,x24,#32 + mov v3.16b,v27.16b + mov w11,w25 + mov v7.16b,v28.16b + lsr x12,x25,#32 + mov v11.16b,v29.16b + mov w13,w26 + mov v15.16b,v30.16b + lsr x14,x26,#32 + mov v2.16b,v26.16b + mov w15,w27 + mov v6.16b,v26.16b + lsr x16,x27,#32 + add v19.4s,v3.4s,v31.4s // +4 + mov w17,w28 + add v23.4s,v7.4s,v31.4s // +4 + lsr x19,x28,#32 + mov v10.16b,v26.16b + mov w20,w30 + mov v14.16b,v26.16b + lsr x21,x30,#32 + mov v18.16b,v26.16b + stp q27,q28,[sp,#48] // off-load key block, variable part + mov v22.16b,v26.16b + str q29,[sp,#80] + + mov x4,#5 + subs x2,x2,#512 +Loop_upper_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v11.16b,v11.16b,v11.16b,#12 + ext v15.16b,v15.16b,v15.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v23.16b,v23.16b,v23.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v9.16b,v9.16b,v9.16b,#4 + ext v13.16b,v13.16b,v13.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + ext v21.16b,v21.16b,v21.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v11.16b,v11.16b,v11.16b,#4 + ext v15.16b,v15.16b,v15.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v23.16b,v23.16b,v23.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v9.16b,v9.16b,v9.16b,#12 + ext v13.16b,v13.16b,v13.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + ext v21.16b,v21.16b,v21.16b,#12 + cbnz x4,Loop_upper_neon + + add w5,w5,w22 // accumulate key block + add x6,x6,x22,lsr#32 + add w7,w7,w23 + add x8,x8,x23,lsr#32 + add w9,w9,w24 + add x10,x10,x24,lsr#32 + add w11,w11,w25 + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add x21,x21,x30,lsr#32 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#1 // increment counter + mov w5,w22 // unpack key block + lsr x6,x22,#32 + stp x9,x11,[x0,#16] + mov w7,w23 + lsr x8,x23,#32 + stp x13,x15,[x0,#32] + mov w9,w24 + lsr x10,x24,#32 + stp x17,x20,[x0,#48] + add x0,x0,#64 + mov w11,w25 + lsr x12,x25,#32 + mov w13,w26 + lsr x14,x26,#32 + mov w15,w27 + lsr x16,x27,#32 + mov w17,w28 + lsr x19,x28,#32 + mov w20,w30 + lsr x21,x30,#32 + + mov x4,#5 +Loop_lower_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v11.16b,v11.16b,v11.16b,#12 + ext v15.16b,v15.16b,v15.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v23.16b,v23.16b,v23.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v9.16b,v9.16b,v9.16b,#4 + ext v13.16b,v13.16b,v13.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + ext v21.16b,v21.16b,v21.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v11.16b,v11.16b,v11.16b,#4 + ext v15.16b,v15.16b,v15.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v23.16b,v23.16b,v23.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v9.16b,v9.16b,v9.16b,#12 + ext v13.16b,v13.16b,v13.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + ext v21.16b,v21.16b,v21.16b,#12 + cbnz x4,Loop_lower_neon + + add w5,w5,w22 // accumulate key block + ldp q24,q25,[sp,#0] + add x6,x6,x22,lsr#32 + ldp q26,q27,[sp,#32] + add w7,w7,w23 + ldp q28,q29,[sp,#64] + add x8,x8,x23,lsr#32 + add v0.4s,v0.4s,v24.4s + add w9,w9,w24 + add v4.4s,v4.4s,v24.4s + add x10,x10,x24,lsr#32 + add v8.4s,v8.4s,v24.4s + add w11,w11,w25 + add v12.4s,v12.4s,v24.4s + add x12,x12,x25,lsr#32 + add v16.4s,v16.4s,v24.4s + add w13,w13,w26 + add v20.4s,v20.4s,v24.4s + add x14,x14,x26,lsr#32 + add v2.4s,v2.4s,v26.4s + add w15,w15,w27 + add v6.4s,v6.4s,v26.4s + add x16,x16,x27,lsr#32 + add v10.4s,v10.4s,v26.4s + add w17,w17,w28 + add v14.4s,v14.4s,v26.4s + add x19,x19,x28,lsr#32 + add v18.4s,v18.4s,v26.4s + add w20,w20,w30 + add v22.4s,v22.4s,v26.4s + add x21,x21,x30,lsr#32 + add v19.4s,v19.4s,v31.4s // +4 + add x5,x5,x6,lsl#32 // pack + add v23.4s,v23.4s,v31.4s // +4 + add x7,x7,x8,lsl#32 + add v3.4s,v3.4s,v27.4s + ldp x6,x8,[x1,#0] // load input + add v7.4s,v7.4s,v28.4s + add x9,x9,x10,lsl#32 + add v11.4s,v11.4s,v29.4s + add x11,x11,x12,lsl#32 + add v15.4s,v15.4s,v30.4s + ldp x10,x12,[x1,#16] + add v19.4s,v19.4s,v27.4s + add x13,x13,x14,lsl#32 + add v23.4s,v23.4s,v28.4s + add x15,x15,x16,lsl#32 + add v1.4s,v1.4s,v25.4s + ldp x14,x16,[x1,#32] + add v5.4s,v5.4s,v25.4s + add x17,x17,x19,lsl#32 + add v9.4s,v9.4s,v25.4s + add x20,x20,x21,lsl#32 + add v13.4s,v13.4s,v25.4s + ldp x19,x21,[x1,#48] + add v17.4s,v17.4s,v25.4s + add x1,x1,#64 + add v21.4s,v21.4s,v25.4s + +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor v0.16b,v0.16b,v24.16b + eor x15,x15,x16 + eor v1.16b,v1.16b,v25.16b + eor x17,x17,x19 + eor v2.16b,v2.16b,v26.16b + eor x20,x20,x21 + eor v3.16b,v3.16b,v27.16b + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#7 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + + ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 + eor v4.16b,v4.16b,v24.16b + eor v5.16b,v5.16b,v25.16b + eor v6.16b,v6.16b,v26.16b + eor v7.16b,v7.16b,v27.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + eor v8.16b,v8.16b,v0.16b + ldp q24,q25,[sp,#0] + eor v9.16b,v9.16b,v1.16b + ldp q26,q27,[sp,#32] + eor v10.16b,v10.16b,v2.16b + eor v11.16b,v11.16b,v3.16b + st1 {v8.16b,v9.16b,v10.16b,v11.16b},[x0],#64 + + ld1 {v8.16b,v9.16b,v10.16b,v11.16b},[x1],#64 + eor v12.16b,v12.16b,v4.16b + eor v13.16b,v13.16b,v5.16b + eor v14.16b,v14.16b,v6.16b + eor v15.16b,v15.16b,v7.16b + st1 {v12.16b,v13.16b,v14.16b,v15.16b},[x0],#64 + + ld1 {v12.16b,v13.16b,v14.16b,v15.16b},[x1],#64 + eor v16.16b,v16.16b,v8.16b + eor v17.16b,v17.16b,v9.16b + eor v18.16b,v18.16b,v10.16b + eor v19.16b,v19.16b,v11.16b + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + + shl v0.4s,v31.4s,#1 // 4 -> 8 + eor v20.16b,v20.16b,v12.16b + eor v21.16b,v21.16b,v13.16b + eor v22.16b,v22.16b,v14.16b + eor v23.16b,v23.16b,v15.16b + st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 + + add v27.4s,v27.4s,v0.4s // += 8 + add v28.4s,v28.4s,v0.4s + add v29.4s,v29.4s,v0.4s + add v30.4s,v30.4s,v0.4s + + b.hs Loop_outer_512_neon + + adds x2,x2,#512 + ushr v0.4s,v31.4s,#2 // 4 -> 1 + + ldp d8,d9,[sp,#128+0] // meet ABI requirements + ldp d10,d11,[sp,#128+16] + ldp d12,d13,[sp,#128+32] + ldp d14,d15,[sp,#128+48] + + stp q24,q31,[sp,#0] // wipe off-load area + stp q24,q31,[sp,#32] + stp q24,q31,[sp,#64] + + b.eq Ldone_512_neon + + cmp x2,#192 + sub v27.4s,v27.4s,v0.4s // -= 1 + sub v28.4s,v28.4s,v0.4s + sub v29.4s,v29.4s,v0.4s + add sp,sp,#128 + b.hs Loop_outer_neon + + eor v25.16b,v25.16b,v25.16b + eor v26.16b,v26.16b,v26.16b + eor v27.16b,v27.16b,v27.16b + eor v28.16b,v28.16b,v28.16b + eor v29.16b,v29.16b,v29.16b + eor v30.16b,v30.16b,v30.16b + b Loop_outer + +Ldone_512_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#128+64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-linux.S b/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-linux.S new file mode 100644 index 0000000000..55fa58379c --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-linux.S @@ -0,0 +1,1968 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +.section .rodata + +.align 5 +.Lsigma: +.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral +.Lone: +.long 1,0,0,0 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 + +.text + +.globl ChaCha20_ctr32_nohw +.hidden ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,%function +.align 5 +ChaCha20_ctr32_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,.Lsigma + add x5,x5,:lo12:.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#64 + + ldp x22,x23,[x5] // load sigma + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ldp x28,x30,[x4] // load counter +#ifdef __AARCH64EB__ + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + +.Loop_outer: + mov w5,w22 // unpack key block + lsr x6,x22,#32 + mov w7,w23 + lsr x8,x23,#32 + mov w9,w24 + lsr x10,x24,#32 + mov w11,w25 + lsr x12,x25,#32 + mov w13,w26 + lsr x14,x26,#32 + mov w15,w27 + lsr x16,x27,#32 + mov w17,w28 + lsr x19,x28,#32 + mov w20,w30 + lsr x21,x30,#32 + + mov x4,#10 + subs x2,x2,#64 +.Loop: + sub x4,x4,#1 + add w5,w5,w9 + add w6,w6,w10 + add w7,w7,w11 + add w8,w8,w12 + eor w17,w17,w5 + eor w19,w19,w6 + eor w20,w20,w7 + eor w21,w21,w8 + ror w17,w17,#16 + ror w19,w19,#16 + ror w20,w20,#16 + ror w21,w21,#16 + add w13,w13,w17 + add w14,w14,w19 + add w15,w15,w20 + add w16,w16,w21 + eor w9,w9,w13 + eor w10,w10,w14 + eor w11,w11,w15 + eor w12,w12,w16 + ror w9,w9,#20 + ror w10,w10,#20 + ror w11,w11,#20 + ror w12,w12,#20 + add w5,w5,w9 + add w6,w6,w10 + add w7,w7,w11 + add w8,w8,w12 + eor w17,w17,w5 + eor w19,w19,w6 + eor w20,w20,w7 + eor w21,w21,w8 + ror w17,w17,#24 + ror w19,w19,#24 + ror w20,w20,#24 + ror w21,w21,#24 + add w13,w13,w17 + add w14,w14,w19 + add w15,w15,w20 + add w16,w16,w21 + eor w9,w9,w13 + eor w10,w10,w14 + eor w11,w11,w15 + eor w12,w12,w16 + ror w9,w9,#25 + ror w10,w10,#25 + ror w11,w11,#25 + ror w12,w12,#25 + add w5,w5,w10 + add w6,w6,w11 + add w7,w7,w12 + add w8,w8,w9 + eor w21,w21,w5 + eor w17,w17,w6 + eor w19,w19,w7 + eor w20,w20,w8 + ror w21,w21,#16 + ror w17,w17,#16 + ror w19,w19,#16 + ror w20,w20,#16 + add w15,w15,w21 + add w16,w16,w17 + add w13,w13,w19 + add w14,w14,w20 + eor w10,w10,w15 + eor w11,w11,w16 + eor w12,w12,w13 + eor w9,w9,w14 + ror w10,w10,#20 + ror w11,w11,#20 + ror w12,w12,#20 + ror w9,w9,#20 + add w5,w5,w10 + add w6,w6,w11 + add w7,w7,w12 + add w8,w8,w9 + eor w21,w21,w5 + eor w17,w17,w6 + eor w19,w19,w7 + eor w20,w20,w8 + ror w21,w21,#24 + ror w17,w17,#24 + ror w19,w19,#24 + ror w20,w20,#24 + add w15,w15,w21 + add w16,w16,w17 + add w13,w13,w19 + add w14,w14,w20 + eor w10,w10,w15 + eor w11,w11,w16 + eor w12,w12,w13 + eor w9,w9,w14 + ror w10,w10,#25 + ror w11,w11,#25 + ror w12,w12,#25 + ror w9,w9,#25 + cbnz x4,.Loop + + add w5,w5,w22 // accumulate key block + add x6,x6,x22,lsr#32 + add w7,w7,w23 + add x8,x8,x23,lsr#32 + add w9,w9,w24 + add x10,x10,x24,lsr#32 + add w11,w11,w25 + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add x21,x21,x30,lsr#32 + + b.lo .Ltail + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#1 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + + b.hi .Loop_outer + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.align 4 +.Ltail: + add x2,x2,#64 +.Less_than_64: + sub x0,x0,#1 + add x1,x1,x2 + add x0,x0,x2 + add x4,sp,x2 + neg x2,x2 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + stp x5,x7,[sp,#0] + stp x9,x11,[sp,#16] + stp x13,x15,[sp,#32] + stp x17,x20,[sp,#48] + +.Loop_tail: + ldrb w10,[x1,x2] + ldrb w11,[x4,x2] + add x2,x2,#1 + eor w10,w10,w11 + strb w10,[x0,x2] + cbnz x2,.Loop_tail + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw + +.globl ChaCha20_ctr32_neon +.hidden ChaCha20_ctr32_neon +.type ChaCha20_ctr32_neon,%function +.align 5 +ChaCha20_ctr32_neon: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,.Lsigma + add x5,x5,:lo12:.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + cmp x2,#512 + b.hs .L512_or_more_neon + + sub sp,sp,#64 + + ldp x22,x23,[x5] // load sigma + ld1 {v24.4s},[x5],#16 + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ld1 {v25.4s,v26.4s},[x3] + ldp x28,x30,[x4] // load counter + ld1 {v27.4s},[x4] + ld1 {v31.4s},[x5] +#ifdef __AARCH64EB__ + rev64 v24.4s,v24.4s + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + add v27.4s,v27.4s,v31.4s // += 1 + add v28.4s,v27.4s,v31.4s + add v29.4s,v28.4s,v31.4s + shl v31.4s,v31.4s,#2 // 1 -> 4 + +.Loop_outer_neon: + mov w5,w22 // unpack key block + lsr x6,x22,#32 + mov v0.16b,v24.16b + mov w7,w23 + lsr x8,x23,#32 + mov v4.16b,v24.16b + mov w9,w24 + lsr x10,x24,#32 + mov v16.16b,v24.16b + mov w11,w25 + mov v1.16b,v25.16b + lsr x12,x25,#32 + mov v5.16b,v25.16b + mov w13,w26 + mov v17.16b,v25.16b + lsr x14,x26,#32 + mov v3.16b,v27.16b + mov w15,w27 + mov v7.16b,v28.16b + lsr x16,x27,#32 + mov v19.16b,v29.16b + mov w17,w28 + mov v2.16b,v26.16b + lsr x19,x28,#32 + mov v6.16b,v26.16b + mov w20,w30 + mov v18.16b,v26.16b + lsr x21,x30,#32 + + mov x4,#10 + subs x2,x2,#256 +.Loop_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v16.4s,v16.4s,v17.4s + add w7,w7,w11 + eor v3.16b,v3.16b,v0.16b + add w8,w8,w12 + eor v7.16b,v7.16b,v4.16b + eor w17,w17,w5 + eor v19.16b,v19.16b,v16.16b + eor w19,w19,w6 + rev32 v3.8h,v3.8h + eor w20,w20,w7 + rev32 v7.8h,v7.8h + eor w21,w21,w8 + rev32 v19.8h,v19.8h + ror w17,w17,#16 + add v2.4s,v2.4s,v3.4s + ror w19,w19,#16 + add v6.4s,v6.4s,v7.4s + ror w20,w20,#16 + add v18.4s,v18.4s,v19.4s + ror w21,w21,#16 + eor v20.16b,v1.16b,v2.16b + add w13,w13,w17 + eor v21.16b,v5.16b,v6.16b + add w14,w14,w19 + eor v22.16b,v17.16b,v18.16b + add w15,w15,w20 + ushr v1.4s,v20.4s,#20 + add w16,w16,w21 + ushr v5.4s,v21.4s,#20 + eor w9,w9,w13 + ushr v17.4s,v22.4s,#20 + eor w10,w10,w14 + sli v1.4s,v20.4s,#12 + eor w11,w11,w15 + sli v5.4s,v21.4s,#12 + eor w12,w12,w16 + sli v17.4s,v22.4s,#12 + ror w9,w9,#20 + add v0.4s,v0.4s,v1.4s + ror w10,w10,#20 + add v4.4s,v4.4s,v5.4s + ror w11,w11,#20 + add v16.4s,v16.4s,v17.4s + ror w12,w12,#20 + eor v20.16b,v3.16b,v0.16b + add w5,w5,w9 + eor v21.16b,v7.16b,v4.16b + add w6,w6,w10 + eor v22.16b,v19.16b,v16.16b + add w7,w7,w11 + ushr v3.4s,v20.4s,#24 + add w8,w8,w12 + ushr v7.4s,v21.4s,#24 + eor w17,w17,w5 + ushr v19.4s,v22.4s,#24 + eor w19,w19,w6 + sli v3.4s,v20.4s,#8 + eor w20,w20,w7 + sli v7.4s,v21.4s,#8 + eor w21,w21,w8 + sli v19.4s,v22.4s,#8 + ror w17,w17,#24 + add v2.4s,v2.4s,v3.4s + ror w19,w19,#24 + add v6.4s,v6.4s,v7.4s + ror w20,w20,#24 + add v18.4s,v18.4s,v19.4s + ror w21,w21,#24 + eor v20.16b,v1.16b,v2.16b + add w13,w13,w17 + eor v21.16b,v5.16b,v6.16b + add w14,w14,w19 + eor v22.16b,v17.16b,v18.16b + add w15,w15,w20 + ushr v1.4s,v20.4s,#25 + add w16,w16,w21 + ushr v5.4s,v21.4s,#25 + eor w9,w9,w13 + ushr v17.4s,v22.4s,#25 + eor w10,w10,w14 + sli v1.4s,v20.4s,#7 + eor w11,w11,w15 + sli v5.4s,v21.4s,#7 + eor w12,w12,w16 + sli v17.4s,v22.4s,#7 + ror w9,w9,#25 + ext v2.16b,v2.16b,v2.16b,#8 + ror w10,w10,#25 + ext v6.16b,v6.16b,v6.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w10 + add v4.4s,v4.4s,v5.4s + add w6,w6,w11 + add v16.4s,v16.4s,v17.4s + add w7,w7,w12 + eor v3.16b,v3.16b,v0.16b + add w8,w8,w9 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w5 + eor v19.16b,v19.16b,v16.16b + eor w17,w17,w6 + rev32 v3.8h,v3.8h + eor w19,w19,w7 + rev32 v7.8h,v7.8h + eor w20,w20,w8 + rev32 v19.8h,v19.8h + ror w21,w21,#16 + add v2.4s,v2.4s,v3.4s + ror w17,w17,#16 + add v6.4s,v6.4s,v7.4s + ror w19,w19,#16 + add v18.4s,v18.4s,v19.4s + ror w20,w20,#16 + eor v20.16b,v1.16b,v2.16b + add w15,w15,w21 + eor v21.16b,v5.16b,v6.16b + add w16,w16,w17 + eor v22.16b,v17.16b,v18.16b + add w13,w13,w19 + ushr v1.4s,v20.4s,#20 + add w14,w14,w20 + ushr v5.4s,v21.4s,#20 + eor w10,w10,w15 + ushr v17.4s,v22.4s,#20 + eor w11,w11,w16 + sli v1.4s,v20.4s,#12 + eor w12,w12,w13 + sli v5.4s,v21.4s,#12 + eor w9,w9,w14 + sli v17.4s,v22.4s,#12 + ror w10,w10,#20 + add v0.4s,v0.4s,v1.4s + ror w11,w11,#20 + add v4.4s,v4.4s,v5.4s + ror w12,w12,#20 + add v16.4s,v16.4s,v17.4s + ror w9,w9,#20 + eor v20.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v21.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v22.16b,v19.16b,v16.16b + add w7,w7,w12 + ushr v3.4s,v20.4s,#24 + add w8,w8,w9 + ushr v7.4s,v21.4s,#24 + eor w21,w21,w5 + ushr v19.4s,v22.4s,#24 + eor w17,w17,w6 + sli v3.4s,v20.4s,#8 + eor w19,w19,w7 + sli v7.4s,v21.4s,#8 + eor w20,w20,w8 + sli v19.4s,v22.4s,#8 + ror w21,w21,#24 + add v2.4s,v2.4s,v3.4s + ror w17,w17,#24 + add v6.4s,v6.4s,v7.4s + ror w19,w19,#24 + add v18.4s,v18.4s,v19.4s + ror w20,w20,#24 + eor v20.16b,v1.16b,v2.16b + add w15,w15,w21 + eor v21.16b,v5.16b,v6.16b + add w16,w16,w17 + eor v22.16b,v17.16b,v18.16b + add w13,w13,w19 + ushr v1.4s,v20.4s,#25 + add w14,w14,w20 + ushr v5.4s,v21.4s,#25 + eor w10,w10,w15 + ushr v17.4s,v22.4s,#25 + eor w11,w11,w16 + sli v1.4s,v20.4s,#7 + eor w12,w12,w13 + sli v5.4s,v21.4s,#7 + eor w9,w9,w14 + sli v17.4s,v22.4s,#7 + ror w10,w10,#25 + ext v2.16b,v2.16b,v2.16b,#8 + ror w11,w11,#25 + ext v6.16b,v6.16b,v6.16b,#8 + ror w12,w12,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + cbnz x4,.Loop_neon + + add w5,w5,w22 // accumulate key block + add v0.4s,v0.4s,v24.4s + add x6,x6,x22,lsr#32 + add v4.4s,v4.4s,v24.4s + add w7,w7,w23 + add v16.4s,v16.4s,v24.4s + add x8,x8,x23,lsr#32 + add v2.4s,v2.4s,v26.4s + add w9,w9,w24 + add v6.4s,v6.4s,v26.4s + add x10,x10,x24,lsr#32 + add v18.4s,v18.4s,v26.4s + add w11,w11,w25 + add v3.4s,v3.4s,v27.4s + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add v7.4s,v7.4s,v28.4s + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add v19.4s,v19.4s,v29.4s + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add v1.4s,v1.4s,v25.4s + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add v5.4s,v5.4s,v25.4s + add x21,x21,x30,lsr#32 + add v17.4s,v17.4s,v25.4s + + b.lo .Ltail_neon + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor v0.16b,v0.16b,v20.16b + eor x15,x15,x16 + eor v1.16b,v1.16b,v21.16b + eor x17,x17,x19 + eor v2.16b,v2.16b,v22.16b + eor x20,x20,x21 + eor v3.16b,v3.16b,v23.16b + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#4 // increment counter + stp x9,x11,[x0,#16] + add v27.4s,v27.4s,v31.4s // += 4 + stp x13,x15,[x0,#32] + add v28.4s,v28.4s,v31.4s + stp x17,x20,[x0,#48] + add v29.4s,v29.4s,v31.4s + add x0,x0,#64 + + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 + + eor v4.16b,v4.16b,v20.16b + eor v5.16b,v5.16b,v21.16b + eor v6.16b,v6.16b,v22.16b + eor v7.16b,v7.16b,v23.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + + eor v16.16b,v16.16b,v0.16b + eor v17.16b,v17.16b,v1.16b + eor v18.16b,v18.16b,v2.16b + eor v19.16b,v19.16b,v3.16b + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + + b.hi .Loop_outer_neon + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.Ltail_neon: + add x2,x2,#256 + cmp x2,#64 + b.lo .Less_than_64 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#4 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + b.eq .Ldone_neon + sub x2,x2,#64 + cmp x2,#64 + b.lo .Less_than_128 + + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor v0.16b,v0.16b,v20.16b + eor v1.16b,v1.16b,v21.16b + eor v2.16b,v2.16b,v22.16b + eor v3.16b,v3.16b,v23.16b + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + b.eq .Ldone_neon + sub x2,x2,#64 + cmp x2,#64 + b.lo .Less_than_192 + + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor v4.16b,v4.16b,v20.16b + eor v5.16b,v5.16b,v21.16b + eor v6.16b,v6.16b,v22.16b + eor v7.16b,v7.16b,v23.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + b.eq .Ldone_neon + sub x2,x2,#64 + + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[sp] + b .Last_neon + +.Less_than_128: + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[sp] + b .Last_neon +.Less_than_192: + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[sp] + b .Last_neon + +.align 4 +.Last_neon: + sub x0,x0,#1 + add x1,x1,x2 + add x0,x0,x2 + add x4,sp,x2 + neg x2,x2 + +.Loop_tail_neon: + ldrb w10,[x1,x2] + ldrb w11,[x4,x2] + add x2,x2,#1 + eor w10,w10,w11 + strb w10,[x0,x2] + cbnz x2,.Loop_tail_neon + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + +.Ldone_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon +.type ChaCha20_512_neon,%function +.align 5 +ChaCha20_512_neon: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,.Lsigma + add x5,x5,:lo12:.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + +.L512_or_more_neon: + sub sp,sp,#128+64 + + ldp x22,x23,[x5] // load sigma + ld1 {v24.4s},[x5],#16 + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ld1 {v25.4s,v26.4s},[x3] + ldp x28,x30,[x4] // load counter + ld1 {v27.4s},[x4] + ld1 {v31.4s},[x5] +#ifdef __AARCH64EB__ + rev64 v24.4s,v24.4s + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + add v27.4s,v27.4s,v31.4s // += 1 + stp q24,q25,[sp,#0] // off-load key block, invariant part + add v27.4s,v27.4s,v31.4s // not typo + str q26,[sp,#32] + add v28.4s,v27.4s,v31.4s + add v29.4s,v28.4s,v31.4s + add v30.4s,v29.4s,v31.4s + shl v31.4s,v31.4s,#2 // 1 -> 4 + + stp d8,d9,[sp,#128+0] // meet ABI requirements + stp d10,d11,[sp,#128+16] + stp d12,d13,[sp,#128+32] + stp d14,d15,[sp,#128+48] + + sub x2,x2,#512 // not typo + +.Loop_outer_512_neon: + mov v0.16b,v24.16b + mov v4.16b,v24.16b + mov v8.16b,v24.16b + mov v12.16b,v24.16b + mov v16.16b,v24.16b + mov v20.16b,v24.16b + mov v1.16b,v25.16b + mov w5,w22 // unpack key block + mov v5.16b,v25.16b + lsr x6,x22,#32 + mov v9.16b,v25.16b + mov w7,w23 + mov v13.16b,v25.16b + lsr x8,x23,#32 + mov v17.16b,v25.16b + mov w9,w24 + mov v21.16b,v25.16b + lsr x10,x24,#32 + mov v3.16b,v27.16b + mov w11,w25 + mov v7.16b,v28.16b + lsr x12,x25,#32 + mov v11.16b,v29.16b + mov w13,w26 + mov v15.16b,v30.16b + lsr x14,x26,#32 + mov v2.16b,v26.16b + mov w15,w27 + mov v6.16b,v26.16b + lsr x16,x27,#32 + add v19.4s,v3.4s,v31.4s // +4 + mov w17,w28 + add v23.4s,v7.4s,v31.4s // +4 + lsr x19,x28,#32 + mov v10.16b,v26.16b + mov w20,w30 + mov v14.16b,v26.16b + lsr x21,x30,#32 + mov v18.16b,v26.16b + stp q27,q28,[sp,#48] // off-load key block, variable part + mov v22.16b,v26.16b + str q29,[sp,#80] + + mov x4,#5 + subs x2,x2,#512 +.Loop_upper_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v11.16b,v11.16b,v11.16b,#12 + ext v15.16b,v15.16b,v15.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v23.16b,v23.16b,v23.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v9.16b,v9.16b,v9.16b,#4 + ext v13.16b,v13.16b,v13.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + ext v21.16b,v21.16b,v21.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v11.16b,v11.16b,v11.16b,#4 + ext v15.16b,v15.16b,v15.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v23.16b,v23.16b,v23.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v9.16b,v9.16b,v9.16b,#12 + ext v13.16b,v13.16b,v13.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + ext v21.16b,v21.16b,v21.16b,#12 + cbnz x4,.Loop_upper_neon + + add w5,w5,w22 // accumulate key block + add x6,x6,x22,lsr#32 + add w7,w7,w23 + add x8,x8,x23,lsr#32 + add w9,w9,w24 + add x10,x10,x24,lsr#32 + add w11,w11,w25 + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add x21,x21,x30,lsr#32 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#1 // increment counter + mov w5,w22 // unpack key block + lsr x6,x22,#32 + stp x9,x11,[x0,#16] + mov w7,w23 + lsr x8,x23,#32 + stp x13,x15,[x0,#32] + mov w9,w24 + lsr x10,x24,#32 + stp x17,x20,[x0,#48] + add x0,x0,#64 + mov w11,w25 + lsr x12,x25,#32 + mov w13,w26 + lsr x14,x26,#32 + mov w15,w27 + lsr x16,x27,#32 + mov w17,w28 + lsr x19,x28,#32 + mov w20,w30 + lsr x21,x30,#32 + + mov x4,#5 +.Loop_lower_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v11.16b,v11.16b,v11.16b,#12 + ext v15.16b,v15.16b,v15.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v23.16b,v23.16b,v23.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v9.16b,v9.16b,v9.16b,#4 + ext v13.16b,v13.16b,v13.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + ext v21.16b,v21.16b,v21.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v11.16b,v11.16b,v11.16b,#4 + ext v15.16b,v15.16b,v15.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v23.16b,v23.16b,v23.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v9.16b,v9.16b,v9.16b,#12 + ext v13.16b,v13.16b,v13.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + ext v21.16b,v21.16b,v21.16b,#12 + cbnz x4,.Loop_lower_neon + + add w5,w5,w22 // accumulate key block + ldp q24,q25,[sp,#0] + add x6,x6,x22,lsr#32 + ldp q26,q27,[sp,#32] + add w7,w7,w23 + ldp q28,q29,[sp,#64] + add x8,x8,x23,lsr#32 + add v0.4s,v0.4s,v24.4s + add w9,w9,w24 + add v4.4s,v4.4s,v24.4s + add x10,x10,x24,lsr#32 + add v8.4s,v8.4s,v24.4s + add w11,w11,w25 + add v12.4s,v12.4s,v24.4s + add x12,x12,x25,lsr#32 + add v16.4s,v16.4s,v24.4s + add w13,w13,w26 + add v20.4s,v20.4s,v24.4s + add x14,x14,x26,lsr#32 + add v2.4s,v2.4s,v26.4s + add w15,w15,w27 + add v6.4s,v6.4s,v26.4s + add x16,x16,x27,lsr#32 + add v10.4s,v10.4s,v26.4s + add w17,w17,w28 + add v14.4s,v14.4s,v26.4s + add x19,x19,x28,lsr#32 + add v18.4s,v18.4s,v26.4s + add w20,w20,w30 + add v22.4s,v22.4s,v26.4s + add x21,x21,x30,lsr#32 + add v19.4s,v19.4s,v31.4s // +4 + add x5,x5,x6,lsl#32 // pack + add v23.4s,v23.4s,v31.4s // +4 + add x7,x7,x8,lsl#32 + add v3.4s,v3.4s,v27.4s + ldp x6,x8,[x1,#0] // load input + add v7.4s,v7.4s,v28.4s + add x9,x9,x10,lsl#32 + add v11.4s,v11.4s,v29.4s + add x11,x11,x12,lsl#32 + add v15.4s,v15.4s,v30.4s + ldp x10,x12,[x1,#16] + add v19.4s,v19.4s,v27.4s + add x13,x13,x14,lsl#32 + add v23.4s,v23.4s,v28.4s + add x15,x15,x16,lsl#32 + add v1.4s,v1.4s,v25.4s + ldp x14,x16,[x1,#32] + add v5.4s,v5.4s,v25.4s + add x17,x17,x19,lsl#32 + add v9.4s,v9.4s,v25.4s + add x20,x20,x21,lsl#32 + add v13.4s,v13.4s,v25.4s + ldp x19,x21,[x1,#48] + add v17.4s,v17.4s,v25.4s + add x1,x1,#64 + add v21.4s,v21.4s,v25.4s + +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor v0.16b,v0.16b,v24.16b + eor x15,x15,x16 + eor v1.16b,v1.16b,v25.16b + eor x17,x17,x19 + eor v2.16b,v2.16b,v26.16b + eor x20,x20,x21 + eor v3.16b,v3.16b,v27.16b + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#7 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + + ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 + eor v4.16b,v4.16b,v24.16b + eor v5.16b,v5.16b,v25.16b + eor v6.16b,v6.16b,v26.16b + eor v7.16b,v7.16b,v27.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + eor v8.16b,v8.16b,v0.16b + ldp q24,q25,[sp,#0] + eor v9.16b,v9.16b,v1.16b + ldp q26,q27,[sp,#32] + eor v10.16b,v10.16b,v2.16b + eor v11.16b,v11.16b,v3.16b + st1 {v8.16b,v9.16b,v10.16b,v11.16b},[x0],#64 + + ld1 {v8.16b,v9.16b,v10.16b,v11.16b},[x1],#64 + eor v12.16b,v12.16b,v4.16b + eor v13.16b,v13.16b,v5.16b + eor v14.16b,v14.16b,v6.16b + eor v15.16b,v15.16b,v7.16b + st1 {v12.16b,v13.16b,v14.16b,v15.16b},[x0],#64 + + ld1 {v12.16b,v13.16b,v14.16b,v15.16b},[x1],#64 + eor v16.16b,v16.16b,v8.16b + eor v17.16b,v17.16b,v9.16b + eor v18.16b,v18.16b,v10.16b + eor v19.16b,v19.16b,v11.16b + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + + shl v0.4s,v31.4s,#1 // 4 -> 8 + eor v20.16b,v20.16b,v12.16b + eor v21.16b,v21.16b,v13.16b + eor v22.16b,v22.16b,v14.16b + eor v23.16b,v23.16b,v15.16b + st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 + + add v27.4s,v27.4s,v0.4s // += 8 + add v28.4s,v28.4s,v0.4s + add v29.4s,v29.4s,v0.4s + add v30.4s,v30.4s,v0.4s + + b.hs .Loop_outer_512_neon + + adds x2,x2,#512 + ushr v0.4s,v31.4s,#2 // 4 -> 1 + + ldp d8,d9,[sp,#128+0] // meet ABI requirements + ldp d10,d11,[sp,#128+16] + ldp d12,d13,[sp,#128+32] + ldp d14,d15,[sp,#128+48] + + stp q24,q31,[sp,#0] // wipe off-load area + stp q24,q31,[sp,#32] + stp q24,q31,[sp,#64] + + b.eq .Ldone_512_neon + + cmp x2,#192 + sub v27.4s,v27.4s,v0.4s // -= 1 + sub v28.4s,v28.4s,v0.4s + sub v29.4s,v29.4s,v0.4s + add sp,sp,#128 + b.hs .Loop_outer_neon + + eor v25.16b,v25.16b,v25.16b + eor v26.16b,v26.16b,v26.16b + eor v27.16b,v27.16b,v27.16b + eor v28.16b,v28.16b,v28.16b + eor v29.16b,v29.16b,v29.16b + eor v30.16b,v30.16b,v30.16b + b .Loop_outer + +.Ldone_512_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#128+64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ChaCha20_512_neon,.-ChaCha20_512_neon +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-win.S b/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-win.S new file mode 100644 index 0000000000..851ef4db6c --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-armv8-win.S @@ -0,0 +1,1974 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +.section .rodata + +.align 5 +Lsigma: +.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral +Lone: +.long 1,0,0,0 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 + +.text + +.globl ChaCha20_ctr32_nohw + +.def ChaCha20_ctr32_nohw + .type 32 +.endef +.align 5 +ChaCha20_ctr32_nohw: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,Lsigma + add x5,x5,:lo12:Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#64 + + ldp x22,x23,[x5] // load sigma + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ldp x28,x30,[x4] // load counter +#ifdef __AARCH64EB__ + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + +Loop_outer: + mov w5,w22 // unpack key block + lsr x6,x22,#32 + mov w7,w23 + lsr x8,x23,#32 + mov w9,w24 + lsr x10,x24,#32 + mov w11,w25 + lsr x12,x25,#32 + mov w13,w26 + lsr x14,x26,#32 + mov w15,w27 + lsr x16,x27,#32 + mov w17,w28 + lsr x19,x28,#32 + mov w20,w30 + lsr x21,x30,#32 + + mov x4,#10 + subs x2,x2,#64 +Loop: + sub x4,x4,#1 + add w5,w5,w9 + add w6,w6,w10 + add w7,w7,w11 + add w8,w8,w12 + eor w17,w17,w5 + eor w19,w19,w6 + eor w20,w20,w7 + eor w21,w21,w8 + ror w17,w17,#16 + ror w19,w19,#16 + ror w20,w20,#16 + ror w21,w21,#16 + add w13,w13,w17 + add w14,w14,w19 + add w15,w15,w20 + add w16,w16,w21 + eor w9,w9,w13 + eor w10,w10,w14 + eor w11,w11,w15 + eor w12,w12,w16 + ror w9,w9,#20 + ror w10,w10,#20 + ror w11,w11,#20 + ror w12,w12,#20 + add w5,w5,w9 + add w6,w6,w10 + add w7,w7,w11 + add w8,w8,w12 + eor w17,w17,w5 + eor w19,w19,w6 + eor w20,w20,w7 + eor w21,w21,w8 + ror w17,w17,#24 + ror w19,w19,#24 + ror w20,w20,#24 + ror w21,w21,#24 + add w13,w13,w17 + add w14,w14,w19 + add w15,w15,w20 + add w16,w16,w21 + eor w9,w9,w13 + eor w10,w10,w14 + eor w11,w11,w15 + eor w12,w12,w16 + ror w9,w9,#25 + ror w10,w10,#25 + ror w11,w11,#25 + ror w12,w12,#25 + add w5,w5,w10 + add w6,w6,w11 + add w7,w7,w12 + add w8,w8,w9 + eor w21,w21,w5 + eor w17,w17,w6 + eor w19,w19,w7 + eor w20,w20,w8 + ror w21,w21,#16 + ror w17,w17,#16 + ror w19,w19,#16 + ror w20,w20,#16 + add w15,w15,w21 + add w16,w16,w17 + add w13,w13,w19 + add w14,w14,w20 + eor w10,w10,w15 + eor w11,w11,w16 + eor w12,w12,w13 + eor w9,w9,w14 + ror w10,w10,#20 + ror w11,w11,#20 + ror w12,w12,#20 + ror w9,w9,#20 + add w5,w5,w10 + add w6,w6,w11 + add w7,w7,w12 + add w8,w8,w9 + eor w21,w21,w5 + eor w17,w17,w6 + eor w19,w19,w7 + eor w20,w20,w8 + ror w21,w21,#24 + ror w17,w17,#24 + ror w19,w19,#24 + ror w20,w20,#24 + add w15,w15,w21 + add w16,w16,w17 + add w13,w13,w19 + add w14,w14,w20 + eor w10,w10,w15 + eor w11,w11,w16 + eor w12,w12,w13 + eor w9,w9,w14 + ror w10,w10,#25 + ror w11,w11,#25 + ror w12,w12,#25 + ror w9,w9,#25 + cbnz x4,Loop + + add w5,w5,w22 // accumulate key block + add x6,x6,x22,lsr#32 + add w7,w7,w23 + add x8,x8,x23,lsr#32 + add w9,w9,w24 + add x10,x10,x24,lsr#32 + add w11,w11,w25 + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add x21,x21,x30,lsr#32 + + b.lo Ltail + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#1 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + + b.hi Loop_outer + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.align 4 +Ltail: + add x2,x2,#64 +Less_than_64: + sub x0,x0,#1 + add x1,x1,x2 + add x0,x0,x2 + add x4,sp,x2 + neg x2,x2 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + stp x5,x7,[sp,#0] + stp x9,x11,[sp,#16] + stp x13,x15,[sp,#32] + stp x17,x20,[sp,#48] + +Loop_tail: + ldrb w10,[x1,x2] + ldrb w11,[x4,x2] + add x2,x2,#1 + eor w10,w10,w11 + strb w10,[x0,x2] + cbnz x2,Loop_tail + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.globl ChaCha20_ctr32_neon + +.def ChaCha20_ctr32_neon + .type 32 +.endef +.align 5 +ChaCha20_ctr32_neon: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,Lsigma + add x5,x5,:lo12:Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + cmp x2,#512 + b.hs L512_or_more_neon + + sub sp,sp,#64 + + ldp x22,x23,[x5] // load sigma + ld1 {v24.4s},[x5],#16 + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ld1 {v25.4s,v26.4s},[x3] + ldp x28,x30,[x4] // load counter + ld1 {v27.4s},[x4] + ld1 {v31.4s},[x5] +#ifdef __AARCH64EB__ + rev64 v24.4s,v24.4s + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + add v27.4s,v27.4s,v31.4s // += 1 + add v28.4s,v27.4s,v31.4s + add v29.4s,v28.4s,v31.4s + shl v31.4s,v31.4s,#2 // 1 -> 4 + +Loop_outer_neon: + mov w5,w22 // unpack key block + lsr x6,x22,#32 + mov v0.16b,v24.16b + mov w7,w23 + lsr x8,x23,#32 + mov v4.16b,v24.16b + mov w9,w24 + lsr x10,x24,#32 + mov v16.16b,v24.16b + mov w11,w25 + mov v1.16b,v25.16b + lsr x12,x25,#32 + mov v5.16b,v25.16b + mov w13,w26 + mov v17.16b,v25.16b + lsr x14,x26,#32 + mov v3.16b,v27.16b + mov w15,w27 + mov v7.16b,v28.16b + lsr x16,x27,#32 + mov v19.16b,v29.16b + mov w17,w28 + mov v2.16b,v26.16b + lsr x19,x28,#32 + mov v6.16b,v26.16b + mov w20,w30 + mov v18.16b,v26.16b + lsr x21,x30,#32 + + mov x4,#10 + subs x2,x2,#256 +Loop_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v16.4s,v16.4s,v17.4s + add w7,w7,w11 + eor v3.16b,v3.16b,v0.16b + add w8,w8,w12 + eor v7.16b,v7.16b,v4.16b + eor w17,w17,w5 + eor v19.16b,v19.16b,v16.16b + eor w19,w19,w6 + rev32 v3.8h,v3.8h + eor w20,w20,w7 + rev32 v7.8h,v7.8h + eor w21,w21,w8 + rev32 v19.8h,v19.8h + ror w17,w17,#16 + add v2.4s,v2.4s,v3.4s + ror w19,w19,#16 + add v6.4s,v6.4s,v7.4s + ror w20,w20,#16 + add v18.4s,v18.4s,v19.4s + ror w21,w21,#16 + eor v20.16b,v1.16b,v2.16b + add w13,w13,w17 + eor v21.16b,v5.16b,v6.16b + add w14,w14,w19 + eor v22.16b,v17.16b,v18.16b + add w15,w15,w20 + ushr v1.4s,v20.4s,#20 + add w16,w16,w21 + ushr v5.4s,v21.4s,#20 + eor w9,w9,w13 + ushr v17.4s,v22.4s,#20 + eor w10,w10,w14 + sli v1.4s,v20.4s,#12 + eor w11,w11,w15 + sli v5.4s,v21.4s,#12 + eor w12,w12,w16 + sli v17.4s,v22.4s,#12 + ror w9,w9,#20 + add v0.4s,v0.4s,v1.4s + ror w10,w10,#20 + add v4.4s,v4.4s,v5.4s + ror w11,w11,#20 + add v16.4s,v16.4s,v17.4s + ror w12,w12,#20 + eor v20.16b,v3.16b,v0.16b + add w5,w5,w9 + eor v21.16b,v7.16b,v4.16b + add w6,w6,w10 + eor v22.16b,v19.16b,v16.16b + add w7,w7,w11 + ushr v3.4s,v20.4s,#24 + add w8,w8,w12 + ushr v7.4s,v21.4s,#24 + eor w17,w17,w5 + ushr v19.4s,v22.4s,#24 + eor w19,w19,w6 + sli v3.4s,v20.4s,#8 + eor w20,w20,w7 + sli v7.4s,v21.4s,#8 + eor w21,w21,w8 + sli v19.4s,v22.4s,#8 + ror w17,w17,#24 + add v2.4s,v2.4s,v3.4s + ror w19,w19,#24 + add v6.4s,v6.4s,v7.4s + ror w20,w20,#24 + add v18.4s,v18.4s,v19.4s + ror w21,w21,#24 + eor v20.16b,v1.16b,v2.16b + add w13,w13,w17 + eor v21.16b,v5.16b,v6.16b + add w14,w14,w19 + eor v22.16b,v17.16b,v18.16b + add w15,w15,w20 + ushr v1.4s,v20.4s,#25 + add w16,w16,w21 + ushr v5.4s,v21.4s,#25 + eor w9,w9,w13 + ushr v17.4s,v22.4s,#25 + eor w10,w10,w14 + sli v1.4s,v20.4s,#7 + eor w11,w11,w15 + sli v5.4s,v21.4s,#7 + eor w12,w12,w16 + sli v17.4s,v22.4s,#7 + ror w9,w9,#25 + ext v2.16b,v2.16b,v2.16b,#8 + ror w10,w10,#25 + ext v6.16b,v6.16b,v6.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w10 + add v4.4s,v4.4s,v5.4s + add w6,w6,w11 + add v16.4s,v16.4s,v17.4s + add w7,w7,w12 + eor v3.16b,v3.16b,v0.16b + add w8,w8,w9 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w5 + eor v19.16b,v19.16b,v16.16b + eor w17,w17,w6 + rev32 v3.8h,v3.8h + eor w19,w19,w7 + rev32 v7.8h,v7.8h + eor w20,w20,w8 + rev32 v19.8h,v19.8h + ror w21,w21,#16 + add v2.4s,v2.4s,v3.4s + ror w17,w17,#16 + add v6.4s,v6.4s,v7.4s + ror w19,w19,#16 + add v18.4s,v18.4s,v19.4s + ror w20,w20,#16 + eor v20.16b,v1.16b,v2.16b + add w15,w15,w21 + eor v21.16b,v5.16b,v6.16b + add w16,w16,w17 + eor v22.16b,v17.16b,v18.16b + add w13,w13,w19 + ushr v1.4s,v20.4s,#20 + add w14,w14,w20 + ushr v5.4s,v21.4s,#20 + eor w10,w10,w15 + ushr v17.4s,v22.4s,#20 + eor w11,w11,w16 + sli v1.4s,v20.4s,#12 + eor w12,w12,w13 + sli v5.4s,v21.4s,#12 + eor w9,w9,w14 + sli v17.4s,v22.4s,#12 + ror w10,w10,#20 + add v0.4s,v0.4s,v1.4s + ror w11,w11,#20 + add v4.4s,v4.4s,v5.4s + ror w12,w12,#20 + add v16.4s,v16.4s,v17.4s + ror w9,w9,#20 + eor v20.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v21.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v22.16b,v19.16b,v16.16b + add w7,w7,w12 + ushr v3.4s,v20.4s,#24 + add w8,w8,w9 + ushr v7.4s,v21.4s,#24 + eor w21,w21,w5 + ushr v19.4s,v22.4s,#24 + eor w17,w17,w6 + sli v3.4s,v20.4s,#8 + eor w19,w19,w7 + sli v7.4s,v21.4s,#8 + eor w20,w20,w8 + sli v19.4s,v22.4s,#8 + ror w21,w21,#24 + add v2.4s,v2.4s,v3.4s + ror w17,w17,#24 + add v6.4s,v6.4s,v7.4s + ror w19,w19,#24 + add v18.4s,v18.4s,v19.4s + ror w20,w20,#24 + eor v20.16b,v1.16b,v2.16b + add w15,w15,w21 + eor v21.16b,v5.16b,v6.16b + add w16,w16,w17 + eor v22.16b,v17.16b,v18.16b + add w13,w13,w19 + ushr v1.4s,v20.4s,#25 + add w14,w14,w20 + ushr v5.4s,v21.4s,#25 + eor w10,w10,w15 + ushr v17.4s,v22.4s,#25 + eor w11,w11,w16 + sli v1.4s,v20.4s,#7 + eor w12,w12,w13 + sli v5.4s,v21.4s,#7 + eor w9,w9,w14 + sli v17.4s,v22.4s,#7 + ror w10,w10,#25 + ext v2.16b,v2.16b,v2.16b,#8 + ror w11,w11,#25 + ext v6.16b,v6.16b,v6.16b,#8 + ror w12,w12,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + cbnz x4,Loop_neon + + add w5,w5,w22 // accumulate key block + add v0.4s,v0.4s,v24.4s + add x6,x6,x22,lsr#32 + add v4.4s,v4.4s,v24.4s + add w7,w7,w23 + add v16.4s,v16.4s,v24.4s + add x8,x8,x23,lsr#32 + add v2.4s,v2.4s,v26.4s + add w9,w9,w24 + add v6.4s,v6.4s,v26.4s + add x10,x10,x24,lsr#32 + add v18.4s,v18.4s,v26.4s + add w11,w11,w25 + add v3.4s,v3.4s,v27.4s + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add v7.4s,v7.4s,v28.4s + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add v19.4s,v19.4s,v29.4s + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add v1.4s,v1.4s,v25.4s + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add v5.4s,v5.4s,v25.4s + add x21,x21,x30,lsr#32 + add v17.4s,v17.4s,v25.4s + + b.lo Ltail_neon + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor v0.16b,v0.16b,v20.16b + eor x15,x15,x16 + eor v1.16b,v1.16b,v21.16b + eor x17,x17,x19 + eor v2.16b,v2.16b,v22.16b + eor x20,x20,x21 + eor v3.16b,v3.16b,v23.16b + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#4 // increment counter + stp x9,x11,[x0,#16] + add v27.4s,v27.4s,v31.4s // += 4 + stp x13,x15,[x0,#32] + add v28.4s,v28.4s,v31.4s + stp x17,x20,[x0,#48] + add v29.4s,v29.4s,v31.4s + add x0,x0,#64 + + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 + + eor v4.16b,v4.16b,v20.16b + eor v5.16b,v5.16b,v21.16b + eor v6.16b,v6.16b,v22.16b + eor v7.16b,v7.16b,v23.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + + eor v16.16b,v16.16b,v0.16b + eor v17.16b,v17.16b,v1.16b + eor v18.16b,v18.16b,v2.16b + eor v19.16b,v19.16b,v3.16b + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + + b.hi Loop_outer_neon + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +Ltail_neon: + add x2,x2,#256 + cmp x2,#64 + b.lo Less_than_64 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#4 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + b.eq Ldone_neon + sub x2,x2,#64 + cmp x2,#64 + b.lo Less_than_128 + + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor v0.16b,v0.16b,v20.16b + eor v1.16b,v1.16b,v21.16b + eor v2.16b,v2.16b,v22.16b + eor v3.16b,v3.16b,v23.16b + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + b.eq Ldone_neon + sub x2,x2,#64 + cmp x2,#64 + b.lo Less_than_192 + + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor v4.16b,v4.16b,v20.16b + eor v5.16b,v5.16b,v21.16b + eor v6.16b,v6.16b,v22.16b + eor v7.16b,v7.16b,v23.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + b.eq Ldone_neon + sub x2,x2,#64 + + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[sp] + b Last_neon + +Less_than_128: + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[sp] + b Last_neon +Less_than_192: + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[sp] + b Last_neon + +.align 4 +Last_neon: + sub x0,x0,#1 + add x1,x1,x2 + add x0,x0,x2 + add x4,sp,x2 + neg x2,x2 + +Loop_tail_neon: + ldrb w10,[x1,x2] + ldrb w11,[x4,x2] + add x2,x2,#1 + eor w10,w10,w11 + strb w10,[x0,x2] + cbnz x2,Loop_tail_neon + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + +Ldone_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.def ChaCha20_512_neon + .type 32 +.endef +.align 5 +ChaCha20_512_neon: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,Lsigma + add x5,x5,:lo12:Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + +L512_or_more_neon: + sub sp,sp,#128+64 + + ldp x22,x23,[x5] // load sigma + ld1 {v24.4s},[x5],#16 + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ld1 {v25.4s,v26.4s},[x3] + ldp x28,x30,[x4] // load counter + ld1 {v27.4s},[x4] + ld1 {v31.4s},[x5] +#ifdef __AARCH64EB__ + rev64 v24.4s,v24.4s + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + add v27.4s,v27.4s,v31.4s // += 1 + stp q24,q25,[sp,#0] // off-load key block, invariant part + add v27.4s,v27.4s,v31.4s // not typo + str q26,[sp,#32] + add v28.4s,v27.4s,v31.4s + add v29.4s,v28.4s,v31.4s + add v30.4s,v29.4s,v31.4s + shl v31.4s,v31.4s,#2 // 1 -> 4 + + stp d8,d9,[sp,#128+0] // meet ABI requirements + stp d10,d11,[sp,#128+16] + stp d12,d13,[sp,#128+32] + stp d14,d15,[sp,#128+48] + + sub x2,x2,#512 // not typo + +Loop_outer_512_neon: + mov v0.16b,v24.16b + mov v4.16b,v24.16b + mov v8.16b,v24.16b + mov v12.16b,v24.16b + mov v16.16b,v24.16b + mov v20.16b,v24.16b + mov v1.16b,v25.16b + mov w5,w22 // unpack key block + mov v5.16b,v25.16b + lsr x6,x22,#32 + mov v9.16b,v25.16b + mov w7,w23 + mov v13.16b,v25.16b + lsr x8,x23,#32 + mov v17.16b,v25.16b + mov w9,w24 + mov v21.16b,v25.16b + lsr x10,x24,#32 + mov v3.16b,v27.16b + mov w11,w25 + mov v7.16b,v28.16b + lsr x12,x25,#32 + mov v11.16b,v29.16b + mov w13,w26 + mov v15.16b,v30.16b + lsr x14,x26,#32 + mov v2.16b,v26.16b + mov w15,w27 + mov v6.16b,v26.16b + lsr x16,x27,#32 + add v19.4s,v3.4s,v31.4s // +4 + mov w17,w28 + add v23.4s,v7.4s,v31.4s // +4 + lsr x19,x28,#32 + mov v10.16b,v26.16b + mov w20,w30 + mov v14.16b,v26.16b + lsr x21,x30,#32 + mov v18.16b,v26.16b + stp q27,q28,[sp,#48] // off-load key block, variable part + mov v22.16b,v26.16b + str q29,[sp,#80] + + mov x4,#5 + subs x2,x2,#512 +Loop_upper_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v11.16b,v11.16b,v11.16b,#12 + ext v15.16b,v15.16b,v15.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v23.16b,v23.16b,v23.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v9.16b,v9.16b,v9.16b,#4 + ext v13.16b,v13.16b,v13.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + ext v21.16b,v21.16b,v21.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v11.16b,v11.16b,v11.16b,#4 + ext v15.16b,v15.16b,v15.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v23.16b,v23.16b,v23.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v9.16b,v9.16b,v9.16b,#12 + ext v13.16b,v13.16b,v13.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + ext v21.16b,v21.16b,v21.16b,#12 + cbnz x4,Loop_upper_neon + + add w5,w5,w22 // accumulate key block + add x6,x6,x22,lsr#32 + add w7,w7,w23 + add x8,x8,x23,lsr#32 + add w9,w9,w24 + add x10,x10,x24,lsr#32 + add w11,w11,w25 + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add x21,x21,x30,lsr#32 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#1 // increment counter + mov w5,w22 // unpack key block + lsr x6,x22,#32 + stp x9,x11,[x0,#16] + mov w7,w23 + lsr x8,x23,#32 + stp x13,x15,[x0,#32] + mov w9,w24 + lsr x10,x24,#32 + stp x17,x20,[x0,#48] + add x0,x0,#64 + mov w11,w25 + lsr x12,x25,#32 + mov w13,w26 + lsr x14,x26,#32 + mov w15,w27 + lsr x16,x27,#32 + mov w17,w28 + lsr x19,x28,#32 + mov w20,w30 + lsr x21,x30,#32 + + mov x4,#5 +Loop_lower_neon: + sub x4,x4,#1 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#12 + ext v7.16b,v7.16b,v7.16b,#12 + ext v11.16b,v11.16b,v11.16b,#12 + ext v15.16b,v15.16b,v15.16b,#12 + ext v19.16b,v19.16b,v19.16b,#12 + ext v23.16b,v23.16b,v23.16b,#12 + ext v1.16b,v1.16b,v1.16b,#4 + ext v5.16b,v5.16b,v5.16b,#4 + ext v9.16b,v9.16b,v9.16b,#4 + ext v13.16b,v13.16b,v13.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + ext v21.16b,v21.16b,v21.16b,#4 + add v0.4s,v0.4s,v1.4s + add w5,w5,w9 + add v4.4s,v4.4s,v5.4s + add w6,w6,w10 + add v8.4s,v8.4s,v9.4s + add w7,w7,w11 + add v12.4s,v12.4s,v13.4s + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + eor v3.16b,v3.16b,v0.16b + eor w20,w20,w7 + eor v7.16b,v7.16b,v4.16b + eor w21,w21,w8 + eor v11.16b,v11.16b,v8.16b + ror w17,w17,#16 + eor v15.16b,v15.16b,v12.16b + ror w19,w19,#16 + eor v19.16b,v19.16b,v16.16b + ror w20,w20,#16 + eor v23.16b,v23.16b,v20.16b + ror w21,w21,#16 + rev32 v3.8h,v3.8h + add w13,w13,w17 + rev32 v7.8h,v7.8h + add w14,w14,w19 + rev32 v11.8h,v11.8h + add w15,w15,w20 + rev32 v15.8h,v15.8h + add w16,w16,w21 + rev32 v19.8h,v19.8h + eor w9,w9,w13 + rev32 v23.8h,v23.8h + eor w10,w10,w14 + add v2.4s,v2.4s,v3.4s + eor w11,w11,w15 + add v6.4s,v6.4s,v7.4s + eor w12,w12,w16 + add v10.4s,v10.4s,v11.4s + ror w9,w9,#20 + add v14.4s,v14.4s,v15.4s + ror w10,w10,#20 + add v18.4s,v18.4s,v19.4s + ror w11,w11,#20 + add v22.4s,v22.4s,v23.4s + ror w12,w12,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w9 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w10 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w11 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w12 + eor v28.16b,v17.16b,v18.16b + eor w17,w17,w5 + eor v29.16b,v21.16b,v22.16b + eor w19,w19,w6 + ushr v1.4s,v24.4s,#20 + eor w20,w20,w7 + ushr v5.4s,v25.4s,#20 + eor w21,w21,w8 + ushr v9.4s,v26.4s,#20 + ror w17,w17,#24 + ushr v13.4s,v27.4s,#20 + ror w19,w19,#24 + ushr v17.4s,v28.4s,#20 + ror w20,w20,#24 + ushr v21.4s,v29.4s,#20 + ror w21,w21,#24 + sli v1.4s,v24.4s,#12 + add w13,w13,w17 + sli v5.4s,v25.4s,#12 + add w14,w14,w19 + sli v9.4s,v26.4s,#12 + add w15,w15,w20 + sli v13.4s,v27.4s,#12 + add w16,w16,w21 + sli v17.4s,v28.4s,#12 + eor w9,w9,w13 + sli v21.4s,v29.4s,#12 + eor w10,w10,w14 + add v0.4s,v0.4s,v1.4s + eor w11,w11,w15 + add v4.4s,v4.4s,v5.4s + eor w12,w12,w16 + add v8.4s,v8.4s,v9.4s + ror w9,w9,#25 + add v12.4s,v12.4s,v13.4s + ror w10,w10,#25 + add v16.4s,v16.4s,v17.4s + ror w11,w11,#25 + add v20.4s,v20.4s,v21.4s + ror w12,w12,#25 + eor v24.16b,v3.16b,v0.16b + add w5,w5,w10 + eor v25.16b,v7.16b,v4.16b + add w6,w6,w11 + eor v26.16b,v11.16b,v8.16b + add w7,w7,w12 + eor v27.16b,v15.16b,v12.16b + add w8,w8,w9 + eor v28.16b,v19.16b,v16.16b + eor w21,w21,w5 + eor v29.16b,v23.16b,v20.16b + eor w17,w17,w6 + ushr v3.4s,v24.4s,#24 + eor w19,w19,w7 + ushr v7.4s,v25.4s,#24 + eor w20,w20,w8 + ushr v11.4s,v26.4s,#24 + ror w21,w21,#16 + ushr v15.4s,v27.4s,#24 + ror w17,w17,#16 + ushr v19.4s,v28.4s,#24 + ror w19,w19,#16 + ushr v23.4s,v29.4s,#24 + ror w20,w20,#16 + sli v3.4s,v24.4s,#8 + add w15,w15,w21 + sli v7.4s,v25.4s,#8 + add w16,w16,w17 + sli v11.4s,v26.4s,#8 + add w13,w13,w19 + sli v15.4s,v27.4s,#8 + add w14,w14,w20 + sli v19.4s,v28.4s,#8 + eor w10,w10,w15 + sli v23.4s,v29.4s,#8 + eor w11,w11,w16 + add v2.4s,v2.4s,v3.4s + eor w12,w12,w13 + add v6.4s,v6.4s,v7.4s + eor w9,w9,w14 + add v10.4s,v10.4s,v11.4s + ror w10,w10,#20 + add v14.4s,v14.4s,v15.4s + ror w11,w11,#20 + add v18.4s,v18.4s,v19.4s + ror w12,w12,#20 + add v22.4s,v22.4s,v23.4s + ror w9,w9,#20 + eor v24.16b,v1.16b,v2.16b + add w5,w5,w10 + eor v25.16b,v5.16b,v6.16b + add w6,w6,w11 + eor v26.16b,v9.16b,v10.16b + add w7,w7,w12 + eor v27.16b,v13.16b,v14.16b + add w8,w8,w9 + eor v28.16b,v17.16b,v18.16b + eor w21,w21,w5 + eor v29.16b,v21.16b,v22.16b + eor w17,w17,w6 + ushr v1.4s,v24.4s,#25 + eor w19,w19,w7 + ushr v5.4s,v25.4s,#25 + eor w20,w20,w8 + ushr v9.4s,v26.4s,#25 + ror w21,w21,#24 + ushr v13.4s,v27.4s,#25 + ror w17,w17,#24 + ushr v17.4s,v28.4s,#25 + ror w19,w19,#24 + ushr v21.4s,v29.4s,#25 + ror w20,w20,#24 + sli v1.4s,v24.4s,#7 + add w15,w15,w21 + sli v5.4s,v25.4s,#7 + add w16,w16,w17 + sli v9.4s,v26.4s,#7 + add w13,w13,w19 + sli v13.4s,v27.4s,#7 + add w14,w14,w20 + sli v17.4s,v28.4s,#7 + eor w10,w10,w15 + sli v21.4s,v29.4s,#7 + eor w11,w11,w16 + ext v2.16b,v2.16b,v2.16b,#8 + eor w12,w12,w13 + ext v6.16b,v6.16b,v6.16b,#8 + eor w9,w9,w14 + ext v10.16b,v10.16b,v10.16b,#8 + ror w10,w10,#25 + ext v14.16b,v14.16b,v14.16b,#8 + ror w11,w11,#25 + ext v18.16b,v18.16b,v18.16b,#8 + ror w12,w12,#25 + ext v22.16b,v22.16b,v22.16b,#8 + ror w9,w9,#25 + ext v3.16b,v3.16b,v3.16b,#4 + ext v7.16b,v7.16b,v7.16b,#4 + ext v11.16b,v11.16b,v11.16b,#4 + ext v15.16b,v15.16b,v15.16b,#4 + ext v19.16b,v19.16b,v19.16b,#4 + ext v23.16b,v23.16b,v23.16b,#4 + ext v1.16b,v1.16b,v1.16b,#12 + ext v5.16b,v5.16b,v5.16b,#12 + ext v9.16b,v9.16b,v9.16b,#12 + ext v13.16b,v13.16b,v13.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + ext v21.16b,v21.16b,v21.16b,#12 + cbnz x4,Loop_lower_neon + + add w5,w5,w22 // accumulate key block + ldp q24,q25,[sp,#0] + add x6,x6,x22,lsr#32 + ldp q26,q27,[sp,#32] + add w7,w7,w23 + ldp q28,q29,[sp,#64] + add x8,x8,x23,lsr#32 + add v0.4s,v0.4s,v24.4s + add w9,w9,w24 + add v4.4s,v4.4s,v24.4s + add x10,x10,x24,lsr#32 + add v8.4s,v8.4s,v24.4s + add w11,w11,w25 + add v12.4s,v12.4s,v24.4s + add x12,x12,x25,lsr#32 + add v16.4s,v16.4s,v24.4s + add w13,w13,w26 + add v20.4s,v20.4s,v24.4s + add x14,x14,x26,lsr#32 + add v2.4s,v2.4s,v26.4s + add w15,w15,w27 + add v6.4s,v6.4s,v26.4s + add x16,x16,x27,lsr#32 + add v10.4s,v10.4s,v26.4s + add w17,w17,w28 + add v14.4s,v14.4s,v26.4s + add x19,x19,x28,lsr#32 + add v18.4s,v18.4s,v26.4s + add w20,w20,w30 + add v22.4s,v22.4s,v26.4s + add x21,x21,x30,lsr#32 + add v19.4s,v19.4s,v31.4s // +4 + add x5,x5,x6,lsl#32 // pack + add v23.4s,v23.4s,v31.4s // +4 + add x7,x7,x8,lsl#32 + add v3.4s,v3.4s,v27.4s + ldp x6,x8,[x1,#0] // load input + add v7.4s,v7.4s,v28.4s + add x9,x9,x10,lsl#32 + add v11.4s,v11.4s,v29.4s + add x11,x11,x12,lsl#32 + add v15.4s,v15.4s,v30.4s + ldp x10,x12,[x1,#16] + add v19.4s,v19.4s,v27.4s + add x13,x13,x14,lsl#32 + add v23.4s,v23.4s,v28.4s + add x15,x15,x16,lsl#32 + add v1.4s,v1.4s,v25.4s + ldp x14,x16,[x1,#32] + add v5.4s,v5.4s,v25.4s + add x17,x17,x19,lsl#32 + add v9.4s,v9.4s,v25.4s + add x20,x20,x21,lsl#32 + add v13.4s,v13.4s,v25.4s + ldp x19,x21,[x1,#48] + add v17.4s,v17.4s,v25.4s + add x1,x1,#64 + add v21.4s,v21.4s,v25.4s + +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor v0.16b,v0.16b,v24.16b + eor x15,x15,x16 + eor v1.16b,v1.16b,v25.16b + eor x17,x17,x19 + eor v2.16b,v2.16b,v26.16b + eor x20,x20,x21 + eor v3.16b,v3.16b,v27.16b + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#7 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 + + ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 + eor v4.16b,v4.16b,v24.16b + eor v5.16b,v5.16b,v25.16b + eor v6.16b,v6.16b,v26.16b + eor v7.16b,v7.16b,v27.16b + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + eor v8.16b,v8.16b,v0.16b + ldp q24,q25,[sp,#0] + eor v9.16b,v9.16b,v1.16b + ldp q26,q27,[sp,#32] + eor v10.16b,v10.16b,v2.16b + eor v11.16b,v11.16b,v3.16b + st1 {v8.16b,v9.16b,v10.16b,v11.16b},[x0],#64 + + ld1 {v8.16b,v9.16b,v10.16b,v11.16b},[x1],#64 + eor v12.16b,v12.16b,v4.16b + eor v13.16b,v13.16b,v5.16b + eor v14.16b,v14.16b,v6.16b + eor v15.16b,v15.16b,v7.16b + st1 {v12.16b,v13.16b,v14.16b,v15.16b},[x0],#64 + + ld1 {v12.16b,v13.16b,v14.16b,v15.16b},[x1],#64 + eor v16.16b,v16.16b,v8.16b + eor v17.16b,v17.16b,v9.16b + eor v18.16b,v18.16b,v10.16b + eor v19.16b,v19.16b,v11.16b + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + + shl v0.4s,v31.4s,#1 // 4 -> 8 + eor v20.16b,v20.16b,v12.16b + eor v21.16b,v21.16b,v13.16b + eor v22.16b,v22.16b,v14.16b + eor v23.16b,v23.16b,v15.16b + st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 + + add v27.4s,v27.4s,v0.4s // += 8 + add v28.4s,v28.4s,v0.4s + add v29.4s,v29.4s,v0.4s + add v30.4s,v30.4s,v0.4s + + b.hs Loop_outer_512_neon + + adds x2,x2,#512 + ushr v0.4s,v31.4s,#2 // 4 -> 1 + + ldp d8,d9,[sp,#128+0] // meet ABI requirements + ldp d10,d11,[sp,#128+16] + ldp d12,d13,[sp,#128+32] + ldp d14,d15,[sp,#128+48] + + stp q24,q31,[sp,#0] // wipe off-load area + stp q24,q31,[sp,#32] + stp q24,q31,[sp,#64] + + b.eq Ldone_512_neon + + cmp x2,#192 + sub v27.4s,v27.4s,v0.4s // -= 1 + sub v28.4s,v28.4s,v0.4s + sub v29.4s,v29.4s,v0.4s + add sp,sp,#128 + b.hs Loop_outer_neon + + eor v25.16b,v25.16b,v25.16b + eor v26.16b,v26.16b,v26.16b + eor v27.16b,v27.16b,v27.16b + eor v28.16b,v28.16b,v28.16b + eor v29.16b,v29.16b,v29.16b + eor v30.16b,v30.16b,v30.16b + b Loop_outer + +Ldone_512_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#128+64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-x86-apple.S b/yass/third_party/boringssl/src/gen/crypto/chacha-x86-apple.S new file mode 100644 index 0000000000..48293da918 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-x86-apple.S @@ -0,0 +1,957 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _ChaCha20_ctr32_nohw +.private_extern _ChaCha20_ctr32_nohw +.align 4 +_ChaCha20_ctr32_nohw: +L_ChaCha20_ctr32_nohw_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 32(%esp),%esi + movl 36(%esp),%edi + subl $132,%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,80(%esp) + movl %ebx,84(%esp) + movl %ecx,88(%esp) + movl %edx,92(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,96(%esp) + movl %ebx,100(%esp) + movl %ecx,104(%esp) + movl %edx,108(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + subl $1,%eax + movl %eax,112(%esp) + movl %ebx,116(%esp) + movl %ecx,120(%esp) + movl %edx,124(%esp) + jmp L000entry +.align 4,0x90 +L001outer_loop: + movl %ebx,156(%esp) + movl %eax,152(%esp) + movl %ecx,160(%esp) +L000entry: + movl $1634760805,%eax + movl $857760878,4(%esp) + movl $2036477234,8(%esp) + movl $1797285236,12(%esp) + movl 84(%esp),%ebx + movl 88(%esp),%ebp + movl 104(%esp),%ecx + movl 108(%esp),%esi + movl 116(%esp),%edx + movl 120(%esp),%edi + movl %ebx,20(%esp) + movl %ebp,24(%esp) + movl %ecx,40(%esp) + movl %esi,44(%esp) + movl %edx,52(%esp) + movl %edi,56(%esp) + movl 92(%esp),%ebx + movl 124(%esp),%edi + movl 112(%esp),%edx + movl 80(%esp),%ebp + movl 96(%esp),%ecx + movl 100(%esp),%esi + addl $1,%edx + movl %ebx,28(%esp) + movl %edi,60(%esp) + movl %edx,112(%esp) + movl $10,%ebx + jmp L002loop +.align 4,0x90 +L002loop: + addl %ebp,%eax + movl %ebx,128(%esp) + movl %ebp,%ebx + xorl %eax,%edx + roll $16,%edx + addl %edx,%ecx + xorl %ecx,%ebx + movl 52(%esp),%edi + roll $12,%ebx + movl 20(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,(%esp) + roll $8,%edx + movl 4(%esp),%eax + addl %edx,%ecx + movl %edx,48(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + movl %ecx,32(%esp) + roll $16,%edi + movl %ebx,16(%esp) + addl %edi,%esi + movl 40(%esp),%ecx + xorl %esi,%ebp + movl 56(%esp),%edx + roll $12,%ebp + movl 24(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,4(%esp) + roll $8,%edi + movl 8(%esp),%eax + addl %edi,%esi + movl %edi,52(%esp) + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + movl %esi,36(%esp) + roll $16,%edx + movl %ebp,20(%esp) + addl %edx,%ecx + movl 44(%esp),%esi + xorl %ecx,%ebx + movl 60(%esp),%edi + roll $12,%ebx + movl 28(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,8(%esp) + roll $8,%edx + movl 12(%esp),%eax + addl %edx,%ecx + movl %edx,56(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + roll $16,%edi + movl %ebx,24(%esp) + addl %edi,%esi + xorl %esi,%ebp + roll $12,%ebp + movl 20(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,12(%esp) + roll $8,%edi + movl (%esp),%eax + addl %edi,%esi + movl %edi,%edx + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + roll $16,%edx + movl %ebp,28(%esp) + addl %edx,%ecx + xorl %ecx,%ebx + movl 48(%esp),%edi + roll $12,%ebx + movl 24(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,(%esp) + roll $8,%edx + movl 4(%esp),%eax + addl %edx,%ecx + movl %edx,60(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + movl %ecx,40(%esp) + roll $16,%edi + movl %ebx,20(%esp) + addl %edi,%esi + movl 32(%esp),%ecx + xorl %esi,%ebp + movl 52(%esp),%edx + roll $12,%ebp + movl 28(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,4(%esp) + roll $8,%edi + movl 8(%esp),%eax + addl %edi,%esi + movl %edi,48(%esp) + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + movl %esi,44(%esp) + roll $16,%edx + movl %ebp,24(%esp) + addl %edx,%ecx + movl 36(%esp),%esi + xorl %ecx,%ebx + movl 56(%esp),%edi + roll $12,%ebx + movl 16(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,8(%esp) + roll $8,%edx + movl 12(%esp),%eax + addl %edx,%ecx + movl %edx,52(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + roll $16,%edi + movl %ebx,28(%esp) + addl %edi,%esi + xorl %esi,%ebp + movl 48(%esp),%edx + roll $12,%ebp + movl 128(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,12(%esp) + roll $8,%edi + movl (%esp),%eax + addl %edi,%esi + movl %edi,56(%esp) + xorl %esi,%ebp + roll $7,%ebp + decl %ebx + jnz L002loop + movl 160(%esp),%ebx + addl $1634760805,%eax + addl 80(%esp),%ebp + addl 96(%esp),%ecx + addl 100(%esp),%esi + cmpl $64,%ebx + jb L003tail + movl 156(%esp),%ebx + addl 112(%esp),%edx + addl 120(%esp),%edi + xorl (%ebx),%eax + xorl 16(%ebx),%ebp + movl %eax,(%esp) + movl 152(%esp),%eax + xorl 32(%ebx),%ecx + xorl 36(%ebx),%esi + xorl 48(%ebx),%edx + xorl 56(%ebx),%edi + movl %ebp,16(%eax) + movl %ecx,32(%eax) + movl %esi,36(%eax) + movl %edx,48(%eax) + movl %edi,56(%eax) + movl 4(%esp),%ebp + movl 8(%esp),%ecx + movl 12(%esp),%esi + movl 20(%esp),%edx + movl 24(%esp),%edi + addl $857760878,%ebp + addl $2036477234,%ecx + addl $1797285236,%esi + addl 84(%esp),%edx + addl 88(%esp),%edi + xorl 4(%ebx),%ebp + xorl 8(%ebx),%ecx + xorl 12(%ebx),%esi + xorl 20(%ebx),%edx + xorl 24(%ebx),%edi + movl %ebp,4(%eax) + movl %ecx,8(%eax) + movl %esi,12(%eax) + movl %edx,20(%eax) + movl %edi,24(%eax) + movl 28(%esp),%ebp + movl 40(%esp),%ecx + movl 44(%esp),%esi + movl 52(%esp),%edx + movl 60(%esp),%edi + addl 92(%esp),%ebp + addl 104(%esp),%ecx + addl 108(%esp),%esi + addl 116(%esp),%edx + addl 124(%esp),%edi + xorl 28(%ebx),%ebp + xorl 40(%ebx),%ecx + xorl 44(%ebx),%esi + xorl 52(%ebx),%edx + xorl 60(%ebx),%edi + leal 64(%ebx),%ebx + movl %ebp,28(%eax) + movl (%esp),%ebp + movl %ecx,40(%eax) + movl 160(%esp),%ecx + movl %esi,44(%eax) + movl %edx,52(%eax) + movl %edi,60(%eax) + movl %ebp,(%eax) + leal 64(%eax),%eax + subl $64,%ecx + jnz L001outer_loop + jmp L004done +L003tail: + addl 112(%esp),%edx + addl 120(%esp),%edi + movl %eax,(%esp) + movl %ebp,16(%esp) + movl %ecx,32(%esp) + movl %esi,36(%esp) + movl %edx,48(%esp) + movl %edi,56(%esp) + movl 4(%esp),%ebp + movl 8(%esp),%ecx + movl 12(%esp),%esi + movl 20(%esp),%edx + movl 24(%esp),%edi + addl $857760878,%ebp + addl $2036477234,%ecx + addl $1797285236,%esi + addl 84(%esp),%edx + addl 88(%esp),%edi + movl %ebp,4(%esp) + movl %ecx,8(%esp) + movl %esi,12(%esp) + movl %edx,20(%esp) + movl %edi,24(%esp) + movl 28(%esp),%ebp + movl 40(%esp),%ecx + movl 44(%esp),%esi + movl 52(%esp),%edx + movl 60(%esp),%edi + addl 92(%esp),%ebp + addl 104(%esp),%ecx + addl 108(%esp),%esi + addl 116(%esp),%edx + addl 124(%esp),%edi + movl %ebp,28(%esp) + movl 156(%esp),%ebp + movl %ecx,40(%esp) + movl 152(%esp),%ecx + movl %esi,44(%esp) + xorl %esi,%esi + movl %edx,52(%esp) + movl %edi,60(%esp) + xorl %eax,%eax + xorl %edx,%edx +L005tail_loop: + movb (%esi,%ebp,1),%al + movb (%esp,%esi,1),%dl + leal 1(%esi),%esi + xorb %dl,%al + movb %al,-1(%ecx,%esi,1) + decl %ebx + jnz L005tail_loop +L004done: + addl $132,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _ChaCha20_ctr32_ssse3 +.private_extern _ChaCha20_ctr32_ssse3 +.align 4 +_ChaCha20_ctr32_ssse3: +L_ChaCha20_ctr32_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call Lpic_point +Lpic_point: + popl %eax + movl 20(%esp),%edi + movl 24(%esp),%esi + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $524,%esp + andl $-64,%esp + movl %ebp,512(%esp) + leal Lssse3_data-Lpic_point(%eax),%eax + movdqu (%ebx),%xmm3 + cmpl $256,%ecx + jb L0061x + movl %edx,516(%esp) + movl %ebx,520(%esp) + subl $256,%ecx + leal 384(%esp),%ebp + movdqu (%edx),%xmm7 + pshufd $0,%xmm3,%xmm0 + pshufd $85,%xmm3,%xmm1 + pshufd $170,%xmm3,%xmm2 + pshufd $255,%xmm3,%xmm3 + paddd 48(%eax),%xmm0 + pshufd $0,%xmm7,%xmm4 + pshufd $85,%xmm7,%xmm5 + psubd 64(%eax),%xmm0 + pshufd $170,%xmm7,%xmm6 + pshufd $255,%xmm7,%xmm7 + movdqa %xmm0,64(%ebp) + movdqa %xmm1,80(%ebp) + movdqa %xmm2,96(%ebp) + movdqa %xmm3,112(%ebp) + movdqu 16(%edx),%xmm3 + movdqa %xmm4,-64(%ebp) + movdqa %xmm5,-48(%ebp) + movdqa %xmm6,-32(%ebp) + movdqa %xmm7,-16(%ebp) + movdqa 32(%eax),%xmm7 + leal 128(%esp),%ebx + pshufd $0,%xmm3,%xmm0 + pshufd $85,%xmm3,%xmm1 + pshufd $170,%xmm3,%xmm2 + pshufd $255,%xmm3,%xmm3 + pshufd $0,%xmm7,%xmm4 + pshufd $85,%xmm7,%xmm5 + pshufd $170,%xmm7,%xmm6 + pshufd $255,%xmm7,%xmm7 + movdqa %xmm0,(%ebp) + movdqa %xmm1,16(%ebp) + movdqa %xmm2,32(%ebp) + movdqa %xmm3,48(%ebp) + movdqa %xmm4,-128(%ebp) + movdqa %xmm5,-112(%ebp) + movdqa %xmm6,-96(%ebp) + movdqa %xmm7,-80(%ebp) + leal 128(%esi),%esi + leal 128(%edi),%edi + jmp L007outer_loop +.align 4,0x90 +L007outer_loop: + movdqa -112(%ebp),%xmm1 + movdqa -96(%ebp),%xmm2 + movdqa -80(%ebp),%xmm3 + movdqa -48(%ebp),%xmm5 + movdqa -32(%ebp),%xmm6 + movdqa -16(%ebp),%xmm7 + movdqa %xmm1,-112(%ebx) + movdqa %xmm2,-96(%ebx) + movdqa %xmm3,-80(%ebx) + movdqa %xmm5,-48(%ebx) + movdqa %xmm6,-32(%ebx) + movdqa %xmm7,-16(%ebx) + movdqa 32(%ebp),%xmm2 + movdqa 48(%ebp),%xmm3 + movdqa 64(%ebp),%xmm4 + movdqa 80(%ebp),%xmm5 + movdqa 96(%ebp),%xmm6 + movdqa 112(%ebp),%xmm7 + paddd 64(%eax),%xmm4 + movdqa %xmm2,32(%ebx) + movdqa %xmm3,48(%ebx) + movdqa %xmm4,64(%ebx) + movdqa %xmm5,80(%ebx) + movdqa %xmm6,96(%ebx) + movdqa %xmm7,112(%ebx) + movdqa %xmm4,64(%ebp) + movdqa -128(%ebp),%xmm0 + movdqa %xmm4,%xmm6 + movdqa -64(%ebp),%xmm3 + movdqa (%ebp),%xmm4 + movdqa 16(%ebp),%xmm5 + movl $10,%edx + nop +.align 4,0x90 +L008loop: + paddd %xmm3,%xmm0 + movdqa %xmm3,%xmm2 + pxor %xmm0,%xmm6 + pshufb (%eax),%xmm6 + paddd %xmm6,%xmm4 + pxor %xmm4,%xmm2 + movdqa -48(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -112(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 80(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-128(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,64(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + movdqa %xmm4,(%ebx) + pshufb (%eax),%xmm7 + movdqa %xmm2,-64(%ebx) + paddd %xmm7,%xmm5 + movdqa 32(%ebx),%xmm4 + pxor %xmm5,%xmm3 + movdqa -32(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -96(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 96(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-112(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,80(%ebx) + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + movdqa %xmm5,16(%ebx) + pshufb (%eax),%xmm6 + movdqa %xmm3,-48(%ebx) + paddd %xmm6,%xmm4 + movdqa 48(%ebx),%xmm5 + pxor %xmm4,%xmm2 + movdqa -16(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -80(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 112(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-96(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,96(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + pshufb (%eax),%xmm7 + movdqa %xmm2,-32(%ebx) + paddd %xmm7,%xmm5 + pxor %xmm5,%xmm3 + movdqa -48(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -128(%ebx),%xmm0 + paddd %xmm3,%xmm1 + pxor %xmm1,%xmm7 + movdqa %xmm1,-80(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,%xmm6 + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + pshufb (%eax),%xmm6 + movdqa %xmm3,-16(%ebx) + paddd %xmm6,%xmm4 + pxor %xmm4,%xmm2 + movdqa -32(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -112(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 64(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-128(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,112(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + movdqa %xmm4,32(%ebx) + pshufb (%eax),%xmm7 + movdqa %xmm2,-48(%ebx) + paddd %xmm7,%xmm5 + movdqa (%ebx),%xmm4 + pxor %xmm5,%xmm3 + movdqa -16(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -96(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 80(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-112(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,64(%ebx) + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + movdqa %xmm5,48(%ebx) + pshufb (%eax),%xmm6 + movdqa %xmm3,-32(%ebx) + paddd %xmm6,%xmm4 + movdqa 16(%ebx),%xmm5 + pxor %xmm4,%xmm2 + movdqa -64(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -80(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 96(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-96(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,80(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + pshufb (%eax),%xmm7 + movdqa %xmm2,-16(%ebx) + paddd %xmm7,%xmm5 + pxor %xmm5,%xmm3 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -128(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 64(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-80(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,96(%ebx) + pxor %xmm5,%xmm3 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + por %xmm1,%xmm3 + decl %edx + jnz L008loop + movdqa %xmm3,-64(%ebx) + movdqa %xmm4,(%ebx) + movdqa %xmm5,16(%ebx) + movdqa %xmm6,64(%ebx) + movdqa %xmm7,96(%ebx) + movdqa -112(%ebx),%xmm1 + movdqa -96(%ebx),%xmm2 + movdqa -80(%ebx),%xmm3 + paddd -128(%ebp),%xmm0 + paddd -112(%ebp),%xmm1 + paddd -96(%ebp),%xmm2 + paddd -80(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqu -128(%esi),%xmm4 + movdqu -64(%esi),%xmm5 + movdqu (%esi),%xmm2 + movdqu 64(%esi),%xmm7 + leal 16(%esi),%esi + pxor %xmm0,%xmm4 + movdqa -64(%ebx),%xmm0 + pxor %xmm1,%xmm5 + movdqa -48(%ebx),%xmm1 + pxor %xmm2,%xmm6 + movdqa -32(%ebx),%xmm2 + pxor %xmm3,%xmm7 + movdqa -16(%ebx),%xmm3 + movdqu %xmm4,-128(%edi) + movdqu %xmm5,-64(%edi) + movdqu %xmm6,(%edi) + movdqu %xmm7,64(%edi) + leal 16(%edi),%edi + paddd -64(%ebp),%xmm0 + paddd -48(%ebp),%xmm1 + paddd -32(%ebp),%xmm2 + paddd -16(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqu -128(%esi),%xmm4 + movdqu -64(%esi),%xmm5 + movdqu (%esi),%xmm2 + movdqu 64(%esi),%xmm7 + leal 16(%esi),%esi + pxor %xmm0,%xmm4 + movdqa (%ebx),%xmm0 + pxor %xmm1,%xmm5 + movdqa 16(%ebx),%xmm1 + pxor %xmm2,%xmm6 + movdqa 32(%ebx),%xmm2 + pxor %xmm3,%xmm7 + movdqa 48(%ebx),%xmm3 + movdqu %xmm4,-128(%edi) + movdqu %xmm5,-64(%edi) + movdqu %xmm6,(%edi) + movdqu %xmm7,64(%edi) + leal 16(%edi),%edi + paddd (%ebp),%xmm0 + paddd 16(%ebp),%xmm1 + paddd 32(%ebp),%xmm2 + paddd 48(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqu -128(%esi),%xmm4 + movdqu -64(%esi),%xmm5 + movdqu (%esi),%xmm2 + movdqu 64(%esi),%xmm7 + leal 16(%esi),%esi + pxor %xmm0,%xmm4 + movdqa 64(%ebx),%xmm0 + pxor %xmm1,%xmm5 + movdqa 80(%ebx),%xmm1 + pxor %xmm2,%xmm6 + movdqa 96(%ebx),%xmm2 + pxor %xmm3,%xmm7 + movdqa 112(%ebx),%xmm3 + movdqu %xmm4,-128(%edi) + movdqu %xmm5,-64(%edi) + movdqu %xmm6,(%edi) + movdqu %xmm7,64(%edi) + leal 16(%edi),%edi + paddd 64(%ebp),%xmm0 + paddd 80(%ebp),%xmm1 + paddd 96(%ebp),%xmm2 + paddd 112(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqu -128(%esi),%xmm4 + movdqu -64(%esi),%xmm5 + movdqu (%esi),%xmm2 + movdqu 64(%esi),%xmm7 + leal 208(%esi),%esi + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm5 + pxor %xmm2,%xmm6 + pxor %xmm3,%xmm7 + movdqu %xmm4,-128(%edi) + movdqu %xmm5,-64(%edi) + movdqu %xmm6,(%edi) + movdqu %xmm7,64(%edi) + leal 208(%edi),%edi + subl $256,%ecx + jnc L007outer_loop + addl $256,%ecx + jz L009done + movl 520(%esp),%ebx + leal -128(%esi),%esi + movl 516(%esp),%edx + leal -128(%edi),%edi + movd 64(%ebp),%xmm2 + movdqu (%ebx),%xmm3 + paddd 96(%eax),%xmm2 + pand 112(%eax),%xmm3 + por %xmm2,%xmm3 +L0061x: + movdqa 32(%eax),%xmm0 + movdqu (%edx),%xmm1 + movdqu 16(%edx),%xmm2 + movdqa (%eax),%xmm6 + movdqa 16(%eax),%xmm7 + movl %ebp,48(%esp) + movdqa %xmm0,(%esp) + movdqa %xmm1,16(%esp) + movdqa %xmm2,32(%esp) + movdqa %xmm3,48(%esp) + movl $10,%edx + jmp L010loop1x +.align 4,0x90 +L011outer1x: + movdqa 80(%eax),%xmm3 + movdqa (%esp),%xmm0 + movdqa 16(%esp),%xmm1 + movdqa 32(%esp),%xmm2 + paddd 48(%esp),%xmm3 + movl $10,%edx + movdqa %xmm3,48(%esp) + jmp L010loop1x +.align 4,0x90 +L010loop1x: + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $57,%xmm1,%xmm1 + pshufd $147,%xmm3,%xmm3 + nop + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $147,%xmm1,%xmm1 + pshufd $57,%xmm3,%xmm3 + decl %edx + jnz L010loop1x + paddd (%esp),%xmm0 + paddd 16(%esp),%xmm1 + paddd 32(%esp),%xmm2 + paddd 48(%esp),%xmm3 + cmpl $64,%ecx + jb L012tail + movdqu (%esi),%xmm4 + movdqu 16(%esi),%xmm5 + pxor %xmm4,%xmm0 + movdqu 32(%esi),%xmm4 + pxor %xmm5,%xmm1 + movdqu 48(%esi),%xmm5 + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm3 + leal 64(%esi),%esi + movdqu %xmm0,(%edi) + movdqu %xmm1,16(%edi) + movdqu %xmm2,32(%edi) + movdqu %xmm3,48(%edi) + leal 64(%edi),%edi + subl $64,%ecx + jnz L011outer1x + jmp L009done +L012tail: + movdqa %xmm0,(%esp) + movdqa %xmm1,16(%esp) + movdqa %xmm2,32(%esp) + movdqa %xmm3,48(%esp) + xorl %eax,%eax + xorl %edx,%edx + xorl %ebp,%ebp +L013tail_loop: + movb (%esp,%ebp,1),%al + movb (%esi,%ebp,1),%dl + leal 1(%ebp),%ebp + xorb %dl,%al + movb %al,-1(%edi,%ebp,1) + decl %ecx + jnz L013tail_loop +L009done: + movl 512(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +Lssse3_data: +.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 +.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 +.long 1634760805,857760878,2036477234,1797285236 +.long 0,1,2,3 +.long 4,4,4,4 +.long 1,0,0,0 +.long 4,0,0,0 +.long 0,-1,-1,-1 +.align 6,0x90 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 +.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +.byte 114,103,62,0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-x86-linux.S b/yass/third_party/boringssl/src/gen/crypto/chacha-x86-linux.S new file mode 100644 index 0000000000..566fbb4cd6 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-x86-linux.S @@ -0,0 +1,961 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl ChaCha20_ctr32_nohw +.hidden ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,@function +.align 16 +ChaCha20_ctr32_nohw: +.L_ChaCha20_ctr32_nohw_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 32(%esp),%esi + movl 36(%esp),%edi + subl $132,%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,80(%esp) + movl %ebx,84(%esp) + movl %ecx,88(%esp) + movl %edx,92(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,96(%esp) + movl %ebx,100(%esp) + movl %ecx,104(%esp) + movl %edx,108(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + subl $1,%eax + movl %eax,112(%esp) + movl %ebx,116(%esp) + movl %ecx,120(%esp) + movl %edx,124(%esp) + jmp .L000entry +.align 16 +.L001outer_loop: + movl %ebx,156(%esp) + movl %eax,152(%esp) + movl %ecx,160(%esp) +.L000entry: + movl $1634760805,%eax + movl $857760878,4(%esp) + movl $2036477234,8(%esp) + movl $1797285236,12(%esp) + movl 84(%esp),%ebx + movl 88(%esp),%ebp + movl 104(%esp),%ecx + movl 108(%esp),%esi + movl 116(%esp),%edx + movl 120(%esp),%edi + movl %ebx,20(%esp) + movl %ebp,24(%esp) + movl %ecx,40(%esp) + movl %esi,44(%esp) + movl %edx,52(%esp) + movl %edi,56(%esp) + movl 92(%esp),%ebx + movl 124(%esp),%edi + movl 112(%esp),%edx + movl 80(%esp),%ebp + movl 96(%esp),%ecx + movl 100(%esp),%esi + addl $1,%edx + movl %ebx,28(%esp) + movl %edi,60(%esp) + movl %edx,112(%esp) + movl $10,%ebx + jmp .L002loop +.align 16 +.L002loop: + addl %ebp,%eax + movl %ebx,128(%esp) + movl %ebp,%ebx + xorl %eax,%edx + roll $16,%edx + addl %edx,%ecx + xorl %ecx,%ebx + movl 52(%esp),%edi + roll $12,%ebx + movl 20(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,(%esp) + roll $8,%edx + movl 4(%esp),%eax + addl %edx,%ecx + movl %edx,48(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + movl %ecx,32(%esp) + roll $16,%edi + movl %ebx,16(%esp) + addl %edi,%esi + movl 40(%esp),%ecx + xorl %esi,%ebp + movl 56(%esp),%edx + roll $12,%ebp + movl 24(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,4(%esp) + roll $8,%edi + movl 8(%esp),%eax + addl %edi,%esi + movl %edi,52(%esp) + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + movl %esi,36(%esp) + roll $16,%edx + movl %ebp,20(%esp) + addl %edx,%ecx + movl 44(%esp),%esi + xorl %ecx,%ebx + movl 60(%esp),%edi + roll $12,%ebx + movl 28(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,8(%esp) + roll $8,%edx + movl 12(%esp),%eax + addl %edx,%ecx + movl %edx,56(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + roll $16,%edi + movl %ebx,24(%esp) + addl %edi,%esi + xorl %esi,%ebp + roll $12,%ebp + movl 20(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,12(%esp) + roll $8,%edi + movl (%esp),%eax + addl %edi,%esi + movl %edi,%edx + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + roll $16,%edx + movl %ebp,28(%esp) + addl %edx,%ecx + xorl %ecx,%ebx + movl 48(%esp),%edi + roll $12,%ebx + movl 24(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,(%esp) + roll $8,%edx + movl 4(%esp),%eax + addl %edx,%ecx + movl %edx,60(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + movl %ecx,40(%esp) + roll $16,%edi + movl %ebx,20(%esp) + addl %edi,%esi + movl 32(%esp),%ecx + xorl %esi,%ebp + movl 52(%esp),%edx + roll $12,%ebp + movl 28(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,4(%esp) + roll $8,%edi + movl 8(%esp),%eax + addl %edi,%esi + movl %edi,48(%esp) + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + movl %esi,44(%esp) + roll $16,%edx + movl %ebp,24(%esp) + addl %edx,%ecx + movl 36(%esp),%esi + xorl %ecx,%ebx + movl 56(%esp),%edi + roll $12,%ebx + movl 16(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,8(%esp) + roll $8,%edx + movl 12(%esp),%eax + addl %edx,%ecx + movl %edx,52(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + roll $16,%edi + movl %ebx,28(%esp) + addl %edi,%esi + xorl %esi,%ebp + movl 48(%esp),%edx + roll $12,%ebp + movl 128(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,12(%esp) + roll $8,%edi + movl (%esp),%eax + addl %edi,%esi + movl %edi,56(%esp) + xorl %esi,%ebp + roll $7,%ebp + decl %ebx + jnz .L002loop + movl 160(%esp),%ebx + addl $1634760805,%eax + addl 80(%esp),%ebp + addl 96(%esp),%ecx + addl 100(%esp),%esi + cmpl $64,%ebx + jb .L003tail + movl 156(%esp),%ebx + addl 112(%esp),%edx + addl 120(%esp),%edi + xorl (%ebx),%eax + xorl 16(%ebx),%ebp + movl %eax,(%esp) + movl 152(%esp),%eax + xorl 32(%ebx),%ecx + xorl 36(%ebx),%esi + xorl 48(%ebx),%edx + xorl 56(%ebx),%edi + movl %ebp,16(%eax) + movl %ecx,32(%eax) + movl %esi,36(%eax) + movl %edx,48(%eax) + movl %edi,56(%eax) + movl 4(%esp),%ebp + movl 8(%esp),%ecx + movl 12(%esp),%esi + movl 20(%esp),%edx + movl 24(%esp),%edi + addl $857760878,%ebp + addl $2036477234,%ecx + addl $1797285236,%esi + addl 84(%esp),%edx + addl 88(%esp),%edi + xorl 4(%ebx),%ebp + xorl 8(%ebx),%ecx + xorl 12(%ebx),%esi + xorl 20(%ebx),%edx + xorl 24(%ebx),%edi + movl %ebp,4(%eax) + movl %ecx,8(%eax) + movl %esi,12(%eax) + movl %edx,20(%eax) + movl %edi,24(%eax) + movl 28(%esp),%ebp + movl 40(%esp),%ecx + movl 44(%esp),%esi + movl 52(%esp),%edx + movl 60(%esp),%edi + addl 92(%esp),%ebp + addl 104(%esp),%ecx + addl 108(%esp),%esi + addl 116(%esp),%edx + addl 124(%esp),%edi + xorl 28(%ebx),%ebp + xorl 40(%ebx),%ecx + xorl 44(%ebx),%esi + xorl 52(%ebx),%edx + xorl 60(%ebx),%edi + leal 64(%ebx),%ebx + movl %ebp,28(%eax) + movl (%esp),%ebp + movl %ecx,40(%eax) + movl 160(%esp),%ecx + movl %esi,44(%eax) + movl %edx,52(%eax) + movl %edi,60(%eax) + movl %ebp,(%eax) + leal 64(%eax),%eax + subl $64,%ecx + jnz .L001outer_loop + jmp .L004done +.L003tail: + addl 112(%esp),%edx + addl 120(%esp),%edi + movl %eax,(%esp) + movl %ebp,16(%esp) + movl %ecx,32(%esp) + movl %esi,36(%esp) + movl %edx,48(%esp) + movl %edi,56(%esp) + movl 4(%esp),%ebp + movl 8(%esp),%ecx + movl 12(%esp),%esi + movl 20(%esp),%edx + movl 24(%esp),%edi + addl $857760878,%ebp + addl $2036477234,%ecx + addl $1797285236,%esi + addl 84(%esp),%edx + addl 88(%esp),%edi + movl %ebp,4(%esp) + movl %ecx,8(%esp) + movl %esi,12(%esp) + movl %edx,20(%esp) + movl %edi,24(%esp) + movl 28(%esp),%ebp + movl 40(%esp),%ecx + movl 44(%esp),%esi + movl 52(%esp),%edx + movl 60(%esp),%edi + addl 92(%esp),%ebp + addl 104(%esp),%ecx + addl 108(%esp),%esi + addl 116(%esp),%edx + addl 124(%esp),%edi + movl %ebp,28(%esp) + movl 156(%esp),%ebp + movl %ecx,40(%esp) + movl 152(%esp),%ecx + movl %esi,44(%esp) + xorl %esi,%esi + movl %edx,52(%esp) + movl %edi,60(%esp) + xorl %eax,%eax + xorl %edx,%edx +.L005tail_loop: + movb (%esi,%ebp,1),%al + movb (%esp,%esi,1),%dl + leal 1(%esi),%esi + xorb %dl,%al + movb %al,-1(%ecx,%esi,1) + decl %ebx + jnz .L005tail_loop +.L004done: + addl $132,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size ChaCha20_ctr32_nohw,.-.L_ChaCha20_ctr32_nohw_begin +.globl ChaCha20_ctr32_ssse3 +.hidden ChaCha20_ctr32_ssse3 +.type ChaCha20_ctr32_ssse3,@function +.align 16 +ChaCha20_ctr32_ssse3: +.L_ChaCha20_ctr32_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .Lpic_point +.Lpic_point: + popl %eax + movl 20(%esp),%edi + movl 24(%esp),%esi + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $524,%esp + andl $-64,%esp + movl %ebp,512(%esp) + leal .Lssse3_data-.Lpic_point(%eax),%eax + movdqu (%ebx),%xmm3 + cmpl $256,%ecx + jb .L0061x + movl %edx,516(%esp) + movl %ebx,520(%esp) + subl $256,%ecx + leal 384(%esp),%ebp + movdqu (%edx),%xmm7 + pshufd $0,%xmm3,%xmm0 + pshufd $85,%xmm3,%xmm1 + pshufd $170,%xmm3,%xmm2 + pshufd $255,%xmm3,%xmm3 + paddd 48(%eax),%xmm0 + pshufd $0,%xmm7,%xmm4 + pshufd $85,%xmm7,%xmm5 + psubd 64(%eax),%xmm0 + pshufd $170,%xmm7,%xmm6 + pshufd $255,%xmm7,%xmm7 + movdqa %xmm0,64(%ebp) + movdqa %xmm1,80(%ebp) + movdqa %xmm2,96(%ebp) + movdqa %xmm3,112(%ebp) + movdqu 16(%edx),%xmm3 + movdqa %xmm4,-64(%ebp) + movdqa %xmm5,-48(%ebp) + movdqa %xmm6,-32(%ebp) + movdqa %xmm7,-16(%ebp) + movdqa 32(%eax),%xmm7 + leal 128(%esp),%ebx + pshufd $0,%xmm3,%xmm0 + pshufd $85,%xmm3,%xmm1 + pshufd $170,%xmm3,%xmm2 + pshufd $255,%xmm3,%xmm3 + pshufd $0,%xmm7,%xmm4 + pshufd $85,%xmm7,%xmm5 + pshufd $170,%xmm7,%xmm6 + pshufd $255,%xmm7,%xmm7 + movdqa %xmm0,(%ebp) + movdqa %xmm1,16(%ebp) + movdqa %xmm2,32(%ebp) + movdqa %xmm3,48(%ebp) + movdqa %xmm4,-128(%ebp) + movdqa %xmm5,-112(%ebp) + movdqa %xmm6,-96(%ebp) + movdqa %xmm7,-80(%ebp) + leal 128(%esi),%esi + leal 128(%edi),%edi + jmp .L007outer_loop +.align 16 +.L007outer_loop: + movdqa -112(%ebp),%xmm1 + movdqa -96(%ebp),%xmm2 + movdqa -80(%ebp),%xmm3 + movdqa -48(%ebp),%xmm5 + movdqa -32(%ebp),%xmm6 + movdqa -16(%ebp),%xmm7 + movdqa %xmm1,-112(%ebx) + movdqa %xmm2,-96(%ebx) + movdqa %xmm3,-80(%ebx) + movdqa %xmm5,-48(%ebx) + movdqa %xmm6,-32(%ebx) + movdqa %xmm7,-16(%ebx) + movdqa 32(%ebp),%xmm2 + movdqa 48(%ebp),%xmm3 + movdqa 64(%ebp),%xmm4 + movdqa 80(%ebp),%xmm5 + movdqa 96(%ebp),%xmm6 + movdqa 112(%ebp),%xmm7 + paddd 64(%eax),%xmm4 + movdqa %xmm2,32(%ebx) + movdqa %xmm3,48(%ebx) + movdqa %xmm4,64(%ebx) + movdqa %xmm5,80(%ebx) + movdqa %xmm6,96(%ebx) + movdqa %xmm7,112(%ebx) + movdqa %xmm4,64(%ebp) + movdqa -128(%ebp),%xmm0 + movdqa %xmm4,%xmm6 + movdqa -64(%ebp),%xmm3 + movdqa (%ebp),%xmm4 + movdqa 16(%ebp),%xmm5 + movl $10,%edx + nop +.align 16 +.L008loop: + paddd %xmm3,%xmm0 + movdqa %xmm3,%xmm2 + pxor %xmm0,%xmm6 + pshufb (%eax),%xmm6 + paddd %xmm6,%xmm4 + pxor %xmm4,%xmm2 + movdqa -48(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -112(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 80(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-128(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,64(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + movdqa %xmm4,(%ebx) + pshufb (%eax),%xmm7 + movdqa %xmm2,-64(%ebx) + paddd %xmm7,%xmm5 + movdqa 32(%ebx),%xmm4 + pxor %xmm5,%xmm3 + movdqa -32(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -96(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 96(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-112(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,80(%ebx) + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + movdqa %xmm5,16(%ebx) + pshufb (%eax),%xmm6 + movdqa %xmm3,-48(%ebx) + paddd %xmm6,%xmm4 + movdqa 48(%ebx),%xmm5 + pxor %xmm4,%xmm2 + movdqa -16(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -80(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 112(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-96(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,96(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + pshufb (%eax),%xmm7 + movdqa %xmm2,-32(%ebx) + paddd %xmm7,%xmm5 + pxor %xmm5,%xmm3 + movdqa -48(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -128(%ebx),%xmm0 + paddd %xmm3,%xmm1 + pxor %xmm1,%xmm7 + movdqa %xmm1,-80(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,%xmm6 + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + pshufb (%eax),%xmm6 + movdqa %xmm3,-16(%ebx) + paddd %xmm6,%xmm4 + pxor %xmm4,%xmm2 + movdqa -32(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -112(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 64(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-128(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,112(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + movdqa %xmm4,32(%ebx) + pshufb (%eax),%xmm7 + movdqa %xmm2,-48(%ebx) + paddd %xmm7,%xmm5 + movdqa (%ebx),%xmm4 + pxor %xmm5,%xmm3 + movdqa -16(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -96(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 80(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-112(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,64(%ebx) + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + movdqa %xmm5,48(%ebx) + pshufb (%eax),%xmm6 + movdqa %xmm3,-32(%ebx) + paddd %xmm6,%xmm4 + movdqa 16(%ebx),%xmm5 + pxor %xmm4,%xmm2 + movdqa -64(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -80(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 96(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-96(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,80(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + pshufb (%eax),%xmm7 + movdqa %xmm2,-16(%ebx) + paddd %xmm7,%xmm5 + pxor %xmm5,%xmm3 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -128(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 64(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-80(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,96(%ebx) + pxor %xmm5,%xmm3 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + por %xmm1,%xmm3 + decl %edx + jnz .L008loop + movdqa %xmm3,-64(%ebx) + movdqa %xmm4,(%ebx) + movdqa %xmm5,16(%ebx) + movdqa %xmm6,64(%ebx) + movdqa %xmm7,96(%ebx) + movdqa -112(%ebx),%xmm1 + movdqa -96(%ebx),%xmm2 + movdqa -80(%ebx),%xmm3 + paddd -128(%ebp),%xmm0 + paddd -112(%ebp),%xmm1 + paddd -96(%ebp),%xmm2 + paddd -80(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqu -128(%esi),%xmm4 + movdqu -64(%esi),%xmm5 + movdqu (%esi),%xmm2 + movdqu 64(%esi),%xmm7 + leal 16(%esi),%esi + pxor %xmm0,%xmm4 + movdqa -64(%ebx),%xmm0 + pxor %xmm1,%xmm5 + movdqa -48(%ebx),%xmm1 + pxor %xmm2,%xmm6 + movdqa -32(%ebx),%xmm2 + pxor %xmm3,%xmm7 + movdqa -16(%ebx),%xmm3 + movdqu %xmm4,-128(%edi) + movdqu %xmm5,-64(%edi) + movdqu %xmm6,(%edi) + movdqu %xmm7,64(%edi) + leal 16(%edi),%edi + paddd -64(%ebp),%xmm0 + paddd -48(%ebp),%xmm1 + paddd -32(%ebp),%xmm2 + paddd -16(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqu -128(%esi),%xmm4 + movdqu -64(%esi),%xmm5 + movdqu (%esi),%xmm2 + movdqu 64(%esi),%xmm7 + leal 16(%esi),%esi + pxor %xmm0,%xmm4 + movdqa (%ebx),%xmm0 + pxor %xmm1,%xmm5 + movdqa 16(%ebx),%xmm1 + pxor %xmm2,%xmm6 + movdqa 32(%ebx),%xmm2 + pxor %xmm3,%xmm7 + movdqa 48(%ebx),%xmm3 + movdqu %xmm4,-128(%edi) + movdqu %xmm5,-64(%edi) + movdqu %xmm6,(%edi) + movdqu %xmm7,64(%edi) + leal 16(%edi),%edi + paddd (%ebp),%xmm0 + paddd 16(%ebp),%xmm1 + paddd 32(%ebp),%xmm2 + paddd 48(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqu -128(%esi),%xmm4 + movdqu -64(%esi),%xmm5 + movdqu (%esi),%xmm2 + movdqu 64(%esi),%xmm7 + leal 16(%esi),%esi + pxor %xmm0,%xmm4 + movdqa 64(%ebx),%xmm0 + pxor %xmm1,%xmm5 + movdqa 80(%ebx),%xmm1 + pxor %xmm2,%xmm6 + movdqa 96(%ebx),%xmm2 + pxor %xmm3,%xmm7 + movdqa 112(%ebx),%xmm3 + movdqu %xmm4,-128(%edi) + movdqu %xmm5,-64(%edi) + movdqu %xmm6,(%edi) + movdqu %xmm7,64(%edi) + leal 16(%edi),%edi + paddd 64(%ebp),%xmm0 + paddd 80(%ebp),%xmm1 + paddd 96(%ebp),%xmm2 + paddd 112(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqu -128(%esi),%xmm4 + movdqu -64(%esi),%xmm5 + movdqu (%esi),%xmm2 + movdqu 64(%esi),%xmm7 + leal 208(%esi),%esi + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm5 + pxor %xmm2,%xmm6 + pxor %xmm3,%xmm7 + movdqu %xmm4,-128(%edi) + movdqu %xmm5,-64(%edi) + movdqu %xmm6,(%edi) + movdqu %xmm7,64(%edi) + leal 208(%edi),%edi + subl $256,%ecx + jnc .L007outer_loop + addl $256,%ecx + jz .L009done + movl 520(%esp),%ebx + leal -128(%esi),%esi + movl 516(%esp),%edx + leal -128(%edi),%edi + movd 64(%ebp),%xmm2 + movdqu (%ebx),%xmm3 + paddd 96(%eax),%xmm2 + pand 112(%eax),%xmm3 + por %xmm2,%xmm3 +.L0061x: + movdqa 32(%eax),%xmm0 + movdqu (%edx),%xmm1 + movdqu 16(%edx),%xmm2 + movdqa (%eax),%xmm6 + movdqa 16(%eax),%xmm7 + movl %ebp,48(%esp) + movdqa %xmm0,(%esp) + movdqa %xmm1,16(%esp) + movdqa %xmm2,32(%esp) + movdqa %xmm3,48(%esp) + movl $10,%edx + jmp .L010loop1x +.align 16 +.L011outer1x: + movdqa 80(%eax),%xmm3 + movdqa (%esp),%xmm0 + movdqa 16(%esp),%xmm1 + movdqa 32(%esp),%xmm2 + paddd 48(%esp),%xmm3 + movl $10,%edx + movdqa %xmm3,48(%esp) + jmp .L010loop1x +.align 16 +.L010loop1x: + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $57,%xmm1,%xmm1 + pshufd $147,%xmm3,%xmm3 + nop + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $147,%xmm1,%xmm1 + pshufd $57,%xmm3,%xmm3 + decl %edx + jnz .L010loop1x + paddd (%esp),%xmm0 + paddd 16(%esp),%xmm1 + paddd 32(%esp),%xmm2 + paddd 48(%esp),%xmm3 + cmpl $64,%ecx + jb .L012tail + movdqu (%esi),%xmm4 + movdqu 16(%esi),%xmm5 + pxor %xmm4,%xmm0 + movdqu 32(%esi),%xmm4 + pxor %xmm5,%xmm1 + movdqu 48(%esi),%xmm5 + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm3 + leal 64(%esi),%esi + movdqu %xmm0,(%edi) + movdqu %xmm1,16(%edi) + movdqu %xmm2,32(%edi) + movdqu %xmm3,48(%edi) + leal 64(%edi),%edi + subl $64,%ecx + jnz .L011outer1x + jmp .L009done +.L012tail: + movdqa %xmm0,(%esp) + movdqa %xmm1,16(%esp) + movdqa %xmm2,32(%esp) + movdqa %xmm3,48(%esp) + xorl %eax,%eax + xorl %edx,%edx + xorl %ebp,%ebp +.L013tail_loop: + movb (%esp,%ebp,1),%al + movb (%esi,%ebp,1),%dl + leal 1(%ebp),%ebp + xorb %dl,%al + movb %al,-1(%edi,%ebp,1) + decl %ecx + jnz .L013tail_loop +.L009done: + movl 512(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size ChaCha20_ctr32_ssse3,.-.L_ChaCha20_ctr32_ssse3_begin +.align 64 +.Lssse3_data: +.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 +.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 +.long 1634760805,857760878,2036477234,1797285236 +.long 0,1,2,3 +.long 4,4,4,4 +.long 1,0,0,0 +.long 4,0,0,0 +.long 0,-1,-1,-1 +.align 64 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 +.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +.byte 114,103,62,0 +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-x86-win.asm b/yass/third_party/boringssl/src/gen/crypto/chacha-x86-win.asm new file mode 100644 index 0000000000..d709da0596 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-x86-win.asm @@ -0,0 +1,966 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _ChaCha20_ctr32_nohw +align 16 +_ChaCha20_ctr32_nohw: +L$_ChaCha20_ctr32_nohw_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [32+esp] + mov edi,DWORD [36+esp] + sub esp,132 + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov DWORD [80+esp],eax + mov DWORD [84+esp],ebx + mov DWORD [88+esp],ecx + mov DWORD [92+esp],edx + mov eax,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov edx,DWORD [28+esi] + mov DWORD [96+esp],eax + mov DWORD [100+esp],ebx + mov DWORD [104+esp],ecx + mov DWORD [108+esp],edx + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + sub eax,1 + mov DWORD [112+esp],eax + mov DWORD [116+esp],ebx + mov DWORD [120+esp],ecx + mov DWORD [124+esp],edx + jmp NEAR L$000entry +align 16 +L$001outer_loop: + mov DWORD [156+esp],ebx + mov DWORD [152+esp],eax + mov DWORD [160+esp],ecx +L$000entry: + mov eax,1634760805 + mov DWORD [4+esp],857760878 + mov DWORD [8+esp],2036477234 + mov DWORD [12+esp],1797285236 + mov ebx,DWORD [84+esp] + mov ebp,DWORD [88+esp] + mov ecx,DWORD [104+esp] + mov esi,DWORD [108+esp] + mov edx,DWORD [116+esp] + mov edi,DWORD [120+esp] + mov DWORD [20+esp],ebx + mov DWORD [24+esp],ebp + mov DWORD [40+esp],ecx + mov DWORD [44+esp],esi + mov DWORD [52+esp],edx + mov DWORD [56+esp],edi + mov ebx,DWORD [92+esp] + mov edi,DWORD [124+esp] + mov edx,DWORD [112+esp] + mov ebp,DWORD [80+esp] + mov ecx,DWORD [96+esp] + mov esi,DWORD [100+esp] + add edx,1 + mov DWORD [28+esp],ebx + mov DWORD [60+esp],edi + mov DWORD [112+esp],edx + mov ebx,10 + jmp NEAR L$002loop +align 16 +L$002loop: + add eax,ebp + mov DWORD [128+esp],ebx + mov ebx,ebp + xor edx,eax + rol edx,16 + add ecx,edx + xor ebx,ecx + mov edi,DWORD [52+esp] + rol ebx,12 + mov ebp,DWORD [20+esp] + add eax,ebx + xor edx,eax + mov DWORD [esp],eax + rol edx,8 + mov eax,DWORD [4+esp] + add ecx,edx + mov DWORD [48+esp],edx + xor ebx,ecx + add eax,ebp + rol ebx,7 + xor edi,eax + mov DWORD [32+esp],ecx + rol edi,16 + mov DWORD [16+esp],ebx + add esi,edi + mov ecx,DWORD [40+esp] + xor ebp,esi + mov edx,DWORD [56+esp] + rol ebp,12 + mov ebx,DWORD [24+esp] + add eax,ebp + xor edi,eax + mov DWORD [4+esp],eax + rol edi,8 + mov eax,DWORD [8+esp] + add esi,edi + mov DWORD [52+esp],edi + xor ebp,esi + add eax,ebx + rol ebp,7 + xor edx,eax + mov DWORD [36+esp],esi + rol edx,16 + mov DWORD [20+esp],ebp + add ecx,edx + mov esi,DWORD [44+esp] + xor ebx,ecx + mov edi,DWORD [60+esp] + rol ebx,12 + mov ebp,DWORD [28+esp] + add eax,ebx + xor edx,eax + mov DWORD [8+esp],eax + rol edx,8 + mov eax,DWORD [12+esp] + add ecx,edx + mov DWORD [56+esp],edx + xor ebx,ecx + add eax,ebp + rol ebx,7 + xor edi,eax + rol edi,16 + mov DWORD [24+esp],ebx + add esi,edi + xor ebp,esi + rol ebp,12 + mov ebx,DWORD [20+esp] + add eax,ebp + xor edi,eax + mov DWORD [12+esp],eax + rol edi,8 + mov eax,DWORD [esp] + add esi,edi + mov edx,edi + xor ebp,esi + add eax,ebx + rol ebp,7 + xor edx,eax + rol edx,16 + mov DWORD [28+esp],ebp + add ecx,edx + xor ebx,ecx + mov edi,DWORD [48+esp] + rol ebx,12 + mov ebp,DWORD [24+esp] + add eax,ebx + xor edx,eax + mov DWORD [esp],eax + rol edx,8 + mov eax,DWORD [4+esp] + add ecx,edx + mov DWORD [60+esp],edx + xor ebx,ecx + add eax,ebp + rol ebx,7 + xor edi,eax + mov DWORD [40+esp],ecx + rol edi,16 + mov DWORD [20+esp],ebx + add esi,edi + mov ecx,DWORD [32+esp] + xor ebp,esi + mov edx,DWORD [52+esp] + rol ebp,12 + mov ebx,DWORD [28+esp] + add eax,ebp + xor edi,eax + mov DWORD [4+esp],eax + rol edi,8 + mov eax,DWORD [8+esp] + add esi,edi + mov DWORD [48+esp],edi + xor ebp,esi + add eax,ebx + rol ebp,7 + xor edx,eax + mov DWORD [44+esp],esi + rol edx,16 + mov DWORD [24+esp],ebp + add ecx,edx + mov esi,DWORD [36+esp] + xor ebx,ecx + mov edi,DWORD [56+esp] + rol ebx,12 + mov ebp,DWORD [16+esp] + add eax,ebx + xor edx,eax + mov DWORD [8+esp],eax + rol edx,8 + mov eax,DWORD [12+esp] + add ecx,edx + mov DWORD [52+esp],edx + xor ebx,ecx + add eax,ebp + rol ebx,7 + xor edi,eax + rol edi,16 + mov DWORD [28+esp],ebx + add esi,edi + xor ebp,esi + mov edx,DWORD [48+esp] + rol ebp,12 + mov ebx,DWORD [128+esp] + add eax,ebp + xor edi,eax + mov DWORD [12+esp],eax + rol edi,8 + mov eax,DWORD [esp] + add esi,edi + mov DWORD [56+esp],edi + xor ebp,esi + rol ebp,7 + dec ebx + jnz NEAR L$002loop + mov ebx,DWORD [160+esp] + add eax,1634760805 + add ebp,DWORD [80+esp] + add ecx,DWORD [96+esp] + add esi,DWORD [100+esp] + cmp ebx,64 + jb NEAR L$003tail + mov ebx,DWORD [156+esp] + add edx,DWORD [112+esp] + add edi,DWORD [120+esp] + xor eax,DWORD [ebx] + xor ebp,DWORD [16+ebx] + mov DWORD [esp],eax + mov eax,DWORD [152+esp] + xor ecx,DWORD [32+ebx] + xor esi,DWORD [36+ebx] + xor edx,DWORD [48+ebx] + xor edi,DWORD [56+ebx] + mov DWORD [16+eax],ebp + mov DWORD [32+eax],ecx + mov DWORD [36+eax],esi + mov DWORD [48+eax],edx + mov DWORD [56+eax],edi + mov ebp,DWORD [4+esp] + mov ecx,DWORD [8+esp] + mov esi,DWORD [12+esp] + mov edx,DWORD [20+esp] + mov edi,DWORD [24+esp] + add ebp,857760878 + add ecx,2036477234 + add esi,1797285236 + add edx,DWORD [84+esp] + add edi,DWORD [88+esp] + xor ebp,DWORD [4+ebx] + xor ecx,DWORD [8+ebx] + xor esi,DWORD [12+ebx] + xor edx,DWORD [20+ebx] + xor edi,DWORD [24+ebx] + mov DWORD [4+eax],ebp + mov DWORD [8+eax],ecx + mov DWORD [12+eax],esi + mov DWORD [20+eax],edx + mov DWORD [24+eax],edi + mov ebp,DWORD [28+esp] + mov ecx,DWORD [40+esp] + mov esi,DWORD [44+esp] + mov edx,DWORD [52+esp] + mov edi,DWORD [60+esp] + add ebp,DWORD [92+esp] + add ecx,DWORD [104+esp] + add esi,DWORD [108+esp] + add edx,DWORD [116+esp] + add edi,DWORD [124+esp] + xor ebp,DWORD [28+ebx] + xor ecx,DWORD [40+ebx] + xor esi,DWORD [44+ebx] + xor edx,DWORD [52+ebx] + xor edi,DWORD [60+ebx] + lea ebx,[64+ebx] + mov DWORD [28+eax],ebp + mov ebp,DWORD [esp] + mov DWORD [40+eax],ecx + mov ecx,DWORD [160+esp] + mov DWORD [44+eax],esi + mov DWORD [52+eax],edx + mov DWORD [60+eax],edi + mov DWORD [eax],ebp + lea eax,[64+eax] + sub ecx,64 + jnz NEAR L$001outer_loop + jmp NEAR L$004done +L$003tail: + add edx,DWORD [112+esp] + add edi,DWORD [120+esp] + mov DWORD [esp],eax + mov DWORD [16+esp],ebp + mov DWORD [32+esp],ecx + mov DWORD [36+esp],esi + mov DWORD [48+esp],edx + mov DWORD [56+esp],edi + mov ebp,DWORD [4+esp] + mov ecx,DWORD [8+esp] + mov esi,DWORD [12+esp] + mov edx,DWORD [20+esp] + mov edi,DWORD [24+esp] + add ebp,857760878 + add ecx,2036477234 + add esi,1797285236 + add edx,DWORD [84+esp] + add edi,DWORD [88+esp] + mov DWORD [4+esp],ebp + mov DWORD [8+esp],ecx + mov DWORD [12+esp],esi + mov DWORD [20+esp],edx + mov DWORD [24+esp],edi + mov ebp,DWORD [28+esp] + mov ecx,DWORD [40+esp] + mov esi,DWORD [44+esp] + mov edx,DWORD [52+esp] + mov edi,DWORD [60+esp] + add ebp,DWORD [92+esp] + add ecx,DWORD [104+esp] + add esi,DWORD [108+esp] + add edx,DWORD [116+esp] + add edi,DWORD [124+esp] + mov DWORD [28+esp],ebp + mov ebp,DWORD [156+esp] + mov DWORD [40+esp],ecx + mov ecx,DWORD [152+esp] + mov DWORD [44+esp],esi + xor esi,esi + mov DWORD [52+esp],edx + mov DWORD [60+esp],edi + xor eax,eax + xor edx,edx +L$005tail_loop: + mov al,BYTE [ebp*1+esi] + mov dl,BYTE [esi*1+esp] + lea esi,[1+esi] + xor al,dl + mov BYTE [esi*1+ecx-1],al + dec ebx + jnz NEAR L$005tail_loop +L$004done: + add esp,132 + pop edi + pop esi + pop ebx + pop ebp + ret +global _ChaCha20_ctr32_ssse3 +align 16 +_ChaCha20_ctr32_ssse3: +L$_ChaCha20_ctr32_ssse3_begin: + push ebp + push ebx + push esi + push edi + call L$pic_point +L$pic_point: + pop eax + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov ecx,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + mov ebp,esp + sub esp,524 + and esp,-64 + mov DWORD [512+esp],ebp + lea eax,[(L$ssse3_data-L$pic_point)+eax] + movdqu xmm3,[ebx] + cmp ecx,256 + jb NEAR L$0061x + mov DWORD [516+esp],edx + mov DWORD [520+esp],ebx + sub ecx,256 + lea ebp,[384+esp] + movdqu xmm7,[edx] + pshufd xmm0,xmm3,0 + pshufd xmm1,xmm3,85 + pshufd xmm2,xmm3,170 + pshufd xmm3,xmm3,255 + paddd xmm0,[48+eax] + pshufd xmm4,xmm7,0 + pshufd xmm5,xmm7,85 + psubd xmm0,[64+eax] + pshufd xmm6,xmm7,170 + pshufd xmm7,xmm7,255 + movdqa [64+ebp],xmm0 + movdqa [80+ebp],xmm1 + movdqa [96+ebp],xmm2 + movdqa [112+ebp],xmm3 + movdqu xmm3,[16+edx] + movdqa [ebp-64],xmm4 + movdqa [ebp-48],xmm5 + movdqa [ebp-32],xmm6 + movdqa [ebp-16],xmm7 + movdqa xmm7,[32+eax] + lea ebx,[128+esp] + pshufd xmm0,xmm3,0 + pshufd xmm1,xmm3,85 + pshufd xmm2,xmm3,170 + pshufd xmm3,xmm3,255 + pshufd xmm4,xmm7,0 + pshufd xmm5,xmm7,85 + pshufd xmm6,xmm7,170 + pshufd xmm7,xmm7,255 + movdqa [ebp],xmm0 + movdqa [16+ebp],xmm1 + movdqa [32+ebp],xmm2 + movdqa [48+ebp],xmm3 + movdqa [ebp-128],xmm4 + movdqa [ebp-112],xmm5 + movdqa [ebp-96],xmm6 + movdqa [ebp-80],xmm7 + lea esi,[128+esi] + lea edi,[128+edi] + jmp NEAR L$007outer_loop +align 16 +L$007outer_loop: + movdqa xmm1,[ebp-112] + movdqa xmm2,[ebp-96] + movdqa xmm3,[ebp-80] + movdqa xmm5,[ebp-48] + movdqa xmm6,[ebp-32] + movdqa xmm7,[ebp-16] + movdqa [ebx-112],xmm1 + movdqa [ebx-96],xmm2 + movdqa [ebx-80],xmm3 + movdqa [ebx-48],xmm5 + movdqa [ebx-32],xmm6 + movdqa [ebx-16],xmm7 + movdqa xmm2,[32+ebp] + movdqa xmm3,[48+ebp] + movdqa xmm4,[64+ebp] + movdqa xmm5,[80+ebp] + movdqa xmm6,[96+ebp] + movdqa xmm7,[112+ebp] + paddd xmm4,[64+eax] + movdqa [32+ebx],xmm2 + movdqa [48+ebx],xmm3 + movdqa [64+ebx],xmm4 + movdqa [80+ebx],xmm5 + movdqa [96+ebx],xmm6 + movdqa [112+ebx],xmm7 + movdqa [64+ebp],xmm4 + movdqa xmm0,[ebp-128] + movdqa xmm6,xmm4 + movdqa xmm3,[ebp-64] + movdqa xmm4,[ebp] + movdqa xmm5,[16+ebp] + mov edx,10 + nop +align 16 +L$008loop: + paddd xmm0,xmm3 + movdqa xmm2,xmm3 + pxor xmm6,xmm0 + pshufb xmm6,[eax] + paddd xmm4,xmm6 + pxor xmm2,xmm4 + movdqa xmm3,[ebx-48] + movdqa xmm1,xmm2 + pslld xmm2,12 + psrld xmm1,20 + por xmm2,xmm1 + movdqa xmm1,[ebx-112] + paddd xmm0,xmm2 + movdqa xmm7,[80+ebx] + pxor xmm6,xmm0 + movdqa [ebx-128],xmm0 + pshufb xmm6,[16+eax] + paddd xmm4,xmm6 + movdqa [64+ebx],xmm6 + pxor xmm2,xmm4 + paddd xmm1,xmm3 + movdqa xmm0,xmm2 + pslld xmm2,7 + psrld xmm0,25 + pxor xmm7,xmm1 + por xmm2,xmm0 + movdqa [ebx],xmm4 + pshufb xmm7,[eax] + movdqa [ebx-64],xmm2 + paddd xmm5,xmm7 + movdqa xmm4,[32+ebx] + pxor xmm3,xmm5 + movdqa xmm2,[ebx-32] + movdqa xmm0,xmm3 + pslld xmm3,12 + psrld xmm0,20 + por xmm3,xmm0 + movdqa xmm0,[ebx-96] + paddd xmm1,xmm3 + movdqa xmm6,[96+ebx] + pxor xmm7,xmm1 + movdqa [ebx-112],xmm1 + pshufb xmm7,[16+eax] + paddd xmm5,xmm7 + movdqa [80+ebx],xmm7 + pxor xmm3,xmm5 + paddd xmm0,xmm2 + movdqa xmm1,xmm3 + pslld xmm3,7 + psrld xmm1,25 + pxor xmm6,xmm0 + por xmm3,xmm1 + movdqa [16+ebx],xmm5 + pshufb xmm6,[eax] + movdqa [ebx-48],xmm3 + paddd xmm4,xmm6 + movdqa xmm5,[48+ebx] + pxor xmm2,xmm4 + movdqa xmm3,[ebx-16] + movdqa xmm1,xmm2 + pslld xmm2,12 + psrld xmm1,20 + por xmm2,xmm1 + movdqa xmm1,[ebx-80] + paddd xmm0,xmm2 + movdqa xmm7,[112+ebx] + pxor xmm6,xmm0 + movdqa [ebx-96],xmm0 + pshufb xmm6,[16+eax] + paddd xmm4,xmm6 + movdqa [96+ebx],xmm6 + pxor xmm2,xmm4 + paddd xmm1,xmm3 + movdqa xmm0,xmm2 + pslld xmm2,7 + psrld xmm0,25 + pxor xmm7,xmm1 + por xmm2,xmm0 + pshufb xmm7,[eax] + movdqa [ebx-32],xmm2 + paddd xmm5,xmm7 + pxor xmm3,xmm5 + movdqa xmm2,[ebx-48] + movdqa xmm0,xmm3 + pslld xmm3,12 + psrld xmm0,20 + por xmm3,xmm0 + movdqa xmm0,[ebx-128] + paddd xmm1,xmm3 + pxor xmm7,xmm1 + movdqa [ebx-80],xmm1 + pshufb xmm7,[16+eax] + paddd xmm5,xmm7 + movdqa xmm6,xmm7 + pxor xmm3,xmm5 + paddd xmm0,xmm2 + movdqa xmm1,xmm3 + pslld xmm3,7 + psrld xmm1,25 + pxor xmm6,xmm0 + por xmm3,xmm1 + pshufb xmm6,[eax] + movdqa [ebx-16],xmm3 + paddd xmm4,xmm6 + pxor xmm2,xmm4 + movdqa xmm3,[ebx-32] + movdqa xmm1,xmm2 + pslld xmm2,12 + psrld xmm1,20 + por xmm2,xmm1 + movdqa xmm1,[ebx-112] + paddd xmm0,xmm2 + movdqa xmm7,[64+ebx] + pxor xmm6,xmm0 + movdqa [ebx-128],xmm0 + pshufb xmm6,[16+eax] + paddd xmm4,xmm6 + movdqa [112+ebx],xmm6 + pxor xmm2,xmm4 + paddd xmm1,xmm3 + movdqa xmm0,xmm2 + pslld xmm2,7 + psrld xmm0,25 + pxor xmm7,xmm1 + por xmm2,xmm0 + movdqa [32+ebx],xmm4 + pshufb xmm7,[eax] + movdqa [ebx-48],xmm2 + paddd xmm5,xmm7 + movdqa xmm4,[ebx] + pxor xmm3,xmm5 + movdqa xmm2,[ebx-16] + movdqa xmm0,xmm3 + pslld xmm3,12 + psrld xmm0,20 + por xmm3,xmm0 + movdqa xmm0,[ebx-96] + paddd xmm1,xmm3 + movdqa xmm6,[80+ebx] + pxor xmm7,xmm1 + movdqa [ebx-112],xmm1 + pshufb xmm7,[16+eax] + paddd xmm5,xmm7 + movdqa [64+ebx],xmm7 + pxor xmm3,xmm5 + paddd xmm0,xmm2 + movdqa xmm1,xmm3 + pslld xmm3,7 + psrld xmm1,25 + pxor xmm6,xmm0 + por xmm3,xmm1 + movdqa [48+ebx],xmm5 + pshufb xmm6,[eax] + movdqa [ebx-32],xmm3 + paddd xmm4,xmm6 + movdqa xmm5,[16+ebx] + pxor xmm2,xmm4 + movdqa xmm3,[ebx-64] + movdqa xmm1,xmm2 + pslld xmm2,12 + psrld xmm1,20 + por xmm2,xmm1 + movdqa xmm1,[ebx-80] + paddd xmm0,xmm2 + movdqa xmm7,[96+ebx] + pxor xmm6,xmm0 + movdqa [ebx-96],xmm0 + pshufb xmm6,[16+eax] + paddd xmm4,xmm6 + movdqa [80+ebx],xmm6 + pxor xmm2,xmm4 + paddd xmm1,xmm3 + movdqa xmm0,xmm2 + pslld xmm2,7 + psrld xmm0,25 + pxor xmm7,xmm1 + por xmm2,xmm0 + pshufb xmm7,[eax] + movdqa [ebx-16],xmm2 + paddd xmm5,xmm7 + pxor xmm3,xmm5 + movdqa xmm0,xmm3 + pslld xmm3,12 + psrld xmm0,20 + por xmm3,xmm0 + movdqa xmm0,[ebx-128] + paddd xmm1,xmm3 + movdqa xmm6,[64+ebx] + pxor xmm7,xmm1 + movdqa [ebx-80],xmm1 + pshufb xmm7,[16+eax] + paddd xmm5,xmm7 + movdqa [96+ebx],xmm7 + pxor xmm3,xmm5 + movdqa xmm1,xmm3 + pslld xmm3,7 + psrld xmm1,25 + por xmm3,xmm1 + dec edx + jnz NEAR L$008loop + movdqa [ebx-64],xmm3 + movdqa [ebx],xmm4 + movdqa [16+ebx],xmm5 + movdqa [64+ebx],xmm6 + movdqa [96+ebx],xmm7 + movdqa xmm1,[ebx-112] + movdqa xmm2,[ebx-96] + movdqa xmm3,[ebx-80] + paddd xmm0,[ebp-128] + paddd xmm1,[ebp-112] + paddd xmm2,[ebp-96] + paddd xmm3,[ebp-80] + movdqa xmm6,xmm0 + punpckldq xmm0,xmm1 + movdqa xmm7,xmm2 + punpckldq xmm2,xmm3 + punpckhdq xmm6,xmm1 + punpckhdq xmm7,xmm3 + movdqa xmm1,xmm0 + punpcklqdq xmm0,xmm2 + movdqa xmm3,xmm6 + punpcklqdq xmm6,xmm7 + punpckhqdq xmm1,xmm2 + punpckhqdq xmm3,xmm7 + movdqu xmm4,[esi-128] + movdqu xmm5,[esi-64] + movdqu xmm2,[esi] + movdqu xmm7,[64+esi] + lea esi,[16+esi] + pxor xmm4,xmm0 + movdqa xmm0,[ebx-64] + pxor xmm5,xmm1 + movdqa xmm1,[ebx-48] + pxor xmm6,xmm2 + movdqa xmm2,[ebx-32] + pxor xmm7,xmm3 + movdqa xmm3,[ebx-16] + movdqu [edi-128],xmm4 + movdqu [edi-64],xmm5 + movdqu [edi],xmm6 + movdqu [64+edi],xmm7 + lea edi,[16+edi] + paddd xmm0,[ebp-64] + paddd xmm1,[ebp-48] + paddd xmm2,[ebp-32] + paddd xmm3,[ebp-16] + movdqa xmm6,xmm0 + punpckldq xmm0,xmm1 + movdqa xmm7,xmm2 + punpckldq xmm2,xmm3 + punpckhdq xmm6,xmm1 + punpckhdq xmm7,xmm3 + movdqa xmm1,xmm0 + punpcklqdq xmm0,xmm2 + movdqa xmm3,xmm6 + punpcklqdq xmm6,xmm7 + punpckhqdq xmm1,xmm2 + punpckhqdq xmm3,xmm7 + movdqu xmm4,[esi-128] + movdqu xmm5,[esi-64] + movdqu xmm2,[esi] + movdqu xmm7,[64+esi] + lea esi,[16+esi] + pxor xmm4,xmm0 + movdqa xmm0,[ebx] + pxor xmm5,xmm1 + movdqa xmm1,[16+ebx] + pxor xmm6,xmm2 + movdqa xmm2,[32+ebx] + pxor xmm7,xmm3 + movdqa xmm3,[48+ebx] + movdqu [edi-128],xmm4 + movdqu [edi-64],xmm5 + movdqu [edi],xmm6 + movdqu [64+edi],xmm7 + lea edi,[16+edi] + paddd xmm0,[ebp] + paddd xmm1,[16+ebp] + paddd xmm2,[32+ebp] + paddd xmm3,[48+ebp] + movdqa xmm6,xmm0 + punpckldq xmm0,xmm1 + movdqa xmm7,xmm2 + punpckldq xmm2,xmm3 + punpckhdq xmm6,xmm1 + punpckhdq xmm7,xmm3 + movdqa xmm1,xmm0 + punpcklqdq xmm0,xmm2 + movdqa xmm3,xmm6 + punpcklqdq xmm6,xmm7 + punpckhqdq xmm1,xmm2 + punpckhqdq xmm3,xmm7 + movdqu xmm4,[esi-128] + movdqu xmm5,[esi-64] + movdqu xmm2,[esi] + movdqu xmm7,[64+esi] + lea esi,[16+esi] + pxor xmm4,xmm0 + movdqa xmm0,[64+ebx] + pxor xmm5,xmm1 + movdqa xmm1,[80+ebx] + pxor xmm6,xmm2 + movdqa xmm2,[96+ebx] + pxor xmm7,xmm3 + movdqa xmm3,[112+ebx] + movdqu [edi-128],xmm4 + movdqu [edi-64],xmm5 + movdqu [edi],xmm6 + movdqu [64+edi],xmm7 + lea edi,[16+edi] + paddd xmm0,[64+ebp] + paddd xmm1,[80+ebp] + paddd xmm2,[96+ebp] + paddd xmm3,[112+ebp] + movdqa xmm6,xmm0 + punpckldq xmm0,xmm1 + movdqa xmm7,xmm2 + punpckldq xmm2,xmm3 + punpckhdq xmm6,xmm1 + punpckhdq xmm7,xmm3 + movdqa xmm1,xmm0 + punpcklqdq xmm0,xmm2 + movdqa xmm3,xmm6 + punpcklqdq xmm6,xmm7 + punpckhqdq xmm1,xmm2 + punpckhqdq xmm3,xmm7 + movdqu xmm4,[esi-128] + movdqu xmm5,[esi-64] + movdqu xmm2,[esi] + movdqu xmm7,[64+esi] + lea esi,[208+esi] + pxor xmm4,xmm0 + pxor xmm5,xmm1 + pxor xmm6,xmm2 + pxor xmm7,xmm3 + movdqu [edi-128],xmm4 + movdqu [edi-64],xmm5 + movdqu [edi],xmm6 + movdqu [64+edi],xmm7 + lea edi,[208+edi] + sub ecx,256 + jnc NEAR L$007outer_loop + add ecx,256 + jz NEAR L$009done + mov ebx,DWORD [520+esp] + lea esi,[esi-128] + mov edx,DWORD [516+esp] + lea edi,[edi-128] + movd xmm2,DWORD [64+ebp] + movdqu xmm3,[ebx] + paddd xmm2,[96+eax] + pand xmm3,[112+eax] + por xmm3,xmm2 +L$0061x: + movdqa xmm0,[32+eax] + movdqu xmm1,[edx] + movdqu xmm2,[16+edx] + movdqa xmm6,[eax] + movdqa xmm7,[16+eax] + mov DWORD [48+esp],ebp + movdqa [esp],xmm0 + movdqa [16+esp],xmm1 + movdqa [32+esp],xmm2 + movdqa [48+esp],xmm3 + mov edx,10 + jmp NEAR L$010loop1x +align 16 +L$011outer1x: + movdqa xmm3,[80+eax] + movdqa xmm0,[esp] + movdqa xmm1,[16+esp] + movdqa xmm2,[32+esp] + paddd xmm3,[48+esp] + mov edx,10 + movdqa [48+esp],xmm3 + jmp NEAR L$010loop1x +align 16 +L$010loop1x: + paddd xmm0,xmm1 + pxor xmm3,xmm0 +db 102,15,56,0,222 + paddd xmm2,xmm3 + pxor xmm1,xmm2 + movdqa xmm4,xmm1 + psrld xmm1,20 + pslld xmm4,12 + por xmm1,xmm4 + paddd xmm0,xmm1 + pxor xmm3,xmm0 +db 102,15,56,0,223 + paddd xmm2,xmm3 + pxor xmm1,xmm2 + movdqa xmm4,xmm1 + psrld xmm1,25 + pslld xmm4,7 + por xmm1,xmm4 + pshufd xmm2,xmm2,78 + pshufd xmm1,xmm1,57 + pshufd xmm3,xmm3,147 + nop + paddd xmm0,xmm1 + pxor xmm3,xmm0 +db 102,15,56,0,222 + paddd xmm2,xmm3 + pxor xmm1,xmm2 + movdqa xmm4,xmm1 + psrld xmm1,20 + pslld xmm4,12 + por xmm1,xmm4 + paddd xmm0,xmm1 + pxor xmm3,xmm0 +db 102,15,56,0,223 + paddd xmm2,xmm3 + pxor xmm1,xmm2 + movdqa xmm4,xmm1 + psrld xmm1,25 + pslld xmm4,7 + por xmm1,xmm4 + pshufd xmm2,xmm2,78 + pshufd xmm1,xmm1,147 + pshufd xmm3,xmm3,57 + dec edx + jnz NEAR L$010loop1x + paddd xmm0,[esp] + paddd xmm1,[16+esp] + paddd xmm2,[32+esp] + paddd xmm3,[48+esp] + cmp ecx,64 + jb NEAR L$012tail + movdqu xmm4,[esi] + movdqu xmm5,[16+esi] + pxor xmm0,xmm4 + movdqu xmm4,[32+esi] + pxor xmm1,xmm5 + movdqu xmm5,[48+esi] + pxor xmm2,xmm4 + pxor xmm3,xmm5 + lea esi,[64+esi] + movdqu [edi],xmm0 + movdqu [16+edi],xmm1 + movdqu [32+edi],xmm2 + movdqu [48+edi],xmm3 + lea edi,[64+edi] + sub ecx,64 + jnz NEAR L$011outer1x + jmp NEAR L$009done +L$012tail: + movdqa [esp],xmm0 + movdqa [16+esp],xmm1 + movdqa [32+esp],xmm2 + movdqa [48+esp],xmm3 + xor eax,eax + xor edx,edx + xor ebp,ebp +L$013tail_loop: + mov al,BYTE [ebp*1+esp] + mov dl,BYTE [ebp*1+esi] + lea ebp,[1+ebp] + xor al,dl + mov BYTE [ebp*1+edi-1],al + dec ecx + jnz NEAR L$013tail_loop +L$009done: + mov esp,DWORD [512+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$ssse3_data: +db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 +db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 +dd 1634760805,857760878,2036477234,1797285236 +dd 0,1,2,3 +dd 4,4,4,4 +dd 1,0,0,0 +dd 4,0,0,0 +dd 0,-1,-1,-1 +align 64 +db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 +db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +db 114,103,62,0 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-apple.S b/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-apple.S new file mode 100644 index 0000000000..a5e12075e3 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-apple.S @@ -0,0 +1,1604 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + +.section __DATA,__const +.p2align 6 +L$zero: +.long 0,0,0,0 +L$one: +.long 1,0,0,0 +L$inc: +.long 0,1,2,3 +L$four: +.long 4,4,4,4 +L$incy: +.long 0,2,4,6,1,3,5,7 +L$eight: +.long 8,8,8,8,8,8,8,8 +L$rot16: +.byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd +L$rot24: +.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe +L$sigma: +.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 +.p2align 6 +L$zeroz: +.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 +L$fourz: +.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 +L$incz: +.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +L$sixteen: +.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text +.globl _ChaCha20_ctr32_nohw +.private_extern _ChaCha20_ctr32_nohw + +.p2align 6 +_ChaCha20_ctr32_nohw: + +_CET_ENDBR + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $64+24,%rsp + +L$ctr32_body: + + + movdqu (%rcx),%xmm1 + movdqu 16(%rcx),%xmm2 + movdqu (%r8),%xmm3 + movdqa L$one(%rip),%xmm4 + + + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa %xmm3,48(%rsp) + movq %rdx,%rbp + jmp L$oop_outer + +.p2align 5 +L$oop_outer: + movl $0x61707865,%eax + movl $0x3320646e,%ebx + movl $0x79622d32,%ecx + movl $0x6b206574,%edx + movl 16(%rsp),%r8d + movl 20(%rsp),%r9d + movl 24(%rsp),%r10d + movl 28(%rsp),%r11d + movd %xmm3,%r12d + movl 52(%rsp),%r13d + movl 56(%rsp),%r14d + movl 60(%rsp),%r15d + + movq %rbp,64+0(%rsp) + movl $10,%ebp + movq %rsi,64+8(%rsp) +.byte 102,72,15,126,214 + movq %rdi,64+16(%rsp) + movq %rsi,%rdi + shrq $32,%rdi + jmp L$oop + +.p2align 5 +L$oop: + addl %r8d,%eax + xorl %eax,%r12d + roll $16,%r12d + addl %r9d,%ebx + xorl %ebx,%r13d + roll $16,%r13d + addl %r12d,%esi + xorl %esi,%r8d + roll $12,%r8d + addl %r13d,%edi + xorl %edi,%r9d + roll $12,%r9d + addl %r8d,%eax + xorl %eax,%r12d + roll $8,%r12d + addl %r9d,%ebx + xorl %ebx,%r13d + roll $8,%r13d + addl %r12d,%esi + xorl %esi,%r8d + roll $7,%r8d + addl %r13d,%edi + xorl %edi,%r9d + roll $7,%r9d + movl %esi,32(%rsp) + movl %edi,36(%rsp) + movl 40(%rsp),%esi + movl 44(%rsp),%edi + addl %r10d,%ecx + xorl %ecx,%r14d + roll $16,%r14d + addl %r11d,%edx + xorl %edx,%r15d + roll $16,%r15d + addl %r14d,%esi + xorl %esi,%r10d + roll $12,%r10d + addl %r15d,%edi + xorl %edi,%r11d + roll $12,%r11d + addl %r10d,%ecx + xorl %ecx,%r14d + roll $8,%r14d + addl %r11d,%edx + xorl %edx,%r15d + roll $8,%r15d + addl %r14d,%esi + xorl %esi,%r10d + roll $7,%r10d + addl %r15d,%edi + xorl %edi,%r11d + roll $7,%r11d + addl %r9d,%eax + xorl %eax,%r15d + roll $16,%r15d + addl %r10d,%ebx + xorl %ebx,%r12d + roll $16,%r12d + addl %r15d,%esi + xorl %esi,%r9d + roll $12,%r9d + addl %r12d,%edi + xorl %edi,%r10d + roll $12,%r10d + addl %r9d,%eax + xorl %eax,%r15d + roll $8,%r15d + addl %r10d,%ebx + xorl %ebx,%r12d + roll $8,%r12d + addl %r15d,%esi + xorl %esi,%r9d + roll $7,%r9d + addl %r12d,%edi + xorl %edi,%r10d + roll $7,%r10d + movl %esi,40(%rsp) + movl %edi,44(%rsp) + movl 32(%rsp),%esi + movl 36(%rsp),%edi + addl %r11d,%ecx + xorl %ecx,%r13d + roll $16,%r13d + addl %r8d,%edx + xorl %edx,%r14d + roll $16,%r14d + addl %r13d,%esi + xorl %esi,%r11d + roll $12,%r11d + addl %r14d,%edi + xorl %edi,%r8d + roll $12,%r8d + addl %r11d,%ecx + xorl %ecx,%r13d + roll $8,%r13d + addl %r8d,%edx + xorl %edx,%r14d + roll $8,%r14d + addl %r13d,%esi + xorl %esi,%r11d + roll $7,%r11d + addl %r14d,%edi + xorl %edi,%r8d + roll $7,%r8d + decl %ebp + jnz L$oop + movl %edi,36(%rsp) + movl %esi,32(%rsp) + movq 64(%rsp),%rbp + movdqa %xmm2,%xmm1 + movq 64+8(%rsp),%rsi + paddd %xmm4,%xmm3 + movq 64+16(%rsp),%rdi + + addl $0x61707865,%eax + addl $0x3320646e,%ebx + addl $0x79622d32,%ecx + addl $0x6b206574,%edx + addl 16(%rsp),%r8d + addl 20(%rsp),%r9d + addl 24(%rsp),%r10d + addl 28(%rsp),%r11d + addl 48(%rsp),%r12d + addl 52(%rsp),%r13d + addl 56(%rsp),%r14d + addl 60(%rsp),%r15d + paddd 32(%rsp),%xmm1 + + cmpq $64,%rbp + jb L$tail + + xorl 0(%rsi),%eax + xorl 4(%rsi),%ebx + xorl 8(%rsi),%ecx + xorl 12(%rsi),%edx + xorl 16(%rsi),%r8d + xorl 20(%rsi),%r9d + xorl 24(%rsi),%r10d + xorl 28(%rsi),%r11d + movdqu 32(%rsi),%xmm0 + xorl 48(%rsi),%r12d + xorl 52(%rsi),%r13d + xorl 56(%rsi),%r14d + xorl 60(%rsi),%r15d + leaq 64(%rsi),%rsi + pxor %xmm1,%xmm0 + + movdqa %xmm2,32(%rsp) + movd %xmm3,48(%rsp) + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + movdqu %xmm0,32(%rdi) + movl %r12d,48(%rdi) + movl %r13d,52(%rdi) + movl %r14d,56(%rdi) + movl %r15d,60(%rdi) + leaq 64(%rdi),%rdi + + subq $64,%rbp + jnz L$oop_outer + + jmp L$done + +.p2align 4 +L$tail: + movl %eax,0(%rsp) + movl %ebx,4(%rsp) + xorq %rbx,%rbx + movl %ecx,8(%rsp) + movl %edx,12(%rsp) + movl %r8d,16(%rsp) + movl %r9d,20(%rsp) + movl %r10d,24(%rsp) + movl %r11d,28(%rsp) + movdqa %xmm1,32(%rsp) + movl %r12d,48(%rsp) + movl %r13d,52(%rsp) + movl %r14d,56(%rsp) + movl %r15d,60(%rsp) + +L$oop_tail: + movzbl (%rsi,%rbx,1),%eax + movzbl (%rsp,%rbx,1),%edx + leaq 1(%rbx),%rbx + xorl %edx,%eax + movb %al,-1(%rdi,%rbx,1) + decq %rbp + jnz L$oop_tail + +L$done: + leaq 64+24+48(%rsp),%rsi + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$no_data: + ret + + +.globl _ChaCha20_ctr32_ssse3 +.private_extern _ChaCha20_ctr32_ssse3 + +.p2align 5 +_ChaCha20_ctr32_ssse3: + +_CET_ENDBR + movq %rsp,%r9 + + subq $64+8,%rsp + movdqa L$sigma(%rip),%xmm0 + movdqu (%rcx),%xmm1 + movdqu 16(%rcx),%xmm2 + movdqu (%r8),%xmm3 + movdqa L$rot16(%rip),%xmm6 + movdqa L$rot24(%rip),%xmm7 + + movdqa %xmm0,0(%rsp) + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa %xmm3,48(%rsp) + movq $10,%r8 + jmp L$oop_ssse3 + +.p2align 5 +L$oop_outer_ssse3: + movdqa L$one(%rip),%xmm3 + movdqa 0(%rsp),%xmm0 + movdqa 16(%rsp),%xmm1 + movdqa 32(%rsp),%xmm2 + paddd 48(%rsp),%xmm3 + movq $10,%r8 + movdqa %xmm3,48(%rsp) + jmp L$oop_ssse3 + +.p2align 5 +L$oop_ssse3: + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $57,%xmm1,%xmm1 + pshufd $147,%xmm3,%xmm3 + nop + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $147,%xmm1,%xmm1 + pshufd $57,%xmm3,%xmm3 + decq %r8 + jnz L$oop_ssse3 + paddd 0(%rsp),%xmm0 + paddd 16(%rsp),%xmm1 + paddd 32(%rsp),%xmm2 + paddd 48(%rsp),%xmm3 + + cmpq $64,%rdx + jb L$tail_ssse3 + + movdqu 0(%rsi),%xmm4 + movdqu 16(%rsi),%xmm5 + pxor %xmm4,%xmm0 + movdqu 32(%rsi),%xmm4 + pxor %xmm5,%xmm1 + movdqu 48(%rsi),%xmm5 + leaq 64(%rsi),%rsi + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm3 + + movdqu %xmm0,0(%rdi) + movdqu %xmm1,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + leaq 64(%rdi),%rdi + + subq $64,%rdx + jnz L$oop_outer_ssse3 + + jmp L$done_ssse3 + +.p2align 4 +L$tail_ssse3: + movdqa %xmm0,0(%rsp) + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa %xmm3,48(%rsp) + xorq %r8,%r8 + +L$oop_tail_ssse3: + movzbl (%rsi,%r8,1),%eax + movzbl (%rsp,%r8,1),%ecx + leaq 1(%r8),%r8 + xorl %ecx,%eax + movb %al,-1(%rdi,%r8,1) + decq %rdx + jnz L$oop_tail_ssse3 + +L$done_ssse3: + leaq (%r9),%rsp + +L$ssse3_epilogue: + ret + + +.globl _ChaCha20_ctr32_ssse3_4x +.private_extern _ChaCha20_ctr32_ssse3_4x + +.p2align 5 +_ChaCha20_ctr32_ssse3_4x: + +_CET_ENDBR + movq %rsp,%r9 + + movq %r10,%r11 + subq $0x140+8,%rsp + movdqa L$sigma(%rip),%xmm11 + movdqu (%rcx),%xmm15 + movdqu 16(%rcx),%xmm7 + movdqu (%r8),%xmm3 + leaq 256(%rsp),%rcx + leaq L$rot16(%rip),%r10 + leaq L$rot24(%rip),%r11 + + pshufd $0x00,%xmm11,%xmm8 + pshufd $0x55,%xmm11,%xmm9 + movdqa %xmm8,64(%rsp) + pshufd $0xaa,%xmm11,%xmm10 + movdqa %xmm9,80(%rsp) + pshufd $0xff,%xmm11,%xmm11 + movdqa %xmm10,96(%rsp) + movdqa %xmm11,112(%rsp) + + pshufd $0x00,%xmm15,%xmm12 + pshufd $0x55,%xmm15,%xmm13 + movdqa %xmm12,128-256(%rcx) + pshufd $0xaa,%xmm15,%xmm14 + movdqa %xmm13,144-256(%rcx) + pshufd $0xff,%xmm15,%xmm15 + movdqa %xmm14,160-256(%rcx) + movdqa %xmm15,176-256(%rcx) + + pshufd $0x00,%xmm7,%xmm4 + pshufd $0x55,%xmm7,%xmm5 + movdqa %xmm4,192-256(%rcx) + pshufd $0xaa,%xmm7,%xmm6 + movdqa %xmm5,208-256(%rcx) + pshufd $0xff,%xmm7,%xmm7 + movdqa %xmm6,224-256(%rcx) + movdqa %xmm7,240-256(%rcx) + + pshufd $0x00,%xmm3,%xmm0 + pshufd $0x55,%xmm3,%xmm1 + paddd L$inc(%rip),%xmm0 + pshufd $0xaa,%xmm3,%xmm2 + movdqa %xmm1,272-256(%rcx) + pshufd $0xff,%xmm3,%xmm3 + movdqa %xmm2,288-256(%rcx) + movdqa %xmm3,304-256(%rcx) + + jmp L$oop_enter4x + +.p2align 5 +L$oop_outer4x: + movdqa 64(%rsp),%xmm8 + movdqa 80(%rsp),%xmm9 + movdqa 96(%rsp),%xmm10 + movdqa 112(%rsp),%xmm11 + movdqa 128-256(%rcx),%xmm12 + movdqa 144-256(%rcx),%xmm13 + movdqa 160-256(%rcx),%xmm14 + movdqa 176-256(%rcx),%xmm15 + movdqa 192-256(%rcx),%xmm4 + movdqa 208-256(%rcx),%xmm5 + movdqa 224-256(%rcx),%xmm6 + movdqa 240-256(%rcx),%xmm7 + movdqa 256-256(%rcx),%xmm0 + movdqa 272-256(%rcx),%xmm1 + movdqa 288-256(%rcx),%xmm2 + movdqa 304-256(%rcx),%xmm3 + paddd L$four(%rip),%xmm0 + +L$oop_enter4x: + movdqa %xmm6,32(%rsp) + movdqa %xmm7,48(%rsp) + movdqa (%r10),%xmm7 + movl $10,%eax + movdqa %xmm0,256-256(%rcx) + jmp L$oop4x + +.p2align 5 +L$oop4x: + paddd %xmm12,%xmm8 + paddd %xmm13,%xmm9 + pxor %xmm8,%xmm0 + pxor %xmm9,%xmm1 +.byte 102,15,56,0,199 +.byte 102,15,56,0,207 + paddd %xmm0,%xmm4 + paddd %xmm1,%xmm5 + pxor %xmm4,%xmm12 + pxor %xmm5,%xmm13 + movdqa %xmm12,%xmm6 + pslld $12,%xmm12 + psrld $20,%xmm6 + movdqa %xmm13,%xmm7 + pslld $12,%xmm13 + por %xmm6,%xmm12 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm13 + paddd %xmm12,%xmm8 + paddd %xmm13,%xmm9 + pxor %xmm8,%xmm0 + pxor %xmm9,%xmm1 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 + paddd %xmm0,%xmm4 + paddd %xmm1,%xmm5 + pxor %xmm4,%xmm12 + pxor %xmm5,%xmm13 + movdqa %xmm12,%xmm7 + pslld $7,%xmm12 + psrld $25,%xmm7 + movdqa %xmm13,%xmm6 + pslld $7,%xmm13 + por %xmm7,%xmm12 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm13 + movdqa %xmm4,0(%rsp) + movdqa %xmm5,16(%rsp) + movdqa 32(%rsp),%xmm4 + movdqa 48(%rsp),%xmm5 + paddd %xmm14,%xmm10 + paddd %xmm15,%xmm11 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm3 +.byte 102,15,56,0,215 +.byte 102,15,56,0,223 + paddd %xmm2,%xmm4 + paddd %xmm3,%xmm5 + pxor %xmm4,%xmm14 + pxor %xmm5,%xmm15 + movdqa %xmm14,%xmm6 + pslld $12,%xmm14 + psrld $20,%xmm6 + movdqa %xmm15,%xmm7 + pslld $12,%xmm15 + por %xmm6,%xmm14 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm15 + paddd %xmm14,%xmm10 + paddd %xmm15,%xmm11 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm3 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm2,%xmm4 + paddd %xmm3,%xmm5 + pxor %xmm4,%xmm14 + pxor %xmm5,%xmm15 + movdqa %xmm14,%xmm7 + pslld $7,%xmm14 + psrld $25,%xmm7 + movdqa %xmm15,%xmm6 + pslld $7,%xmm15 + por %xmm7,%xmm14 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm15 + paddd %xmm13,%xmm8 + paddd %xmm14,%xmm9 + pxor %xmm8,%xmm3 + pxor %xmm9,%xmm0 +.byte 102,15,56,0,223 +.byte 102,15,56,0,199 + paddd %xmm3,%xmm4 + paddd %xmm0,%xmm5 + pxor %xmm4,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm13,%xmm6 + pslld $12,%xmm13 + psrld $20,%xmm6 + movdqa %xmm14,%xmm7 + pslld $12,%xmm14 + por %xmm6,%xmm13 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm14 + paddd %xmm13,%xmm8 + paddd %xmm14,%xmm9 + pxor %xmm8,%xmm3 + pxor %xmm9,%xmm0 +.byte 102,15,56,0,222 +.byte 102,15,56,0,198 + paddd %xmm3,%xmm4 + paddd %xmm0,%xmm5 + pxor %xmm4,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm13,%xmm7 + pslld $7,%xmm13 + psrld $25,%xmm7 + movdqa %xmm14,%xmm6 + pslld $7,%xmm14 + por %xmm7,%xmm13 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm14 + movdqa %xmm4,32(%rsp) + movdqa %xmm5,48(%rsp) + movdqa 0(%rsp),%xmm4 + movdqa 16(%rsp),%xmm5 + paddd %xmm15,%xmm10 + paddd %xmm12,%xmm11 + pxor %xmm10,%xmm1 + pxor %xmm11,%xmm2 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + paddd %xmm1,%xmm4 + paddd %xmm2,%xmm5 + pxor %xmm4,%xmm15 + pxor %xmm5,%xmm12 + movdqa %xmm15,%xmm6 + pslld $12,%xmm15 + psrld $20,%xmm6 + movdqa %xmm12,%xmm7 + pslld $12,%xmm12 + por %xmm6,%xmm15 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm12 + paddd %xmm15,%xmm10 + paddd %xmm12,%xmm11 + pxor %xmm10,%xmm1 + pxor %xmm11,%xmm2 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + paddd %xmm1,%xmm4 + paddd %xmm2,%xmm5 + pxor %xmm4,%xmm15 + pxor %xmm5,%xmm12 + movdqa %xmm15,%xmm7 + pslld $7,%xmm15 + psrld $25,%xmm7 + movdqa %xmm12,%xmm6 + pslld $7,%xmm12 + por %xmm7,%xmm15 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm12 + decl %eax + jnz L$oop4x + + paddd 64(%rsp),%xmm8 + paddd 80(%rsp),%xmm9 + paddd 96(%rsp),%xmm10 + paddd 112(%rsp),%xmm11 + + movdqa %xmm8,%xmm6 + punpckldq %xmm9,%xmm8 + movdqa %xmm10,%xmm7 + punpckldq %xmm11,%xmm10 + punpckhdq %xmm9,%xmm6 + punpckhdq %xmm11,%xmm7 + movdqa %xmm8,%xmm9 + punpcklqdq %xmm10,%xmm8 + movdqa %xmm6,%xmm11 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm10,%xmm9 + punpckhqdq %xmm7,%xmm11 + paddd 128-256(%rcx),%xmm12 + paddd 144-256(%rcx),%xmm13 + paddd 160-256(%rcx),%xmm14 + paddd 176-256(%rcx),%xmm15 + + movdqa %xmm8,0(%rsp) + movdqa %xmm9,16(%rsp) + movdqa 32(%rsp),%xmm8 + movdqa 48(%rsp),%xmm9 + + movdqa %xmm12,%xmm10 + punpckldq %xmm13,%xmm12 + movdqa %xmm14,%xmm7 + punpckldq %xmm15,%xmm14 + punpckhdq %xmm13,%xmm10 + punpckhdq %xmm15,%xmm7 + movdqa %xmm12,%xmm13 + punpcklqdq %xmm14,%xmm12 + movdqa %xmm10,%xmm15 + punpcklqdq %xmm7,%xmm10 + punpckhqdq %xmm14,%xmm13 + punpckhqdq %xmm7,%xmm15 + paddd 192-256(%rcx),%xmm4 + paddd 208-256(%rcx),%xmm5 + paddd 224-256(%rcx),%xmm8 + paddd 240-256(%rcx),%xmm9 + + movdqa %xmm6,32(%rsp) + movdqa %xmm11,48(%rsp) + + movdqa %xmm4,%xmm14 + punpckldq %xmm5,%xmm4 + movdqa %xmm8,%xmm7 + punpckldq %xmm9,%xmm8 + punpckhdq %xmm5,%xmm14 + punpckhdq %xmm9,%xmm7 + movdqa %xmm4,%xmm5 + punpcklqdq %xmm8,%xmm4 + movdqa %xmm14,%xmm9 + punpcklqdq %xmm7,%xmm14 + punpckhqdq %xmm8,%xmm5 + punpckhqdq %xmm7,%xmm9 + paddd 256-256(%rcx),%xmm0 + paddd 272-256(%rcx),%xmm1 + paddd 288-256(%rcx),%xmm2 + paddd 304-256(%rcx),%xmm3 + + movdqa %xmm0,%xmm8 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm8 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm8,%xmm3 + punpcklqdq %xmm7,%xmm8 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + cmpq $256,%rdx + jb L$tail4x + + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + + movdqu %xmm6,64(%rdi) + movdqu 0(%rsi),%xmm6 + movdqu %xmm11,80(%rdi) + movdqu 16(%rsi),%xmm11 + movdqu %xmm2,96(%rdi) + movdqu 32(%rsi),%xmm2 + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + movdqu 48(%rsi),%xmm7 + pxor 32(%rsp),%xmm6 + pxor %xmm10,%xmm11 + pxor %xmm14,%xmm2 + pxor %xmm8,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 48(%rsp),%xmm6 + pxor %xmm15,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm3,%xmm7 + movdqu %xmm6,64(%rdi) + movdqu %xmm11,80(%rdi) + movdqu %xmm2,96(%rdi) + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + + subq $256,%rdx + jnz L$oop_outer4x + + jmp L$done4x + +L$tail4x: + cmpq $192,%rdx + jae L$192_or_more4x + cmpq $128,%rdx + jae L$128_or_more4x + cmpq $64,%rdx + jae L$64_or_more4x + + + xorq %r10,%r10 + + movdqa %xmm12,16(%rsp) + movdqa %xmm4,32(%rsp) + movdqa %xmm0,48(%rsp) + jmp L$oop_tail4x + +.p2align 5 +L$64_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + movdqu %xmm6,0(%rdi) + movdqu %xmm11,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm7,48(%rdi) + je L$done4x + + movdqa 16(%rsp),%xmm6 + leaq 64(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm13,16(%rsp) + leaq 64(%rdi),%rdi + movdqa %xmm5,32(%rsp) + subq $64,%rdx + movdqa %xmm1,48(%rsp) + jmp L$oop_tail4x + +.p2align 5 +L$128_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + movdqu %xmm6,64(%rdi) + movdqu %xmm11,80(%rdi) + movdqu %xmm2,96(%rdi) + movdqu %xmm7,112(%rdi) + je L$done4x + + movdqa 32(%rsp),%xmm6 + leaq 128(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm10,16(%rsp) + leaq 128(%rdi),%rdi + movdqa %xmm14,32(%rsp) + subq $128,%rdx + movdqa %xmm8,48(%rsp) + jmp L$oop_tail4x + +.p2align 5 +L$192_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + + movdqu %xmm6,64(%rdi) + movdqu 0(%rsi),%xmm6 + movdqu %xmm11,80(%rdi) + movdqu 16(%rsi),%xmm11 + movdqu %xmm2,96(%rdi) + movdqu 32(%rsi),%xmm2 + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + movdqu 48(%rsi),%xmm7 + pxor 32(%rsp),%xmm6 + pxor %xmm10,%xmm11 + pxor %xmm14,%xmm2 + pxor %xmm8,%xmm7 + movdqu %xmm6,0(%rdi) + movdqu %xmm11,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm7,48(%rdi) + je L$done4x + + movdqa 48(%rsp),%xmm6 + leaq 64(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm15,16(%rsp) + leaq 64(%rdi),%rdi + movdqa %xmm9,32(%rsp) + subq $192,%rdx + movdqa %xmm3,48(%rsp) + +L$oop_tail4x: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb %al,-1(%rdi,%r10,1) + decq %rdx + jnz L$oop_tail4x + +L$done4x: + leaq (%r9),%rsp + +L$4x_epilogue: + ret + + +.globl _ChaCha20_ctr32_avx2 +.private_extern _ChaCha20_ctr32_avx2 + +.p2align 5 +_ChaCha20_ctr32_avx2: + +_CET_ENDBR + movq %rsp,%r9 + + subq $0x280+8,%rsp + andq $-32,%rsp + vzeroupper + + + + + + + + + + + vbroadcasti128 L$sigma(%rip),%ymm11 + vbroadcasti128 (%rcx),%ymm3 + vbroadcasti128 16(%rcx),%ymm15 + vbroadcasti128 (%r8),%ymm7 + leaq 256(%rsp),%rcx + leaq 512(%rsp),%rax + leaq L$rot16(%rip),%r10 + leaq L$rot24(%rip),%r11 + + vpshufd $0x00,%ymm11,%ymm8 + vpshufd $0x55,%ymm11,%ymm9 + vmovdqa %ymm8,128-256(%rcx) + vpshufd $0xaa,%ymm11,%ymm10 + vmovdqa %ymm9,160-256(%rcx) + vpshufd $0xff,%ymm11,%ymm11 + vmovdqa %ymm10,192-256(%rcx) + vmovdqa %ymm11,224-256(%rcx) + + vpshufd $0x00,%ymm3,%ymm0 + vpshufd $0x55,%ymm3,%ymm1 + vmovdqa %ymm0,256-256(%rcx) + vpshufd $0xaa,%ymm3,%ymm2 + vmovdqa %ymm1,288-256(%rcx) + vpshufd $0xff,%ymm3,%ymm3 + vmovdqa %ymm2,320-256(%rcx) + vmovdqa %ymm3,352-256(%rcx) + + vpshufd $0x00,%ymm15,%ymm12 + vpshufd $0x55,%ymm15,%ymm13 + vmovdqa %ymm12,384-512(%rax) + vpshufd $0xaa,%ymm15,%ymm14 + vmovdqa %ymm13,416-512(%rax) + vpshufd $0xff,%ymm15,%ymm15 + vmovdqa %ymm14,448-512(%rax) + vmovdqa %ymm15,480-512(%rax) + + vpshufd $0x00,%ymm7,%ymm4 + vpshufd $0x55,%ymm7,%ymm5 + vpaddd L$incy(%rip),%ymm4,%ymm4 + vpshufd $0xaa,%ymm7,%ymm6 + vmovdqa %ymm5,544-512(%rax) + vpshufd $0xff,%ymm7,%ymm7 + vmovdqa %ymm6,576-512(%rax) + vmovdqa %ymm7,608-512(%rax) + + jmp L$oop_enter8x + +.p2align 5 +L$oop_outer8x: + vmovdqa 128-256(%rcx),%ymm8 + vmovdqa 160-256(%rcx),%ymm9 + vmovdqa 192-256(%rcx),%ymm10 + vmovdqa 224-256(%rcx),%ymm11 + vmovdqa 256-256(%rcx),%ymm0 + vmovdqa 288-256(%rcx),%ymm1 + vmovdqa 320-256(%rcx),%ymm2 + vmovdqa 352-256(%rcx),%ymm3 + vmovdqa 384-512(%rax),%ymm12 + vmovdqa 416-512(%rax),%ymm13 + vmovdqa 448-512(%rax),%ymm14 + vmovdqa 480-512(%rax),%ymm15 + vmovdqa 512-512(%rax),%ymm4 + vmovdqa 544-512(%rax),%ymm5 + vmovdqa 576-512(%rax),%ymm6 + vmovdqa 608-512(%rax),%ymm7 + vpaddd L$eight(%rip),%ymm4,%ymm4 + +L$oop_enter8x: + vmovdqa %ymm14,64(%rsp) + vmovdqa %ymm15,96(%rsp) + vbroadcasti128 (%r10),%ymm15 + vmovdqa %ymm4,512-512(%rax) + movl $10,%eax + jmp L$oop8x + +.p2align 5 +L$oop8x: + vpaddd %ymm0,%ymm8,%ymm8 + vpxor %ymm4,%ymm8,%ymm4 + vpshufb %ymm15,%ymm4,%ymm4 + vpaddd %ymm1,%ymm9,%ymm9 + vpxor %ymm5,%ymm9,%ymm5 + vpshufb %ymm15,%ymm5,%ymm5 + vpaddd %ymm4,%ymm12,%ymm12 + vpxor %ymm0,%ymm12,%ymm0 + vpslld $12,%ymm0,%ymm14 + vpsrld $20,%ymm0,%ymm0 + vpor %ymm0,%ymm14,%ymm0 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm5,%ymm13,%ymm13 + vpxor %ymm1,%ymm13,%ymm1 + vpslld $12,%ymm1,%ymm15 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm1,%ymm15,%ymm1 + vpaddd %ymm0,%ymm8,%ymm8 + vpxor %ymm4,%ymm8,%ymm4 + vpshufb %ymm14,%ymm4,%ymm4 + vpaddd %ymm1,%ymm9,%ymm9 + vpxor %ymm5,%ymm9,%ymm5 + vpshufb %ymm14,%ymm5,%ymm5 + vpaddd %ymm4,%ymm12,%ymm12 + vpxor %ymm0,%ymm12,%ymm0 + vpslld $7,%ymm0,%ymm15 + vpsrld $25,%ymm0,%ymm0 + vpor %ymm0,%ymm15,%ymm0 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm5,%ymm13,%ymm13 + vpxor %ymm1,%ymm13,%ymm1 + vpslld $7,%ymm1,%ymm14 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm1,%ymm14,%ymm1 + vmovdqa %ymm12,0(%rsp) + vmovdqa %ymm13,32(%rsp) + vmovdqa 64(%rsp),%ymm12 + vmovdqa 96(%rsp),%ymm13 + vpaddd %ymm2,%ymm10,%ymm10 + vpxor %ymm6,%ymm10,%ymm6 + vpshufb %ymm15,%ymm6,%ymm6 + vpaddd %ymm3,%ymm11,%ymm11 + vpxor %ymm7,%ymm11,%ymm7 + vpshufb %ymm15,%ymm7,%ymm7 + vpaddd %ymm6,%ymm12,%ymm12 + vpxor %ymm2,%ymm12,%ymm2 + vpslld $12,%ymm2,%ymm14 + vpsrld $20,%ymm2,%ymm2 + vpor %ymm2,%ymm14,%ymm2 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm7,%ymm13,%ymm13 + vpxor %ymm3,%ymm13,%ymm3 + vpslld $12,%ymm3,%ymm15 + vpsrld $20,%ymm3,%ymm3 + vpor %ymm3,%ymm15,%ymm3 + vpaddd %ymm2,%ymm10,%ymm10 + vpxor %ymm6,%ymm10,%ymm6 + vpshufb %ymm14,%ymm6,%ymm6 + vpaddd %ymm3,%ymm11,%ymm11 + vpxor %ymm7,%ymm11,%ymm7 + vpshufb %ymm14,%ymm7,%ymm7 + vpaddd %ymm6,%ymm12,%ymm12 + vpxor %ymm2,%ymm12,%ymm2 + vpslld $7,%ymm2,%ymm15 + vpsrld $25,%ymm2,%ymm2 + vpor %ymm2,%ymm15,%ymm2 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm7,%ymm13,%ymm13 + vpxor %ymm3,%ymm13,%ymm3 + vpslld $7,%ymm3,%ymm14 + vpsrld $25,%ymm3,%ymm3 + vpor %ymm3,%ymm14,%ymm3 + vpaddd %ymm1,%ymm8,%ymm8 + vpxor %ymm7,%ymm8,%ymm7 + vpshufb %ymm15,%ymm7,%ymm7 + vpaddd %ymm2,%ymm9,%ymm9 + vpxor %ymm4,%ymm9,%ymm4 + vpshufb %ymm15,%ymm4,%ymm4 + vpaddd %ymm7,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm1 + vpslld $12,%ymm1,%ymm14 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm1,%ymm14,%ymm1 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm4,%ymm13,%ymm13 + vpxor %ymm2,%ymm13,%ymm2 + vpslld $12,%ymm2,%ymm15 + vpsrld $20,%ymm2,%ymm2 + vpor %ymm2,%ymm15,%ymm2 + vpaddd %ymm1,%ymm8,%ymm8 + vpxor %ymm7,%ymm8,%ymm7 + vpshufb %ymm14,%ymm7,%ymm7 + vpaddd %ymm2,%ymm9,%ymm9 + vpxor %ymm4,%ymm9,%ymm4 + vpshufb %ymm14,%ymm4,%ymm4 + vpaddd %ymm7,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm1 + vpslld $7,%ymm1,%ymm15 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm1,%ymm15,%ymm1 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm4,%ymm13,%ymm13 + vpxor %ymm2,%ymm13,%ymm2 + vpslld $7,%ymm2,%ymm14 + vpsrld $25,%ymm2,%ymm2 + vpor %ymm2,%ymm14,%ymm2 + vmovdqa %ymm12,64(%rsp) + vmovdqa %ymm13,96(%rsp) + vmovdqa 0(%rsp),%ymm12 + vmovdqa 32(%rsp),%ymm13 + vpaddd %ymm3,%ymm10,%ymm10 + vpxor %ymm5,%ymm10,%ymm5 + vpshufb %ymm15,%ymm5,%ymm5 + vpaddd %ymm0,%ymm11,%ymm11 + vpxor %ymm6,%ymm11,%ymm6 + vpshufb %ymm15,%ymm6,%ymm6 + vpaddd %ymm5,%ymm12,%ymm12 + vpxor %ymm3,%ymm12,%ymm3 + vpslld $12,%ymm3,%ymm14 + vpsrld $20,%ymm3,%ymm3 + vpor %ymm3,%ymm14,%ymm3 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm6,%ymm13,%ymm13 + vpxor %ymm0,%ymm13,%ymm0 + vpslld $12,%ymm0,%ymm15 + vpsrld $20,%ymm0,%ymm0 + vpor %ymm0,%ymm15,%ymm0 + vpaddd %ymm3,%ymm10,%ymm10 + vpxor %ymm5,%ymm10,%ymm5 + vpshufb %ymm14,%ymm5,%ymm5 + vpaddd %ymm0,%ymm11,%ymm11 + vpxor %ymm6,%ymm11,%ymm6 + vpshufb %ymm14,%ymm6,%ymm6 + vpaddd %ymm5,%ymm12,%ymm12 + vpxor %ymm3,%ymm12,%ymm3 + vpslld $7,%ymm3,%ymm15 + vpsrld $25,%ymm3,%ymm3 + vpor %ymm3,%ymm15,%ymm3 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm6,%ymm13,%ymm13 + vpxor %ymm0,%ymm13,%ymm0 + vpslld $7,%ymm0,%ymm14 + vpsrld $25,%ymm0,%ymm0 + vpor %ymm0,%ymm14,%ymm0 + decl %eax + jnz L$oop8x + + leaq 512(%rsp),%rax + vpaddd 128-256(%rcx),%ymm8,%ymm8 + vpaddd 160-256(%rcx),%ymm9,%ymm9 + vpaddd 192-256(%rcx),%ymm10,%ymm10 + vpaddd 224-256(%rcx),%ymm11,%ymm11 + + vpunpckldq %ymm9,%ymm8,%ymm14 + vpunpckldq %ymm11,%ymm10,%ymm15 + vpunpckhdq %ymm9,%ymm8,%ymm8 + vpunpckhdq %ymm11,%ymm10,%ymm10 + vpunpcklqdq %ymm15,%ymm14,%ymm9 + vpunpckhqdq %ymm15,%ymm14,%ymm14 + vpunpcklqdq %ymm10,%ymm8,%ymm11 + vpunpckhqdq %ymm10,%ymm8,%ymm8 + vpaddd 256-256(%rcx),%ymm0,%ymm0 + vpaddd 288-256(%rcx),%ymm1,%ymm1 + vpaddd 320-256(%rcx),%ymm2,%ymm2 + vpaddd 352-256(%rcx),%ymm3,%ymm3 + + vpunpckldq %ymm1,%ymm0,%ymm10 + vpunpckldq %ymm3,%ymm2,%ymm15 + vpunpckhdq %ymm1,%ymm0,%ymm0 + vpunpckhdq %ymm3,%ymm2,%ymm2 + vpunpcklqdq %ymm15,%ymm10,%ymm1 + vpunpckhqdq %ymm15,%ymm10,%ymm10 + vpunpcklqdq %ymm2,%ymm0,%ymm3 + vpunpckhqdq %ymm2,%ymm0,%ymm0 + vperm2i128 $0x20,%ymm1,%ymm9,%ymm15 + vperm2i128 $0x31,%ymm1,%ymm9,%ymm1 + vperm2i128 $0x20,%ymm10,%ymm14,%ymm9 + vperm2i128 $0x31,%ymm10,%ymm14,%ymm10 + vperm2i128 $0x20,%ymm3,%ymm11,%ymm14 + vperm2i128 $0x31,%ymm3,%ymm11,%ymm3 + vperm2i128 $0x20,%ymm0,%ymm8,%ymm11 + vperm2i128 $0x31,%ymm0,%ymm8,%ymm0 + vmovdqa %ymm15,0(%rsp) + vmovdqa %ymm9,32(%rsp) + vmovdqa 64(%rsp),%ymm15 + vmovdqa 96(%rsp),%ymm9 + + vpaddd 384-512(%rax),%ymm12,%ymm12 + vpaddd 416-512(%rax),%ymm13,%ymm13 + vpaddd 448-512(%rax),%ymm15,%ymm15 + vpaddd 480-512(%rax),%ymm9,%ymm9 + + vpunpckldq %ymm13,%ymm12,%ymm2 + vpunpckldq %ymm9,%ymm15,%ymm8 + vpunpckhdq %ymm13,%ymm12,%ymm12 + vpunpckhdq %ymm9,%ymm15,%ymm15 + vpunpcklqdq %ymm8,%ymm2,%ymm13 + vpunpckhqdq %ymm8,%ymm2,%ymm2 + vpunpcklqdq %ymm15,%ymm12,%ymm9 + vpunpckhqdq %ymm15,%ymm12,%ymm12 + vpaddd 512-512(%rax),%ymm4,%ymm4 + vpaddd 544-512(%rax),%ymm5,%ymm5 + vpaddd 576-512(%rax),%ymm6,%ymm6 + vpaddd 608-512(%rax),%ymm7,%ymm7 + + vpunpckldq %ymm5,%ymm4,%ymm15 + vpunpckldq %ymm7,%ymm6,%ymm8 + vpunpckhdq %ymm5,%ymm4,%ymm4 + vpunpckhdq %ymm7,%ymm6,%ymm6 + vpunpcklqdq %ymm8,%ymm15,%ymm5 + vpunpckhqdq %ymm8,%ymm15,%ymm15 + vpunpcklqdq %ymm6,%ymm4,%ymm7 + vpunpckhqdq %ymm6,%ymm4,%ymm4 + vperm2i128 $0x20,%ymm5,%ymm13,%ymm8 + vperm2i128 $0x31,%ymm5,%ymm13,%ymm5 + vperm2i128 $0x20,%ymm15,%ymm2,%ymm13 + vperm2i128 $0x31,%ymm15,%ymm2,%ymm15 + vperm2i128 $0x20,%ymm7,%ymm9,%ymm2 + vperm2i128 $0x31,%ymm7,%ymm9,%ymm7 + vperm2i128 $0x20,%ymm4,%ymm12,%ymm9 + vperm2i128 $0x31,%ymm4,%ymm12,%ymm4 + vmovdqa 0(%rsp),%ymm6 + vmovdqa 32(%rsp),%ymm12 + + cmpq $512,%rdx + jb L$tail8x + + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + leaq 128(%rsi),%rsi + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm12,%ymm12 + vpxor 32(%rsi),%ymm13,%ymm13 + vpxor 64(%rsi),%ymm10,%ymm10 + vpxor 96(%rsi),%ymm15,%ymm15 + leaq 128(%rsi),%rsi + vmovdqu %ymm12,0(%rdi) + vmovdqu %ymm13,32(%rdi) + vmovdqu %ymm10,64(%rdi) + vmovdqu %ymm15,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm14,%ymm14 + vpxor 32(%rsi),%ymm2,%ymm2 + vpxor 64(%rsi),%ymm3,%ymm3 + vpxor 96(%rsi),%ymm7,%ymm7 + leaq 128(%rsi),%rsi + vmovdqu %ymm14,0(%rdi) + vmovdqu %ymm2,32(%rdi) + vmovdqu %ymm3,64(%rdi) + vmovdqu %ymm7,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm11,%ymm11 + vpxor 32(%rsi),%ymm9,%ymm9 + vpxor 64(%rsi),%ymm0,%ymm0 + vpxor 96(%rsi),%ymm4,%ymm4 + leaq 128(%rsi),%rsi + vmovdqu %ymm11,0(%rdi) + vmovdqu %ymm9,32(%rdi) + vmovdqu %ymm0,64(%rdi) + vmovdqu %ymm4,96(%rdi) + leaq 128(%rdi),%rdi + + subq $512,%rdx + jnz L$oop_outer8x + + jmp L$done8x + +L$tail8x: + cmpq $448,%rdx + jae L$448_or_more8x + cmpq $384,%rdx + jae L$384_or_more8x + cmpq $320,%rdx + jae L$320_or_more8x + cmpq $256,%rdx + jae L$256_or_more8x + cmpq $192,%rdx + jae L$192_or_more8x + cmpq $128,%rdx + jae L$128_or_more8x + cmpq $64,%rdx + jae L$64_or_more8x + + xorq %r10,%r10 + vmovdqa %ymm6,0(%rsp) + vmovdqa %ymm8,32(%rsp) + jmp L$oop_tail8x + +.p2align 5 +L$64_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + je L$done8x + + leaq 64(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm1,0(%rsp) + leaq 64(%rdi),%rdi + subq $64,%rdx + vmovdqa %ymm5,32(%rsp) + jmp L$oop_tail8x + +.p2align 5 +L$128_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + je L$done8x + + leaq 128(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm12,0(%rsp) + leaq 128(%rdi),%rdi + subq $128,%rdx + vmovdqa %ymm13,32(%rsp) + jmp L$oop_tail8x + +.p2align 5 +L$192_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + je L$done8x + + leaq 192(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm10,0(%rsp) + leaq 192(%rdi),%rdi + subq $192,%rdx + vmovdqa %ymm15,32(%rsp) + jmp L$oop_tail8x + +.p2align 5 +L$256_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + je L$done8x + + leaq 256(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm14,0(%rsp) + leaq 256(%rdi),%rdi + subq $256,%rdx + vmovdqa %ymm2,32(%rsp) + jmp L$oop_tail8x + +.p2align 5 +L$320_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + je L$done8x + + leaq 320(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm3,0(%rsp) + leaq 320(%rdi),%rdi + subq $320,%rdx + vmovdqa %ymm7,32(%rsp) + jmp L$oop_tail8x + +.p2align 5 +L$384_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vpxor 320(%rsi),%ymm3,%ymm3 + vpxor 352(%rsi),%ymm7,%ymm7 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + vmovdqu %ymm3,320(%rdi) + vmovdqu %ymm7,352(%rdi) + je L$done8x + + leaq 384(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm11,0(%rsp) + leaq 384(%rdi),%rdi + subq $384,%rdx + vmovdqa %ymm9,32(%rsp) + jmp L$oop_tail8x + +.p2align 5 +L$448_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vpxor 320(%rsi),%ymm3,%ymm3 + vpxor 352(%rsi),%ymm7,%ymm7 + vpxor 384(%rsi),%ymm11,%ymm11 + vpxor 416(%rsi),%ymm9,%ymm9 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + vmovdqu %ymm3,320(%rdi) + vmovdqu %ymm7,352(%rdi) + vmovdqu %ymm11,384(%rdi) + vmovdqu %ymm9,416(%rdi) + je L$done8x + + leaq 448(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm0,0(%rsp) + leaq 448(%rdi),%rdi + subq $448,%rdx + vmovdqa %ymm4,32(%rsp) + +L$oop_tail8x: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb %al,-1(%rdi,%r10,1) + decq %rdx + jnz L$oop_tail8x + +L$done8x: + vzeroall + leaq (%r9),%rsp + +L$8x_epilogue: + ret + + +#endif diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-linux.S b/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-linux.S new file mode 100644 index 0000000000..9dbf7d1f06 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-linux.S @@ -0,0 +1,1610 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + +.section .rodata +.align 64 +.Lzero: +.long 0,0,0,0 +.Lone: +.long 1,0,0,0 +.Linc: +.long 0,1,2,3 +.Lfour: +.long 4,4,4,4 +.Lincy: +.long 0,2,4,6,1,3,5,7 +.Leight: +.long 8,8,8,8,8,8,8,8 +.Lrot16: +.byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd +.Lrot24: +.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe +.Lsigma: +.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 +.align 64 +.Lzeroz: +.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 +.Lfourz: +.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 +.Lincz: +.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +.Lsixteen: +.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text +.globl ChaCha20_ctr32_nohw +.hidden ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,@function +.align 64 +ChaCha20_ctr32_nohw: +.cfi_startproc +_CET_ENDBR + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15,-56 + subq $64+24,%rsp +.cfi_adjust_cfa_offset 88 +.Lctr32_body: + + + movdqu (%rcx),%xmm1 + movdqu 16(%rcx),%xmm2 + movdqu (%r8),%xmm3 + movdqa .Lone(%rip),%xmm4 + + + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa %xmm3,48(%rsp) + movq %rdx,%rbp + jmp .Loop_outer + +.align 32 +.Loop_outer: + movl $0x61707865,%eax + movl $0x3320646e,%ebx + movl $0x79622d32,%ecx + movl $0x6b206574,%edx + movl 16(%rsp),%r8d + movl 20(%rsp),%r9d + movl 24(%rsp),%r10d + movl 28(%rsp),%r11d + movd %xmm3,%r12d + movl 52(%rsp),%r13d + movl 56(%rsp),%r14d + movl 60(%rsp),%r15d + + movq %rbp,64+0(%rsp) + movl $10,%ebp + movq %rsi,64+8(%rsp) +.byte 102,72,15,126,214 + movq %rdi,64+16(%rsp) + movq %rsi,%rdi + shrq $32,%rdi + jmp .Loop + +.align 32 +.Loop: + addl %r8d,%eax + xorl %eax,%r12d + roll $16,%r12d + addl %r9d,%ebx + xorl %ebx,%r13d + roll $16,%r13d + addl %r12d,%esi + xorl %esi,%r8d + roll $12,%r8d + addl %r13d,%edi + xorl %edi,%r9d + roll $12,%r9d + addl %r8d,%eax + xorl %eax,%r12d + roll $8,%r12d + addl %r9d,%ebx + xorl %ebx,%r13d + roll $8,%r13d + addl %r12d,%esi + xorl %esi,%r8d + roll $7,%r8d + addl %r13d,%edi + xorl %edi,%r9d + roll $7,%r9d + movl %esi,32(%rsp) + movl %edi,36(%rsp) + movl 40(%rsp),%esi + movl 44(%rsp),%edi + addl %r10d,%ecx + xorl %ecx,%r14d + roll $16,%r14d + addl %r11d,%edx + xorl %edx,%r15d + roll $16,%r15d + addl %r14d,%esi + xorl %esi,%r10d + roll $12,%r10d + addl %r15d,%edi + xorl %edi,%r11d + roll $12,%r11d + addl %r10d,%ecx + xorl %ecx,%r14d + roll $8,%r14d + addl %r11d,%edx + xorl %edx,%r15d + roll $8,%r15d + addl %r14d,%esi + xorl %esi,%r10d + roll $7,%r10d + addl %r15d,%edi + xorl %edi,%r11d + roll $7,%r11d + addl %r9d,%eax + xorl %eax,%r15d + roll $16,%r15d + addl %r10d,%ebx + xorl %ebx,%r12d + roll $16,%r12d + addl %r15d,%esi + xorl %esi,%r9d + roll $12,%r9d + addl %r12d,%edi + xorl %edi,%r10d + roll $12,%r10d + addl %r9d,%eax + xorl %eax,%r15d + roll $8,%r15d + addl %r10d,%ebx + xorl %ebx,%r12d + roll $8,%r12d + addl %r15d,%esi + xorl %esi,%r9d + roll $7,%r9d + addl %r12d,%edi + xorl %edi,%r10d + roll $7,%r10d + movl %esi,40(%rsp) + movl %edi,44(%rsp) + movl 32(%rsp),%esi + movl 36(%rsp),%edi + addl %r11d,%ecx + xorl %ecx,%r13d + roll $16,%r13d + addl %r8d,%edx + xorl %edx,%r14d + roll $16,%r14d + addl %r13d,%esi + xorl %esi,%r11d + roll $12,%r11d + addl %r14d,%edi + xorl %edi,%r8d + roll $12,%r8d + addl %r11d,%ecx + xorl %ecx,%r13d + roll $8,%r13d + addl %r8d,%edx + xorl %edx,%r14d + roll $8,%r14d + addl %r13d,%esi + xorl %esi,%r11d + roll $7,%r11d + addl %r14d,%edi + xorl %edi,%r8d + roll $7,%r8d + decl %ebp + jnz .Loop + movl %edi,36(%rsp) + movl %esi,32(%rsp) + movq 64(%rsp),%rbp + movdqa %xmm2,%xmm1 + movq 64+8(%rsp),%rsi + paddd %xmm4,%xmm3 + movq 64+16(%rsp),%rdi + + addl $0x61707865,%eax + addl $0x3320646e,%ebx + addl $0x79622d32,%ecx + addl $0x6b206574,%edx + addl 16(%rsp),%r8d + addl 20(%rsp),%r9d + addl 24(%rsp),%r10d + addl 28(%rsp),%r11d + addl 48(%rsp),%r12d + addl 52(%rsp),%r13d + addl 56(%rsp),%r14d + addl 60(%rsp),%r15d + paddd 32(%rsp),%xmm1 + + cmpq $64,%rbp + jb .Ltail + + xorl 0(%rsi),%eax + xorl 4(%rsi),%ebx + xorl 8(%rsi),%ecx + xorl 12(%rsi),%edx + xorl 16(%rsi),%r8d + xorl 20(%rsi),%r9d + xorl 24(%rsi),%r10d + xorl 28(%rsi),%r11d + movdqu 32(%rsi),%xmm0 + xorl 48(%rsi),%r12d + xorl 52(%rsi),%r13d + xorl 56(%rsi),%r14d + xorl 60(%rsi),%r15d + leaq 64(%rsi),%rsi + pxor %xmm1,%xmm0 + + movdqa %xmm2,32(%rsp) + movd %xmm3,48(%rsp) + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + movdqu %xmm0,32(%rdi) + movl %r12d,48(%rdi) + movl %r13d,52(%rdi) + movl %r14d,56(%rdi) + movl %r15d,60(%rdi) + leaq 64(%rdi),%rdi + + subq $64,%rbp + jnz .Loop_outer + + jmp .Ldone + +.align 16 +.Ltail: + movl %eax,0(%rsp) + movl %ebx,4(%rsp) + xorq %rbx,%rbx + movl %ecx,8(%rsp) + movl %edx,12(%rsp) + movl %r8d,16(%rsp) + movl %r9d,20(%rsp) + movl %r10d,24(%rsp) + movl %r11d,28(%rsp) + movdqa %xmm1,32(%rsp) + movl %r12d,48(%rsp) + movl %r13d,52(%rsp) + movl %r14d,56(%rsp) + movl %r15d,60(%rsp) + +.Loop_tail: + movzbl (%rsi,%rbx,1),%eax + movzbl (%rsp,%rbx,1),%edx + leaq 1(%rbx),%rbx + xorl %edx,%eax + movb %al,-1(%rdi,%rbx,1) + decq %rbp + jnz .Loop_tail + +.Ldone: + leaq 64+24+48(%rsp),%rsi + movq -48(%rsi),%r15 +.cfi_restore r15 + movq -40(%rsi),%r14 +.cfi_restore r14 + movq -32(%rsi),%r13 +.cfi_restore r13 + movq -24(%rsi),%r12 +.cfi_restore r12 + movq -16(%rsi),%rbp +.cfi_restore rbp + movq -8(%rsi),%rbx +.cfi_restore rbx + leaq (%rsi),%rsp +.cfi_adjust_cfa_offset -136 +.Lno_data: + ret +.cfi_endproc +.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw +.globl ChaCha20_ctr32_ssse3 +.hidden ChaCha20_ctr32_ssse3 +.type ChaCha20_ctr32_ssse3,@function +.align 32 +ChaCha20_ctr32_ssse3: +.cfi_startproc +_CET_ENDBR + movq %rsp,%r9 +.cfi_def_cfa_register r9 + subq $64+8,%rsp + movdqa .Lsigma(%rip),%xmm0 + movdqu (%rcx),%xmm1 + movdqu 16(%rcx),%xmm2 + movdqu (%r8),%xmm3 + movdqa .Lrot16(%rip),%xmm6 + movdqa .Lrot24(%rip),%xmm7 + + movdqa %xmm0,0(%rsp) + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa %xmm3,48(%rsp) + movq $10,%r8 + jmp .Loop_ssse3 + +.align 32 +.Loop_outer_ssse3: + movdqa .Lone(%rip),%xmm3 + movdqa 0(%rsp),%xmm0 + movdqa 16(%rsp),%xmm1 + movdqa 32(%rsp),%xmm2 + paddd 48(%rsp),%xmm3 + movq $10,%r8 + movdqa %xmm3,48(%rsp) + jmp .Loop_ssse3 + +.align 32 +.Loop_ssse3: + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $57,%xmm1,%xmm1 + pshufd $147,%xmm3,%xmm3 + nop + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $147,%xmm1,%xmm1 + pshufd $57,%xmm3,%xmm3 + decq %r8 + jnz .Loop_ssse3 + paddd 0(%rsp),%xmm0 + paddd 16(%rsp),%xmm1 + paddd 32(%rsp),%xmm2 + paddd 48(%rsp),%xmm3 + + cmpq $64,%rdx + jb .Ltail_ssse3 + + movdqu 0(%rsi),%xmm4 + movdqu 16(%rsi),%xmm5 + pxor %xmm4,%xmm0 + movdqu 32(%rsi),%xmm4 + pxor %xmm5,%xmm1 + movdqu 48(%rsi),%xmm5 + leaq 64(%rsi),%rsi + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm3 + + movdqu %xmm0,0(%rdi) + movdqu %xmm1,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + leaq 64(%rdi),%rdi + + subq $64,%rdx + jnz .Loop_outer_ssse3 + + jmp .Ldone_ssse3 + +.align 16 +.Ltail_ssse3: + movdqa %xmm0,0(%rsp) + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa %xmm3,48(%rsp) + xorq %r8,%r8 + +.Loop_tail_ssse3: + movzbl (%rsi,%r8,1),%eax + movzbl (%rsp,%r8,1),%ecx + leaq 1(%r8),%r8 + xorl %ecx,%eax + movb %al,-1(%rdi,%r8,1) + decq %rdx + jnz .Loop_tail_ssse3 + +.Ldone_ssse3: + leaq (%r9),%rsp +.cfi_def_cfa_register rsp +.Lssse3_epilogue: + ret +.cfi_endproc +.size ChaCha20_ctr32_ssse3,.-ChaCha20_ctr32_ssse3 +.globl ChaCha20_ctr32_ssse3_4x +.hidden ChaCha20_ctr32_ssse3_4x +.type ChaCha20_ctr32_ssse3_4x,@function +.align 32 +ChaCha20_ctr32_ssse3_4x: +.cfi_startproc +_CET_ENDBR + movq %rsp,%r9 +.cfi_def_cfa_register r9 + movq %r10,%r11 + subq $0x140+8,%rsp + movdqa .Lsigma(%rip),%xmm11 + movdqu (%rcx),%xmm15 + movdqu 16(%rcx),%xmm7 + movdqu (%r8),%xmm3 + leaq 256(%rsp),%rcx + leaq .Lrot16(%rip),%r10 + leaq .Lrot24(%rip),%r11 + + pshufd $0x00,%xmm11,%xmm8 + pshufd $0x55,%xmm11,%xmm9 + movdqa %xmm8,64(%rsp) + pshufd $0xaa,%xmm11,%xmm10 + movdqa %xmm9,80(%rsp) + pshufd $0xff,%xmm11,%xmm11 + movdqa %xmm10,96(%rsp) + movdqa %xmm11,112(%rsp) + + pshufd $0x00,%xmm15,%xmm12 + pshufd $0x55,%xmm15,%xmm13 + movdqa %xmm12,128-256(%rcx) + pshufd $0xaa,%xmm15,%xmm14 + movdqa %xmm13,144-256(%rcx) + pshufd $0xff,%xmm15,%xmm15 + movdqa %xmm14,160-256(%rcx) + movdqa %xmm15,176-256(%rcx) + + pshufd $0x00,%xmm7,%xmm4 + pshufd $0x55,%xmm7,%xmm5 + movdqa %xmm4,192-256(%rcx) + pshufd $0xaa,%xmm7,%xmm6 + movdqa %xmm5,208-256(%rcx) + pshufd $0xff,%xmm7,%xmm7 + movdqa %xmm6,224-256(%rcx) + movdqa %xmm7,240-256(%rcx) + + pshufd $0x00,%xmm3,%xmm0 + pshufd $0x55,%xmm3,%xmm1 + paddd .Linc(%rip),%xmm0 + pshufd $0xaa,%xmm3,%xmm2 + movdqa %xmm1,272-256(%rcx) + pshufd $0xff,%xmm3,%xmm3 + movdqa %xmm2,288-256(%rcx) + movdqa %xmm3,304-256(%rcx) + + jmp .Loop_enter4x + +.align 32 +.Loop_outer4x: + movdqa 64(%rsp),%xmm8 + movdqa 80(%rsp),%xmm9 + movdqa 96(%rsp),%xmm10 + movdqa 112(%rsp),%xmm11 + movdqa 128-256(%rcx),%xmm12 + movdqa 144-256(%rcx),%xmm13 + movdqa 160-256(%rcx),%xmm14 + movdqa 176-256(%rcx),%xmm15 + movdqa 192-256(%rcx),%xmm4 + movdqa 208-256(%rcx),%xmm5 + movdqa 224-256(%rcx),%xmm6 + movdqa 240-256(%rcx),%xmm7 + movdqa 256-256(%rcx),%xmm0 + movdqa 272-256(%rcx),%xmm1 + movdqa 288-256(%rcx),%xmm2 + movdqa 304-256(%rcx),%xmm3 + paddd .Lfour(%rip),%xmm0 + +.Loop_enter4x: + movdqa %xmm6,32(%rsp) + movdqa %xmm7,48(%rsp) + movdqa (%r10),%xmm7 + movl $10,%eax + movdqa %xmm0,256-256(%rcx) + jmp .Loop4x + +.align 32 +.Loop4x: + paddd %xmm12,%xmm8 + paddd %xmm13,%xmm9 + pxor %xmm8,%xmm0 + pxor %xmm9,%xmm1 +.byte 102,15,56,0,199 +.byte 102,15,56,0,207 + paddd %xmm0,%xmm4 + paddd %xmm1,%xmm5 + pxor %xmm4,%xmm12 + pxor %xmm5,%xmm13 + movdqa %xmm12,%xmm6 + pslld $12,%xmm12 + psrld $20,%xmm6 + movdqa %xmm13,%xmm7 + pslld $12,%xmm13 + por %xmm6,%xmm12 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm13 + paddd %xmm12,%xmm8 + paddd %xmm13,%xmm9 + pxor %xmm8,%xmm0 + pxor %xmm9,%xmm1 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 + paddd %xmm0,%xmm4 + paddd %xmm1,%xmm5 + pxor %xmm4,%xmm12 + pxor %xmm5,%xmm13 + movdqa %xmm12,%xmm7 + pslld $7,%xmm12 + psrld $25,%xmm7 + movdqa %xmm13,%xmm6 + pslld $7,%xmm13 + por %xmm7,%xmm12 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm13 + movdqa %xmm4,0(%rsp) + movdqa %xmm5,16(%rsp) + movdqa 32(%rsp),%xmm4 + movdqa 48(%rsp),%xmm5 + paddd %xmm14,%xmm10 + paddd %xmm15,%xmm11 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm3 +.byte 102,15,56,0,215 +.byte 102,15,56,0,223 + paddd %xmm2,%xmm4 + paddd %xmm3,%xmm5 + pxor %xmm4,%xmm14 + pxor %xmm5,%xmm15 + movdqa %xmm14,%xmm6 + pslld $12,%xmm14 + psrld $20,%xmm6 + movdqa %xmm15,%xmm7 + pslld $12,%xmm15 + por %xmm6,%xmm14 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm15 + paddd %xmm14,%xmm10 + paddd %xmm15,%xmm11 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm3 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm2,%xmm4 + paddd %xmm3,%xmm5 + pxor %xmm4,%xmm14 + pxor %xmm5,%xmm15 + movdqa %xmm14,%xmm7 + pslld $7,%xmm14 + psrld $25,%xmm7 + movdqa %xmm15,%xmm6 + pslld $7,%xmm15 + por %xmm7,%xmm14 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm15 + paddd %xmm13,%xmm8 + paddd %xmm14,%xmm9 + pxor %xmm8,%xmm3 + pxor %xmm9,%xmm0 +.byte 102,15,56,0,223 +.byte 102,15,56,0,199 + paddd %xmm3,%xmm4 + paddd %xmm0,%xmm5 + pxor %xmm4,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm13,%xmm6 + pslld $12,%xmm13 + psrld $20,%xmm6 + movdqa %xmm14,%xmm7 + pslld $12,%xmm14 + por %xmm6,%xmm13 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm14 + paddd %xmm13,%xmm8 + paddd %xmm14,%xmm9 + pxor %xmm8,%xmm3 + pxor %xmm9,%xmm0 +.byte 102,15,56,0,222 +.byte 102,15,56,0,198 + paddd %xmm3,%xmm4 + paddd %xmm0,%xmm5 + pxor %xmm4,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm13,%xmm7 + pslld $7,%xmm13 + psrld $25,%xmm7 + movdqa %xmm14,%xmm6 + pslld $7,%xmm14 + por %xmm7,%xmm13 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm14 + movdqa %xmm4,32(%rsp) + movdqa %xmm5,48(%rsp) + movdqa 0(%rsp),%xmm4 + movdqa 16(%rsp),%xmm5 + paddd %xmm15,%xmm10 + paddd %xmm12,%xmm11 + pxor %xmm10,%xmm1 + pxor %xmm11,%xmm2 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + paddd %xmm1,%xmm4 + paddd %xmm2,%xmm5 + pxor %xmm4,%xmm15 + pxor %xmm5,%xmm12 + movdqa %xmm15,%xmm6 + pslld $12,%xmm15 + psrld $20,%xmm6 + movdqa %xmm12,%xmm7 + pslld $12,%xmm12 + por %xmm6,%xmm15 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm12 + paddd %xmm15,%xmm10 + paddd %xmm12,%xmm11 + pxor %xmm10,%xmm1 + pxor %xmm11,%xmm2 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + paddd %xmm1,%xmm4 + paddd %xmm2,%xmm5 + pxor %xmm4,%xmm15 + pxor %xmm5,%xmm12 + movdqa %xmm15,%xmm7 + pslld $7,%xmm15 + psrld $25,%xmm7 + movdqa %xmm12,%xmm6 + pslld $7,%xmm12 + por %xmm7,%xmm15 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm12 + decl %eax + jnz .Loop4x + + paddd 64(%rsp),%xmm8 + paddd 80(%rsp),%xmm9 + paddd 96(%rsp),%xmm10 + paddd 112(%rsp),%xmm11 + + movdqa %xmm8,%xmm6 + punpckldq %xmm9,%xmm8 + movdqa %xmm10,%xmm7 + punpckldq %xmm11,%xmm10 + punpckhdq %xmm9,%xmm6 + punpckhdq %xmm11,%xmm7 + movdqa %xmm8,%xmm9 + punpcklqdq %xmm10,%xmm8 + movdqa %xmm6,%xmm11 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm10,%xmm9 + punpckhqdq %xmm7,%xmm11 + paddd 128-256(%rcx),%xmm12 + paddd 144-256(%rcx),%xmm13 + paddd 160-256(%rcx),%xmm14 + paddd 176-256(%rcx),%xmm15 + + movdqa %xmm8,0(%rsp) + movdqa %xmm9,16(%rsp) + movdqa 32(%rsp),%xmm8 + movdqa 48(%rsp),%xmm9 + + movdqa %xmm12,%xmm10 + punpckldq %xmm13,%xmm12 + movdqa %xmm14,%xmm7 + punpckldq %xmm15,%xmm14 + punpckhdq %xmm13,%xmm10 + punpckhdq %xmm15,%xmm7 + movdqa %xmm12,%xmm13 + punpcklqdq %xmm14,%xmm12 + movdqa %xmm10,%xmm15 + punpcklqdq %xmm7,%xmm10 + punpckhqdq %xmm14,%xmm13 + punpckhqdq %xmm7,%xmm15 + paddd 192-256(%rcx),%xmm4 + paddd 208-256(%rcx),%xmm5 + paddd 224-256(%rcx),%xmm8 + paddd 240-256(%rcx),%xmm9 + + movdqa %xmm6,32(%rsp) + movdqa %xmm11,48(%rsp) + + movdqa %xmm4,%xmm14 + punpckldq %xmm5,%xmm4 + movdqa %xmm8,%xmm7 + punpckldq %xmm9,%xmm8 + punpckhdq %xmm5,%xmm14 + punpckhdq %xmm9,%xmm7 + movdqa %xmm4,%xmm5 + punpcklqdq %xmm8,%xmm4 + movdqa %xmm14,%xmm9 + punpcklqdq %xmm7,%xmm14 + punpckhqdq %xmm8,%xmm5 + punpckhqdq %xmm7,%xmm9 + paddd 256-256(%rcx),%xmm0 + paddd 272-256(%rcx),%xmm1 + paddd 288-256(%rcx),%xmm2 + paddd 304-256(%rcx),%xmm3 + + movdqa %xmm0,%xmm8 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm8 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm8,%xmm3 + punpcklqdq %xmm7,%xmm8 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + cmpq $256,%rdx + jb .Ltail4x + + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + + movdqu %xmm6,64(%rdi) + movdqu 0(%rsi),%xmm6 + movdqu %xmm11,80(%rdi) + movdqu 16(%rsi),%xmm11 + movdqu %xmm2,96(%rdi) + movdqu 32(%rsi),%xmm2 + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + movdqu 48(%rsi),%xmm7 + pxor 32(%rsp),%xmm6 + pxor %xmm10,%xmm11 + pxor %xmm14,%xmm2 + pxor %xmm8,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 48(%rsp),%xmm6 + pxor %xmm15,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm3,%xmm7 + movdqu %xmm6,64(%rdi) + movdqu %xmm11,80(%rdi) + movdqu %xmm2,96(%rdi) + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + + subq $256,%rdx + jnz .Loop_outer4x + + jmp .Ldone4x + +.Ltail4x: + cmpq $192,%rdx + jae .L192_or_more4x + cmpq $128,%rdx + jae .L128_or_more4x + cmpq $64,%rdx + jae .L64_or_more4x + + + xorq %r10,%r10 + + movdqa %xmm12,16(%rsp) + movdqa %xmm4,32(%rsp) + movdqa %xmm0,48(%rsp) + jmp .Loop_tail4x + +.align 32 +.L64_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + movdqu %xmm6,0(%rdi) + movdqu %xmm11,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm7,48(%rdi) + je .Ldone4x + + movdqa 16(%rsp),%xmm6 + leaq 64(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm13,16(%rsp) + leaq 64(%rdi),%rdi + movdqa %xmm5,32(%rsp) + subq $64,%rdx + movdqa %xmm1,48(%rsp) + jmp .Loop_tail4x + +.align 32 +.L128_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + movdqu %xmm6,64(%rdi) + movdqu %xmm11,80(%rdi) + movdqu %xmm2,96(%rdi) + movdqu %xmm7,112(%rdi) + je .Ldone4x + + movdqa 32(%rsp),%xmm6 + leaq 128(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm10,16(%rsp) + leaq 128(%rdi),%rdi + movdqa %xmm14,32(%rsp) + subq $128,%rdx + movdqa %xmm8,48(%rsp) + jmp .Loop_tail4x + +.align 32 +.L192_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + + movdqu %xmm6,64(%rdi) + movdqu 0(%rsi),%xmm6 + movdqu %xmm11,80(%rdi) + movdqu 16(%rsi),%xmm11 + movdqu %xmm2,96(%rdi) + movdqu 32(%rsi),%xmm2 + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + movdqu 48(%rsi),%xmm7 + pxor 32(%rsp),%xmm6 + pxor %xmm10,%xmm11 + pxor %xmm14,%xmm2 + pxor %xmm8,%xmm7 + movdqu %xmm6,0(%rdi) + movdqu %xmm11,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm7,48(%rdi) + je .Ldone4x + + movdqa 48(%rsp),%xmm6 + leaq 64(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm15,16(%rsp) + leaq 64(%rdi),%rdi + movdqa %xmm9,32(%rsp) + subq $192,%rdx + movdqa %xmm3,48(%rsp) + +.Loop_tail4x: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb %al,-1(%rdi,%r10,1) + decq %rdx + jnz .Loop_tail4x + +.Ldone4x: + leaq (%r9),%rsp +.cfi_def_cfa_register rsp +.L4x_epilogue: + ret +.cfi_endproc +.size ChaCha20_ctr32_ssse3_4x,.-ChaCha20_ctr32_ssse3_4x +.globl ChaCha20_ctr32_avx2 +.hidden ChaCha20_ctr32_avx2 +.type ChaCha20_ctr32_avx2,@function +.align 32 +ChaCha20_ctr32_avx2: +.cfi_startproc +_CET_ENDBR + movq %rsp,%r9 +.cfi_def_cfa_register r9 + subq $0x280+8,%rsp + andq $-32,%rsp + vzeroupper + + + + + + + + + + + vbroadcasti128 .Lsigma(%rip),%ymm11 + vbroadcasti128 (%rcx),%ymm3 + vbroadcasti128 16(%rcx),%ymm15 + vbroadcasti128 (%r8),%ymm7 + leaq 256(%rsp),%rcx + leaq 512(%rsp),%rax + leaq .Lrot16(%rip),%r10 + leaq .Lrot24(%rip),%r11 + + vpshufd $0x00,%ymm11,%ymm8 + vpshufd $0x55,%ymm11,%ymm9 + vmovdqa %ymm8,128-256(%rcx) + vpshufd $0xaa,%ymm11,%ymm10 + vmovdqa %ymm9,160-256(%rcx) + vpshufd $0xff,%ymm11,%ymm11 + vmovdqa %ymm10,192-256(%rcx) + vmovdqa %ymm11,224-256(%rcx) + + vpshufd $0x00,%ymm3,%ymm0 + vpshufd $0x55,%ymm3,%ymm1 + vmovdqa %ymm0,256-256(%rcx) + vpshufd $0xaa,%ymm3,%ymm2 + vmovdqa %ymm1,288-256(%rcx) + vpshufd $0xff,%ymm3,%ymm3 + vmovdqa %ymm2,320-256(%rcx) + vmovdqa %ymm3,352-256(%rcx) + + vpshufd $0x00,%ymm15,%ymm12 + vpshufd $0x55,%ymm15,%ymm13 + vmovdqa %ymm12,384-512(%rax) + vpshufd $0xaa,%ymm15,%ymm14 + vmovdqa %ymm13,416-512(%rax) + vpshufd $0xff,%ymm15,%ymm15 + vmovdqa %ymm14,448-512(%rax) + vmovdqa %ymm15,480-512(%rax) + + vpshufd $0x00,%ymm7,%ymm4 + vpshufd $0x55,%ymm7,%ymm5 + vpaddd .Lincy(%rip),%ymm4,%ymm4 + vpshufd $0xaa,%ymm7,%ymm6 + vmovdqa %ymm5,544-512(%rax) + vpshufd $0xff,%ymm7,%ymm7 + vmovdqa %ymm6,576-512(%rax) + vmovdqa %ymm7,608-512(%rax) + + jmp .Loop_enter8x + +.align 32 +.Loop_outer8x: + vmovdqa 128-256(%rcx),%ymm8 + vmovdqa 160-256(%rcx),%ymm9 + vmovdqa 192-256(%rcx),%ymm10 + vmovdqa 224-256(%rcx),%ymm11 + vmovdqa 256-256(%rcx),%ymm0 + vmovdqa 288-256(%rcx),%ymm1 + vmovdqa 320-256(%rcx),%ymm2 + vmovdqa 352-256(%rcx),%ymm3 + vmovdqa 384-512(%rax),%ymm12 + vmovdqa 416-512(%rax),%ymm13 + vmovdqa 448-512(%rax),%ymm14 + vmovdqa 480-512(%rax),%ymm15 + vmovdqa 512-512(%rax),%ymm4 + vmovdqa 544-512(%rax),%ymm5 + vmovdqa 576-512(%rax),%ymm6 + vmovdqa 608-512(%rax),%ymm7 + vpaddd .Leight(%rip),%ymm4,%ymm4 + +.Loop_enter8x: + vmovdqa %ymm14,64(%rsp) + vmovdqa %ymm15,96(%rsp) + vbroadcasti128 (%r10),%ymm15 + vmovdqa %ymm4,512-512(%rax) + movl $10,%eax + jmp .Loop8x + +.align 32 +.Loop8x: + vpaddd %ymm0,%ymm8,%ymm8 + vpxor %ymm4,%ymm8,%ymm4 + vpshufb %ymm15,%ymm4,%ymm4 + vpaddd %ymm1,%ymm9,%ymm9 + vpxor %ymm5,%ymm9,%ymm5 + vpshufb %ymm15,%ymm5,%ymm5 + vpaddd %ymm4,%ymm12,%ymm12 + vpxor %ymm0,%ymm12,%ymm0 + vpslld $12,%ymm0,%ymm14 + vpsrld $20,%ymm0,%ymm0 + vpor %ymm0,%ymm14,%ymm0 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm5,%ymm13,%ymm13 + vpxor %ymm1,%ymm13,%ymm1 + vpslld $12,%ymm1,%ymm15 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm1,%ymm15,%ymm1 + vpaddd %ymm0,%ymm8,%ymm8 + vpxor %ymm4,%ymm8,%ymm4 + vpshufb %ymm14,%ymm4,%ymm4 + vpaddd %ymm1,%ymm9,%ymm9 + vpxor %ymm5,%ymm9,%ymm5 + vpshufb %ymm14,%ymm5,%ymm5 + vpaddd %ymm4,%ymm12,%ymm12 + vpxor %ymm0,%ymm12,%ymm0 + vpslld $7,%ymm0,%ymm15 + vpsrld $25,%ymm0,%ymm0 + vpor %ymm0,%ymm15,%ymm0 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm5,%ymm13,%ymm13 + vpxor %ymm1,%ymm13,%ymm1 + vpslld $7,%ymm1,%ymm14 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm1,%ymm14,%ymm1 + vmovdqa %ymm12,0(%rsp) + vmovdqa %ymm13,32(%rsp) + vmovdqa 64(%rsp),%ymm12 + vmovdqa 96(%rsp),%ymm13 + vpaddd %ymm2,%ymm10,%ymm10 + vpxor %ymm6,%ymm10,%ymm6 + vpshufb %ymm15,%ymm6,%ymm6 + vpaddd %ymm3,%ymm11,%ymm11 + vpxor %ymm7,%ymm11,%ymm7 + vpshufb %ymm15,%ymm7,%ymm7 + vpaddd %ymm6,%ymm12,%ymm12 + vpxor %ymm2,%ymm12,%ymm2 + vpslld $12,%ymm2,%ymm14 + vpsrld $20,%ymm2,%ymm2 + vpor %ymm2,%ymm14,%ymm2 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm7,%ymm13,%ymm13 + vpxor %ymm3,%ymm13,%ymm3 + vpslld $12,%ymm3,%ymm15 + vpsrld $20,%ymm3,%ymm3 + vpor %ymm3,%ymm15,%ymm3 + vpaddd %ymm2,%ymm10,%ymm10 + vpxor %ymm6,%ymm10,%ymm6 + vpshufb %ymm14,%ymm6,%ymm6 + vpaddd %ymm3,%ymm11,%ymm11 + vpxor %ymm7,%ymm11,%ymm7 + vpshufb %ymm14,%ymm7,%ymm7 + vpaddd %ymm6,%ymm12,%ymm12 + vpxor %ymm2,%ymm12,%ymm2 + vpslld $7,%ymm2,%ymm15 + vpsrld $25,%ymm2,%ymm2 + vpor %ymm2,%ymm15,%ymm2 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm7,%ymm13,%ymm13 + vpxor %ymm3,%ymm13,%ymm3 + vpslld $7,%ymm3,%ymm14 + vpsrld $25,%ymm3,%ymm3 + vpor %ymm3,%ymm14,%ymm3 + vpaddd %ymm1,%ymm8,%ymm8 + vpxor %ymm7,%ymm8,%ymm7 + vpshufb %ymm15,%ymm7,%ymm7 + vpaddd %ymm2,%ymm9,%ymm9 + vpxor %ymm4,%ymm9,%ymm4 + vpshufb %ymm15,%ymm4,%ymm4 + vpaddd %ymm7,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm1 + vpslld $12,%ymm1,%ymm14 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm1,%ymm14,%ymm1 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm4,%ymm13,%ymm13 + vpxor %ymm2,%ymm13,%ymm2 + vpslld $12,%ymm2,%ymm15 + vpsrld $20,%ymm2,%ymm2 + vpor %ymm2,%ymm15,%ymm2 + vpaddd %ymm1,%ymm8,%ymm8 + vpxor %ymm7,%ymm8,%ymm7 + vpshufb %ymm14,%ymm7,%ymm7 + vpaddd %ymm2,%ymm9,%ymm9 + vpxor %ymm4,%ymm9,%ymm4 + vpshufb %ymm14,%ymm4,%ymm4 + vpaddd %ymm7,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm1 + vpslld $7,%ymm1,%ymm15 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm1,%ymm15,%ymm1 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm4,%ymm13,%ymm13 + vpxor %ymm2,%ymm13,%ymm2 + vpslld $7,%ymm2,%ymm14 + vpsrld $25,%ymm2,%ymm2 + vpor %ymm2,%ymm14,%ymm2 + vmovdqa %ymm12,64(%rsp) + vmovdqa %ymm13,96(%rsp) + vmovdqa 0(%rsp),%ymm12 + vmovdqa 32(%rsp),%ymm13 + vpaddd %ymm3,%ymm10,%ymm10 + vpxor %ymm5,%ymm10,%ymm5 + vpshufb %ymm15,%ymm5,%ymm5 + vpaddd %ymm0,%ymm11,%ymm11 + vpxor %ymm6,%ymm11,%ymm6 + vpshufb %ymm15,%ymm6,%ymm6 + vpaddd %ymm5,%ymm12,%ymm12 + vpxor %ymm3,%ymm12,%ymm3 + vpslld $12,%ymm3,%ymm14 + vpsrld $20,%ymm3,%ymm3 + vpor %ymm3,%ymm14,%ymm3 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm6,%ymm13,%ymm13 + vpxor %ymm0,%ymm13,%ymm0 + vpslld $12,%ymm0,%ymm15 + vpsrld $20,%ymm0,%ymm0 + vpor %ymm0,%ymm15,%ymm0 + vpaddd %ymm3,%ymm10,%ymm10 + vpxor %ymm5,%ymm10,%ymm5 + vpshufb %ymm14,%ymm5,%ymm5 + vpaddd %ymm0,%ymm11,%ymm11 + vpxor %ymm6,%ymm11,%ymm6 + vpshufb %ymm14,%ymm6,%ymm6 + vpaddd %ymm5,%ymm12,%ymm12 + vpxor %ymm3,%ymm12,%ymm3 + vpslld $7,%ymm3,%ymm15 + vpsrld $25,%ymm3,%ymm3 + vpor %ymm3,%ymm15,%ymm3 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm6,%ymm13,%ymm13 + vpxor %ymm0,%ymm13,%ymm0 + vpslld $7,%ymm0,%ymm14 + vpsrld $25,%ymm0,%ymm0 + vpor %ymm0,%ymm14,%ymm0 + decl %eax + jnz .Loop8x + + leaq 512(%rsp),%rax + vpaddd 128-256(%rcx),%ymm8,%ymm8 + vpaddd 160-256(%rcx),%ymm9,%ymm9 + vpaddd 192-256(%rcx),%ymm10,%ymm10 + vpaddd 224-256(%rcx),%ymm11,%ymm11 + + vpunpckldq %ymm9,%ymm8,%ymm14 + vpunpckldq %ymm11,%ymm10,%ymm15 + vpunpckhdq %ymm9,%ymm8,%ymm8 + vpunpckhdq %ymm11,%ymm10,%ymm10 + vpunpcklqdq %ymm15,%ymm14,%ymm9 + vpunpckhqdq %ymm15,%ymm14,%ymm14 + vpunpcklqdq %ymm10,%ymm8,%ymm11 + vpunpckhqdq %ymm10,%ymm8,%ymm8 + vpaddd 256-256(%rcx),%ymm0,%ymm0 + vpaddd 288-256(%rcx),%ymm1,%ymm1 + vpaddd 320-256(%rcx),%ymm2,%ymm2 + vpaddd 352-256(%rcx),%ymm3,%ymm3 + + vpunpckldq %ymm1,%ymm0,%ymm10 + vpunpckldq %ymm3,%ymm2,%ymm15 + vpunpckhdq %ymm1,%ymm0,%ymm0 + vpunpckhdq %ymm3,%ymm2,%ymm2 + vpunpcklqdq %ymm15,%ymm10,%ymm1 + vpunpckhqdq %ymm15,%ymm10,%ymm10 + vpunpcklqdq %ymm2,%ymm0,%ymm3 + vpunpckhqdq %ymm2,%ymm0,%ymm0 + vperm2i128 $0x20,%ymm1,%ymm9,%ymm15 + vperm2i128 $0x31,%ymm1,%ymm9,%ymm1 + vperm2i128 $0x20,%ymm10,%ymm14,%ymm9 + vperm2i128 $0x31,%ymm10,%ymm14,%ymm10 + vperm2i128 $0x20,%ymm3,%ymm11,%ymm14 + vperm2i128 $0x31,%ymm3,%ymm11,%ymm3 + vperm2i128 $0x20,%ymm0,%ymm8,%ymm11 + vperm2i128 $0x31,%ymm0,%ymm8,%ymm0 + vmovdqa %ymm15,0(%rsp) + vmovdqa %ymm9,32(%rsp) + vmovdqa 64(%rsp),%ymm15 + vmovdqa 96(%rsp),%ymm9 + + vpaddd 384-512(%rax),%ymm12,%ymm12 + vpaddd 416-512(%rax),%ymm13,%ymm13 + vpaddd 448-512(%rax),%ymm15,%ymm15 + vpaddd 480-512(%rax),%ymm9,%ymm9 + + vpunpckldq %ymm13,%ymm12,%ymm2 + vpunpckldq %ymm9,%ymm15,%ymm8 + vpunpckhdq %ymm13,%ymm12,%ymm12 + vpunpckhdq %ymm9,%ymm15,%ymm15 + vpunpcklqdq %ymm8,%ymm2,%ymm13 + vpunpckhqdq %ymm8,%ymm2,%ymm2 + vpunpcklqdq %ymm15,%ymm12,%ymm9 + vpunpckhqdq %ymm15,%ymm12,%ymm12 + vpaddd 512-512(%rax),%ymm4,%ymm4 + vpaddd 544-512(%rax),%ymm5,%ymm5 + vpaddd 576-512(%rax),%ymm6,%ymm6 + vpaddd 608-512(%rax),%ymm7,%ymm7 + + vpunpckldq %ymm5,%ymm4,%ymm15 + vpunpckldq %ymm7,%ymm6,%ymm8 + vpunpckhdq %ymm5,%ymm4,%ymm4 + vpunpckhdq %ymm7,%ymm6,%ymm6 + vpunpcklqdq %ymm8,%ymm15,%ymm5 + vpunpckhqdq %ymm8,%ymm15,%ymm15 + vpunpcklqdq %ymm6,%ymm4,%ymm7 + vpunpckhqdq %ymm6,%ymm4,%ymm4 + vperm2i128 $0x20,%ymm5,%ymm13,%ymm8 + vperm2i128 $0x31,%ymm5,%ymm13,%ymm5 + vperm2i128 $0x20,%ymm15,%ymm2,%ymm13 + vperm2i128 $0x31,%ymm15,%ymm2,%ymm15 + vperm2i128 $0x20,%ymm7,%ymm9,%ymm2 + vperm2i128 $0x31,%ymm7,%ymm9,%ymm7 + vperm2i128 $0x20,%ymm4,%ymm12,%ymm9 + vperm2i128 $0x31,%ymm4,%ymm12,%ymm4 + vmovdqa 0(%rsp),%ymm6 + vmovdqa 32(%rsp),%ymm12 + + cmpq $512,%rdx + jb .Ltail8x + + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + leaq 128(%rsi),%rsi + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm12,%ymm12 + vpxor 32(%rsi),%ymm13,%ymm13 + vpxor 64(%rsi),%ymm10,%ymm10 + vpxor 96(%rsi),%ymm15,%ymm15 + leaq 128(%rsi),%rsi + vmovdqu %ymm12,0(%rdi) + vmovdqu %ymm13,32(%rdi) + vmovdqu %ymm10,64(%rdi) + vmovdqu %ymm15,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm14,%ymm14 + vpxor 32(%rsi),%ymm2,%ymm2 + vpxor 64(%rsi),%ymm3,%ymm3 + vpxor 96(%rsi),%ymm7,%ymm7 + leaq 128(%rsi),%rsi + vmovdqu %ymm14,0(%rdi) + vmovdqu %ymm2,32(%rdi) + vmovdqu %ymm3,64(%rdi) + vmovdqu %ymm7,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm11,%ymm11 + vpxor 32(%rsi),%ymm9,%ymm9 + vpxor 64(%rsi),%ymm0,%ymm0 + vpxor 96(%rsi),%ymm4,%ymm4 + leaq 128(%rsi),%rsi + vmovdqu %ymm11,0(%rdi) + vmovdqu %ymm9,32(%rdi) + vmovdqu %ymm0,64(%rdi) + vmovdqu %ymm4,96(%rdi) + leaq 128(%rdi),%rdi + + subq $512,%rdx + jnz .Loop_outer8x + + jmp .Ldone8x + +.Ltail8x: + cmpq $448,%rdx + jae .L448_or_more8x + cmpq $384,%rdx + jae .L384_or_more8x + cmpq $320,%rdx + jae .L320_or_more8x + cmpq $256,%rdx + jae .L256_or_more8x + cmpq $192,%rdx + jae .L192_or_more8x + cmpq $128,%rdx + jae .L128_or_more8x + cmpq $64,%rdx + jae .L64_or_more8x + + xorq %r10,%r10 + vmovdqa %ymm6,0(%rsp) + vmovdqa %ymm8,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L64_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + je .Ldone8x + + leaq 64(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm1,0(%rsp) + leaq 64(%rdi),%rdi + subq $64,%rdx + vmovdqa %ymm5,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L128_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + je .Ldone8x + + leaq 128(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm12,0(%rsp) + leaq 128(%rdi),%rdi + subq $128,%rdx + vmovdqa %ymm13,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L192_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + je .Ldone8x + + leaq 192(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm10,0(%rsp) + leaq 192(%rdi),%rdi + subq $192,%rdx + vmovdqa %ymm15,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L256_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + je .Ldone8x + + leaq 256(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm14,0(%rsp) + leaq 256(%rdi),%rdi + subq $256,%rdx + vmovdqa %ymm2,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L320_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + je .Ldone8x + + leaq 320(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm3,0(%rsp) + leaq 320(%rdi),%rdi + subq $320,%rdx + vmovdqa %ymm7,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L384_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vpxor 320(%rsi),%ymm3,%ymm3 + vpxor 352(%rsi),%ymm7,%ymm7 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + vmovdqu %ymm3,320(%rdi) + vmovdqu %ymm7,352(%rdi) + je .Ldone8x + + leaq 384(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm11,0(%rsp) + leaq 384(%rdi),%rdi + subq $384,%rdx + vmovdqa %ymm9,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L448_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vpxor 320(%rsi),%ymm3,%ymm3 + vpxor 352(%rsi),%ymm7,%ymm7 + vpxor 384(%rsi),%ymm11,%ymm11 + vpxor 416(%rsi),%ymm9,%ymm9 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + vmovdqu %ymm3,320(%rdi) + vmovdqu %ymm7,352(%rdi) + vmovdqu %ymm11,384(%rdi) + vmovdqu %ymm9,416(%rdi) + je .Ldone8x + + leaq 448(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm0,0(%rsp) + leaq 448(%rdi),%rdi + subq $448,%rdx + vmovdqa %ymm4,32(%rsp) + +.Loop_tail8x: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb %al,-1(%rdi,%r10,1) + decq %rdx + jnz .Loop_tail8x + +.Ldone8x: + vzeroall + leaq (%r9),%rsp +.cfi_def_cfa_register rsp +.L8x_epilogue: + ret +.cfi_endproc +.size ChaCha20_ctr32_avx2,.-ChaCha20_ctr32_avx2 +#endif diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-win.asm b/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-win.asm new file mode 100644 index 0000000000..14f239511f --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha-x86_64-win.asm @@ -0,0 +1,1916 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + +section .rdata rdata align=8 +ALIGN 64 +$L$zero: + DD 0,0,0,0 +$L$one: + DD 1,0,0,0 +$L$inc: + DD 0,1,2,3 +$L$four: + DD 4,4,4,4 +$L$incy: + DD 0,2,4,6,1,3,5,7 +$L$eight: + DD 8,8,8,8,8,8,8,8 +$L$rot16: + DB 0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd +$L$rot24: + DB 0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe +$L$sigma: + DB 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107 + DB 0 +ALIGN 64 +$L$zeroz: + DD 0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0 +$L$fourz: + DD 4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0 +$L$incz: + DD 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +$L$sixteen: + DD 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 + DB 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 + DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32 + DB 98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115 + DB 108,46,111,114,103,62,0 +section .text + +global ChaCha20_ctr32_nohw + +ALIGN 64 +ChaCha20_ctr32_nohw: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ChaCha20_ctr32_nohw: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,64+24 + +$L$ctr32_body: + + + movdqu xmm1,XMMWORD[rcx] + movdqu xmm2,XMMWORD[16+rcx] + movdqu xmm3,XMMWORD[r8] + movdqa xmm4,XMMWORD[$L$one] + + + movdqa XMMWORD[16+rsp],xmm1 + movdqa XMMWORD[32+rsp],xmm2 + movdqa XMMWORD[48+rsp],xmm3 + mov rbp,rdx + jmp NEAR $L$oop_outer + +ALIGN 32 +$L$oop_outer: + mov eax,0x61707865 + mov ebx,0x3320646e + mov ecx,0x79622d32 + mov edx,0x6b206574 + mov r8d,DWORD[16+rsp] + mov r9d,DWORD[20+rsp] + mov r10d,DWORD[24+rsp] + mov r11d,DWORD[28+rsp] + movd r12d,xmm3 + mov r13d,DWORD[52+rsp] + mov r14d,DWORD[56+rsp] + mov r15d,DWORD[60+rsp] + + mov QWORD[((64+0))+rsp],rbp + mov ebp,10 + mov QWORD[((64+8))+rsp],rsi +DB 102,72,15,126,214 + mov QWORD[((64+16))+rsp],rdi + mov rdi,rsi + shr rdi,32 + jmp NEAR $L$oop + +ALIGN 32 +$L$oop: + add eax,r8d + xor r12d,eax + rol r12d,16 + add ebx,r9d + xor r13d,ebx + rol r13d,16 + add esi,r12d + xor r8d,esi + rol r8d,12 + add edi,r13d + xor r9d,edi + rol r9d,12 + add eax,r8d + xor r12d,eax + rol r12d,8 + add ebx,r9d + xor r13d,ebx + rol r13d,8 + add esi,r12d + xor r8d,esi + rol r8d,7 + add edi,r13d + xor r9d,edi + rol r9d,7 + mov DWORD[32+rsp],esi + mov DWORD[36+rsp],edi + mov esi,DWORD[40+rsp] + mov edi,DWORD[44+rsp] + add ecx,r10d + xor r14d,ecx + rol r14d,16 + add edx,r11d + xor r15d,edx + rol r15d,16 + add esi,r14d + xor r10d,esi + rol r10d,12 + add edi,r15d + xor r11d,edi + rol r11d,12 + add ecx,r10d + xor r14d,ecx + rol r14d,8 + add edx,r11d + xor r15d,edx + rol r15d,8 + add esi,r14d + xor r10d,esi + rol r10d,7 + add edi,r15d + xor r11d,edi + rol r11d,7 + add eax,r9d + xor r15d,eax + rol r15d,16 + add ebx,r10d + xor r12d,ebx + rol r12d,16 + add esi,r15d + xor r9d,esi + rol r9d,12 + add edi,r12d + xor r10d,edi + rol r10d,12 + add eax,r9d + xor r15d,eax + rol r15d,8 + add ebx,r10d + xor r12d,ebx + rol r12d,8 + add esi,r15d + xor r9d,esi + rol r9d,7 + add edi,r12d + xor r10d,edi + rol r10d,7 + mov DWORD[40+rsp],esi + mov DWORD[44+rsp],edi + mov esi,DWORD[32+rsp] + mov edi,DWORD[36+rsp] + add ecx,r11d + xor r13d,ecx + rol r13d,16 + add edx,r8d + xor r14d,edx + rol r14d,16 + add esi,r13d + xor r11d,esi + rol r11d,12 + add edi,r14d + xor r8d,edi + rol r8d,12 + add ecx,r11d + xor r13d,ecx + rol r13d,8 + add edx,r8d + xor r14d,edx + rol r14d,8 + add esi,r13d + xor r11d,esi + rol r11d,7 + add edi,r14d + xor r8d,edi + rol r8d,7 + dec ebp + jnz NEAR $L$oop + mov DWORD[36+rsp],edi + mov DWORD[32+rsp],esi + mov rbp,QWORD[64+rsp] + movdqa xmm1,xmm2 + mov rsi,QWORD[((64+8))+rsp] + paddd xmm3,xmm4 + mov rdi,QWORD[((64+16))+rsp] + + add eax,0x61707865 + add ebx,0x3320646e + add ecx,0x79622d32 + add edx,0x6b206574 + add r8d,DWORD[16+rsp] + add r9d,DWORD[20+rsp] + add r10d,DWORD[24+rsp] + add r11d,DWORD[28+rsp] + add r12d,DWORD[48+rsp] + add r13d,DWORD[52+rsp] + add r14d,DWORD[56+rsp] + add r15d,DWORD[60+rsp] + paddd xmm1,XMMWORD[32+rsp] + + cmp rbp,64 + jb NEAR $L$tail + + xor eax,DWORD[rsi] + xor ebx,DWORD[4+rsi] + xor ecx,DWORD[8+rsi] + xor edx,DWORD[12+rsi] + xor r8d,DWORD[16+rsi] + xor r9d,DWORD[20+rsi] + xor r10d,DWORD[24+rsi] + xor r11d,DWORD[28+rsi] + movdqu xmm0,XMMWORD[32+rsi] + xor r12d,DWORD[48+rsi] + xor r13d,DWORD[52+rsi] + xor r14d,DWORD[56+rsi] + xor r15d,DWORD[60+rsi] + lea rsi,[64+rsi] + pxor xmm0,xmm1 + + movdqa XMMWORD[32+rsp],xmm2 + movd DWORD[48+rsp],xmm3 + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + movdqu XMMWORD[32+rdi],xmm0 + mov DWORD[48+rdi],r12d + mov DWORD[52+rdi],r13d + mov DWORD[56+rdi],r14d + mov DWORD[60+rdi],r15d + lea rdi,[64+rdi] + + sub rbp,64 + jnz NEAR $L$oop_outer + + jmp NEAR $L$done + +ALIGN 16 +$L$tail: + mov DWORD[rsp],eax + mov DWORD[4+rsp],ebx + xor rbx,rbx + mov DWORD[8+rsp],ecx + mov DWORD[12+rsp],edx + mov DWORD[16+rsp],r8d + mov DWORD[20+rsp],r9d + mov DWORD[24+rsp],r10d + mov DWORD[28+rsp],r11d + movdqa XMMWORD[32+rsp],xmm1 + mov DWORD[48+rsp],r12d + mov DWORD[52+rsp],r13d + mov DWORD[56+rsp],r14d + mov DWORD[60+rsp],r15d + +$L$oop_tail: + movzx eax,BYTE[rbx*1+rsi] + movzx edx,BYTE[rbx*1+rsp] + lea rbx,[1+rbx] + xor eax,edx + mov BYTE[((-1))+rbx*1+rdi],al + dec rbp + jnz NEAR $L$oop_tail + +$L$done: + lea rsi,[((64+24+48))+rsp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$no_data: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ChaCha20_ctr32_nohw: +global ChaCha20_ctr32_ssse3 + +ALIGN 32 +ChaCha20_ctr32_ssse3: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ChaCha20_ctr32_ssse3: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + mov r9,rsp + + sub rsp,64+40 + movaps XMMWORD[(-40)+r9],xmm6 + movaps XMMWORD[(-24)+r9],xmm7 +$L$ssse3_body: + movdqa xmm0,XMMWORD[$L$sigma] + movdqu xmm1,XMMWORD[rcx] + movdqu xmm2,XMMWORD[16+rcx] + movdqu xmm3,XMMWORD[r8] + movdqa xmm6,XMMWORD[$L$rot16] + movdqa xmm7,XMMWORD[$L$rot24] + + movdqa XMMWORD[rsp],xmm0 + movdqa XMMWORD[16+rsp],xmm1 + movdqa XMMWORD[32+rsp],xmm2 + movdqa XMMWORD[48+rsp],xmm3 + mov r8,10 + jmp NEAR $L$oop_ssse3 + +ALIGN 32 +$L$oop_outer_ssse3: + movdqa xmm3,XMMWORD[$L$one] + movdqa xmm0,XMMWORD[rsp] + movdqa xmm1,XMMWORD[16+rsp] + movdqa xmm2,XMMWORD[32+rsp] + paddd xmm3,XMMWORD[48+rsp] + mov r8,10 + movdqa XMMWORD[48+rsp],xmm3 + jmp NEAR $L$oop_ssse3 + +ALIGN 32 +$L$oop_ssse3: + paddd xmm0,xmm1 + pxor xmm3,xmm0 +DB 102,15,56,0,222 + paddd xmm2,xmm3 + pxor xmm1,xmm2 + movdqa xmm4,xmm1 + psrld xmm1,20 + pslld xmm4,12 + por xmm1,xmm4 + paddd xmm0,xmm1 + pxor xmm3,xmm0 +DB 102,15,56,0,223 + paddd xmm2,xmm3 + pxor xmm1,xmm2 + movdqa xmm4,xmm1 + psrld xmm1,25 + pslld xmm4,7 + por xmm1,xmm4 + pshufd xmm2,xmm2,78 + pshufd xmm1,xmm1,57 + pshufd xmm3,xmm3,147 + nop + paddd xmm0,xmm1 + pxor xmm3,xmm0 +DB 102,15,56,0,222 + paddd xmm2,xmm3 + pxor xmm1,xmm2 + movdqa xmm4,xmm1 + psrld xmm1,20 + pslld xmm4,12 + por xmm1,xmm4 + paddd xmm0,xmm1 + pxor xmm3,xmm0 +DB 102,15,56,0,223 + paddd xmm2,xmm3 + pxor xmm1,xmm2 + movdqa xmm4,xmm1 + psrld xmm1,25 + pslld xmm4,7 + por xmm1,xmm4 + pshufd xmm2,xmm2,78 + pshufd xmm1,xmm1,147 + pshufd xmm3,xmm3,57 + dec r8 + jnz NEAR $L$oop_ssse3 + paddd xmm0,XMMWORD[rsp] + paddd xmm1,XMMWORD[16+rsp] + paddd xmm2,XMMWORD[32+rsp] + paddd xmm3,XMMWORD[48+rsp] + + cmp rdx,64 + jb NEAR $L$tail_ssse3 + + movdqu xmm4,XMMWORD[rsi] + movdqu xmm5,XMMWORD[16+rsi] + pxor xmm0,xmm4 + movdqu xmm4,XMMWORD[32+rsi] + pxor xmm1,xmm5 + movdqu xmm5,XMMWORD[48+rsi] + lea rsi,[64+rsi] + pxor xmm2,xmm4 + pxor xmm3,xmm5 + + movdqu XMMWORD[rdi],xmm0 + movdqu XMMWORD[16+rdi],xmm1 + movdqu XMMWORD[32+rdi],xmm2 + movdqu XMMWORD[48+rdi],xmm3 + lea rdi,[64+rdi] + + sub rdx,64 + jnz NEAR $L$oop_outer_ssse3 + + jmp NEAR $L$done_ssse3 + +ALIGN 16 +$L$tail_ssse3: + movdqa XMMWORD[rsp],xmm0 + movdqa XMMWORD[16+rsp],xmm1 + movdqa XMMWORD[32+rsp],xmm2 + movdqa XMMWORD[48+rsp],xmm3 + xor r8,r8 + +$L$oop_tail_ssse3: + movzx eax,BYTE[r8*1+rsi] + movzx ecx,BYTE[r8*1+rsp] + lea r8,[1+r8] + xor eax,ecx + mov BYTE[((-1))+r8*1+rdi],al + dec rdx + jnz NEAR $L$oop_tail_ssse3 + +$L$done_ssse3: + movaps xmm6,XMMWORD[((-40))+r9] + movaps xmm7,XMMWORD[((-24))+r9] + lea rsp,[r9] + +$L$ssse3_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ChaCha20_ctr32_ssse3: +global ChaCha20_ctr32_ssse3_4x + +ALIGN 32 +ChaCha20_ctr32_ssse3_4x: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ChaCha20_ctr32_ssse3_4x: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + mov r9,rsp + + mov r11,r10 + sub rsp,0x140+168 + movaps XMMWORD[(-168)+r9],xmm6 + movaps XMMWORD[(-152)+r9],xmm7 + movaps XMMWORD[(-136)+r9],xmm8 + movaps XMMWORD[(-120)+r9],xmm9 + movaps XMMWORD[(-104)+r9],xmm10 + movaps XMMWORD[(-88)+r9],xmm11 + movaps XMMWORD[(-72)+r9],xmm12 + movaps XMMWORD[(-56)+r9],xmm13 + movaps XMMWORD[(-40)+r9],xmm14 + movaps XMMWORD[(-24)+r9],xmm15 +$L$4x_body: + movdqa xmm11,XMMWORD[$L$sigma] + movdqu xmm15,XMMWORD[rcx] + movdqu xmm7,XMMWORD[16+rcx] + movdqu xmm3,XMMWORD[r8] + lea rcx,[256+rsp] + lea r10,[$L$rot16] + lea r11,[$L$rot24] + + pshufd xmm8,xmm11,0x00 + pshufd xmm9,xmm11,0x55 + movdqa XMMWORD[64+rsp],xmm8 + pshufd xmm10,xmm11,0xaa + movdqa XMMWORD[80+rsp],xmm9 + pshufd xmm11,xmm11,0xff + movdqa XMMWORD[96+rsp],xmm10 + movdqa XMMWORD[112+rsp],xmm11 + + pshufd xmm12,xmm15,0x00 + pshufd xmm13,xmm15,0x55 + movdqa XMMWORD[(128-256)+rcx],xmm12 + pshufd xmm14,xmm15,0xaa + movdqa XMMWORD[(144-256)+rcx],xmm13 + pshufd xmm15,xmm15,0xff + movdqa XMMWORD[(160-256)+rcx],xmm14 + movdqa XMMWORD[(176-256)+rcx],xmm15 + + pshufd xmm4,xmm7,0x00 + pshufd xmm5,xmm7,0x55 + movdqa XMMWORD[(192-256)+rcx],xmm4 + pshufd xmm6,xmm7,0xaa + movdqa XMMWORD[(208-256)+rcx],xmm5 + pshufd xmm7,xmm7,0xff + movdqa XMMWORD[(224-256)+rcx],xmm6 + movdqa XMMWORD[(240-256)+rcx],xmm7 + + pshufd xmm0,xmm3,0x00 + pshufd xmm1,xmm3,0x55 + paddd xmm0,XMMWORD[$L$inc] + pshufd xmm2,xmm3,0xaa + movdqa XMMWORD[(272-256)+rcx],xmm1 + pshufd xmm3,xmm3,0xff + movdqa XMMWORD[(288-256)+rcx],xmm2 + movdqa XMMWORD[(304-256)+rcx],xmm3 + + jmp NEAR $L$oop_enter4x + +ALIGN 32 +$L$oop_outer4x: + movdqa xmm8,XMMWORD[64+rsp] + movdqa xmm9,XMMWORD[80+rsp] + movdqa xmm10,XMMWORD[96+rsp] + movdqa xmm11,XMMWORD[112+rsp] + movdqa xmm12,XMMWORD[((128-256))+rcx] + movdqa xmm13,XMMWORD[((144-256))+rcx] + movdqa xmm14,XMMWORD[((160-256))+rcx] + movdqa xmm15,XMMWORD[((176-256))+rcx] + movdqa xmm4,XMMWORD[((192-256))+rcx] + movdqa xmm5,XMMWORD[((208-256))+rcx] + movdqa xmm6,XMMWORD[((224-256))+rcx] + movdqa xmm7,XMMWORD[((240-256))+rcx] + movdqa xmm0,XMMWORD[((256-256))+rcx] + movdqa xmm1,XMMWORD[((272-256))+rcx] + movdqa xmm2,XMMWORD[((288-256))+rcx] + movdqa xmm3,XMMWORD[((304-256))+rcx] + paddd xmm0,XMMWORD[$L$four] + +$L$oop_enter4x: + movdqa XMMWORD[32+rsp],xmm6 + movdqa XMMWORD[48+rsp],xmm7 + movdqa xmm7,XMMWORD[r10] + mov eax,10 + movdqa XMMWORD[(256-256)+rcx],xmm0 + jmp NEAR $L$oop4x + +ALIGN 32 +$L$oop4x: + paddd xmm8,xmm12 + paddd xmm9,xmm13 + pxor xmm0,xmm8 + pxor xmm1,xmm9 +DB 102,15,56,0,199 +DB 102,15,56,0,207 + paddd xmm4,xmm0 + paddd xmm5,xmm1 + pxor xmm12,xmm4 + pxor xmm13,xmm5 + movdqa xmm6,xmm12 + pslld xmm12,12 + psrld xmm6,20 + movdqa xmm7,xmm13 + pslld xmm13,12 + por xmm12,xmm6 + psrld xmm7,20 + movdqa xmm6,XMMWORD[r11] + por xmm13,xmm7 + paddd xmm8,xmm12 + paddd xmm9,xmm13 + pxor xmm0,xmm8 + pxor xmm1,xmm9 +DB 102,15,56,0,198 +DB 102,15,56,0,206 + paddd xmm4,xmm0 + paddd xmm5,xmm1 + pxor xmm12,xmm4 + pxor xmm13,xmm5 + movdqa xmm7,xmm12 + pslld xmm12,7 + psrld xmm7,25 + movdqa xmm6,xmm13 + pslld xmm13,7 + por xmm12,xmm7 + psrld xmm6,25 + movdqa xmm7,XMMWORD[r10] + por xmm13,xmm6 + movdqa XMMWORD[rsp],xmm4 + movdqa XMMWORD[16+rsp],xmm5 + movdqa xmm4,XMMWORD[32+rsp] + movdqa xmm5,XMMWORD[48+rsp] + paddd xmm10,xmm14 + paddd xmm11,xmm15 + pxor xmm2,xmm10 + pxor xmm3,xmm11 +DB 102,15,56,0,215 +DB 102,15,56,0,223 + paddd xmm4,xmm2 + paddd xmm5,xmm3 + pxor xmm14,xmm4 + pxor xmm15,xmm5 + movdqa xmm6,xmm14 + pslld xmm14,12 + psrld xmm6,20 + movdqa xmm7,xmm15 + pslld xmm15,12 + por xmm14,xmm6 + psrld xmm7,20 + movdqa xmm6,XMMWORD[r11] + por xmm15,xmm7 + paddd xmm10,xmm14 + paddd xmm11,xmm15 + pxor xmm2,xmm10 + pxor xmm3,xmm11 +DB 102,15,56,0,214 +DB 102,15,56,0,222 + paddd xmm4,xmm2 + paddd xmm5,xmm3 + pxor xmm14,xmm4 + pxor xmm15,xmm5 + movdqa xmm7,xmm14 + pslld xmm14,7 + psrld xmm7,25 + movdqa xmm6,xmm15 + pslld xmm15,7 + por xmm14,xmm7 + psrld xmm6,25 + movdqa xmm7,XMMWORD[r10] + por xmm15,xmm6 + paddd xmm8,xmm13 + paddd xmm9,xmm14 + pxor xmm3,xmm8 + pxor xmm0,xmm9 +DB 102,15,56,0,223 +DB 102,15,56,0,199 + paddd xmm4,xmm3 + paddd xmm5,xmm0 + pxor xmm13,xmm4 + pxor xmm14,xmm5 + movdqa xmm6,xmm13 + pslld xmm13,12 + psrld xmm6,20 + movdqa xmm7,xmm14 + pslld xmm14,12 + por xmm13,xmm6 + psrld xmm7,20 + movdqa xmm6,XMMWORD[r11] + por xmm14,xmm7 + paddd xmm8,xmm13 + paddd xmm9,xmm14 + pxor xmm3,xmm8 + pxor xmm0,xmm9 +DB 102,15,56,0,222 +DB 102,15,56,0,198 + paddd xmm4,xmm3 + paddd xmm5,xmm0 + pxor xmm13,xmm4 + pxor xmm14,xmm5 + movdqa xmm7,xmm13 + pslld xmm13,7 + psrld xmm7,25 + movdqa xmm6,xmm14 + pslld xmm14,7 + por xmm13,xmm7 + psrld xmm6,25 + movdqa xmm7,XMMWORD[r10] + por xmm14,xmm6 + movdqa XMMWORD[32+rsp],xmm4 + movdqa XMMWORD[48+rsp],xmm5 + movdqa xmm4,XMMWORD[rsp] + movdqa xmm5,XMMWORD[16+rsp] + paddd xmm10,xmm15 + paddd xmm11,xmm12 + pxor xmm1,xmm10 + pxor xmm2,xmm11 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + paddd xmm4,xmm1 + paddd xmm5,xmm2 + pxor xmm15,xmm4 + pxor xmm12,xmm5 + movdqa xmm6,xmm15 + pslld xmm15,12 + psrld xmm6,20 + movdqa xmm7,xmm12 + pslld xmm12,12 + por xmm15,xmm6 + psrld xmm7,20 + movdqa xmm6,XMMWORD[r11] + por xmm12,xmm7 + paddd xmm10,xmm15 + paddd xmm11,xmm12 + pxor xmm1,xmm10 + pxor xmm2,xmm11 +DB 102,15,56,0,206 +DB 102,15,56,0,214 + paddd xmm4,xmm1 + paddd xmm5,xmm2 + pxor xmm15,xmm4 + pxor xmm12,xmm5 + movdqa xmm7,xmm15 + pslld xmm15,7 + psrld xmm7,25 + movdqa xmm6,xmm12 + pslld xmm12,7 + por xmm15,xmm7 + psrld xmm6,25 + movdqa xmm7,XMMWORD[r10] + por xmm12,xmm6 + dec eax + jnz NEAR $L$oop4x + + paddd xmm8,XMMWORD[64+rsp] + paddd xmm9,XMMWORD[80+rsp] + paddd xmm10,XMMWORD[96+rsp] + paddd xmm11,XMMWORD[112+rsp] + + movdqa xmm6,xmm8 + punpckldq xmm8,xmm9 + movdqa xmm7,xmm10 + punpckldq xmm10,xmm11 + punpckhdq xmm6,xmm9 + punpckhdq xmm7,xmm11 + movdqa xmm9,xmm8 + punpcklqdq xmm8,xmm10 + movdqa xmm11,xmm6 + punpcklqdq xmm6,xmm7 + punpckhqdq xmm9,xmm10 + punpckhqdq xmm11,xmm7 + paddd xmm12,XMMWORD[((128-256))+rcx] + paddd xmm13,XMMWORD[((144-256))+rcx] + paddd xmm14,XMMWORD[((160-256))+rcx] + paddd xmm15,XMMWORD[((176-256))+rcx] + + movdqa XMMWORD[rsp],xmm8 + movdqa XMMWORD[16+rsp],xmm9 + movdqa xmm8,XMMWORD[32+rsp] + movdqa xmm9,XMMWORD[48+rsp] + + movdqa xmm10,xmm12 + punpckldq xmm12,xmm13 + movdqa xmm7,xmm14 + punpckldq xmm14,xmm15 + punpckhdq xmm10,xmm13 + punpckhdq xmm7,xmm15 + movdqa xmm13,xmm12 + punpcklqdq xmm12,xmm14 + movdqa xmm15,xmm10 + punpcklqdq xmm10,xmm7 + punpckhqdq xmm13,xmm14 + punpckhqdq xmm15,xmm7 + paddd xmm4,XMMWORD[((192-256))+rcx] + paddd xmm5,XMMWORD[((208-256))+rcx] + paddd xmm8,XMMWORD[((224-256))+rcx] + paddd xmm9,XMMWORD[((240-256))+rcx] + + movdqa XMMWORD[32+rsp],xmm6 + movdqa XMMWORD[48+rsp],xmm11 + + movdqa xmm14,xmm4 + punpckldq xmm4,xmm5 + movdqa xmm7,xmm8 + punpckldq xmm8,xmm9 + punpckhdq xmm14,xmm5 + punpckhdq xmm7,xmm9 + movdqa xmm5,xmm4 + punpcklqdq xmm4,xmm8 + movdqa xmm9,xmm14 + punpcklqdq xmm14,xmm7 + punpckhqdq xmm5,xmm8 + punpckhqdq xmm9,xmm7 + paddd xmm0,XMMWORD[((256-256))+rcx] + paddd xmm1,XMMWORD[((272-256))+rcx] + paddd xmm2,XMMWORD[((288-256))+rcx] + paddd xmm3,XMMWORD[((304-256))+rcx] + + movdqa xmm8,xmm0 + punpckldq xmm0,xmm1 + movdqa xmm7,xmm2 + punpckldq xmm2,xmm3 + punpckhdq xmm8,xmm1 + punpckhdq xmm7,xmm3 + movdqa xmm1,xmm0 + punpcklqdq xmm0,xmm2 + movdqa xmm3,xmm8 + punpcklqdq xmm8,xmm7 + punpckhqdq xmm1,xmm2 + punpckhqdq xmm3,xmm7 + cmp rdx,64*4 + jb NEAR $L$tail4x + + movdqu xmm6,XMMWORD[rsi] + movdqu xmm11,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + movdqu xmm7,XMMWORD[48+rsi] + pxor xmm6,XMMWORD[rsp] + pxor xmm11,xmm12 + pxor xmm2,xmm4 + pxor xmm7,xmm0 + + movdqu XMMWORD[rdi],xmm6 + movdqu xmm6,XMMWORD[64+rsi] + movdqu XMMWORD[16+rdi],xmm11 + movdqu xmm11,XMMWORD[80+rsi] + movdqu XMMWORD[32+rdi],xmm2 + movdqu xmm2,XMMWORD[96+rsi] + movdqu XMMWORD[48+rdi],xmm7 + movdqu xmm7,XMMWORD[112+rsi] + lea rsi,[128+rsi] + pxor xmm6,XMMWORD[16+rsp] + pxor xmm11,xmm13 + pxor xmm2,xmm5 + pxor xmm7,xmm1 + + movdqu XMMWORD[64+rdi],xmm6 + movdqu xmm6,XMMWORD[rsi] + movdqu XMMWORD[80+rdi],xmm11 + movdqu xmm11,XMMWORD[16+rsi] + movdqu XMMWORD[96+rdi],xmm2 + movdqu xmm2,XMMWORD[32+rsi] + movdqu XMMWORD[112+rdi],xmm7 + lea rdi,[128+rdi] + movdqu xmm7,XMMWORD[48+rsi] + pxor xmm6,XMMWORD[32+rsp] + pxor xmm11,xmm10 + pxor xmm2,xmm14 + pxor xmm7,xmm8 + + movdqu XMMWORD[rdi],xmm6 + movdqu xmm6,XMMWORD[64+rsi] + movdqu XMMWORD[16+rdi],xmm11 + movdqu xmm11,XMMWORD[80+rsi] + movdqu XMMWORD[32+rdi],xmm2 + movdqu xmm2,XMMWORD[96+rsi] + movdqu XMMWORD[48+rdi],xmm7 + movdqu xmm7,XMMWORD[112+rsi] + lea rsi,[128+rsi] + pxor xmm6,XMMWORD[48+rsp] + pxor xmm11,xmm15 + pxor xmm2,xmm9 + pxor xmm7,xmm3 + movdqu XMMWORD[64+rdi],xmm6 + movdqu XMMWORD[80+rdi],xmm11 + movdqu XMMWORD[96+rdi],xmm2 + movdqu XMMWORD[112+rdi],xmm7 + lea rdi,[128+rdi] + + sub rdx,64*4 + jnz NEAR $L$oop_outer4x + + jmp NEAR $L$done4x + +$L$tail4x: + cmp rdx,192 + jae NEAR $L$192_or_more4x + cmp rdx,128 + jae NEAR $L$128_or_more4x + cmp rdx,64 + jae NEAR $L$64_or_more4x + + + xor r10,r10 + + movdqa XMMWORD[16+rsp],xmm12 + movdqa XMMWORD[32+rsp],xmm4 + movdqa XMMWORD[48+rsp],xmm0 + jmp NEAR $L$oop_tail4x + +ALIGN 32 +$L$64_or_more4x: + movdqu xmm6,XMMWORD[rsi] + movdqu xmm11,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + movdqu xmm7,XMMWORD[48+rsi] + pxor xmm6,XMMWORD[rsp] + pxor xmm11,xmm12 + pxor xmm2,xmm4 + pxor xmm7,xmm0 + movdqu XMMWORD[rdi],xmm6 + movdqu XMMWORD[16+rdi],xmm11 + movdqu XMMWORD[32+rdi],xmm2 + movdqu XMMWORD[48+rdi],xmm7 + je NEAR $L$done4x + + movdqa xmm6,XMMWORD[16+rsp] + lea rsi,[64+rsi] + xor r10,r10 + movdqa XMMWORD[rsp],xmm6 + movdqa XMMWORD[16+rsp],xmm13 + lea rdi,[64+rdi] + movdqa XMMWORD[32+rsp],xmm5 + sub rdx,64 + movdqa XMMWORD[48+rsp],xmm1 + jmp NEAR $L$oop_tail4x + +ALIGN 32 +$L$128_or_more4x: + movdqu xmm6,XMMWORD[rsi] + movdqu xmm11,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + movdqu xmm7,XMMWORD[48+rsi] + pxor xmm6,XMMWORD[rsp] + pxor xmm11,xmm12 + pxor xmm2,xmm4 + pxor xmm7,xmm0 + + movdqu XMMWORD[rdi],xmm6 + movdqu xmm6,XMMWORD[64+rsi] + movdqu XMMWORD[16+rdi],xmm11 + movdqu xmm11,XMMWORD[80+rsi] + movdqu XMMWORD[32+rdi],xmm2 + movdqu xmm2,XMMWORD[96+rsi] + movdqu XMMWORD[48+rdi],xmm7 + movdqu xmm7,XMMWORD[112+rsi] + pxor xmm6,XMMWORD[16+rsp] + pxor xmm11,xmm13 + pxor xmm2,xmm5 + pxor xmm7,xmm1 + movdqu XMMWORD[64+rdi],xmm6 + movdqu XMMWORD[80+rdi],xmm11 + movdqu XMMWORD[96+rdi],xmm2 + movdqu XMMWORD[112+rdi],xmm7 + je NEAR $L$done4x + + movdqa xmm6,XMMWORD[32+rsp] + lea rsi,[128+rsi] + xor r10,r10 + movdqa XMMWORD[rsp],xmm6 + movdqa XMMWORD[16+rsp],xmm10 + lea rdi,[128+rdi] + movdqa XMMWORD[32+rsp],xmm14 + sub rdx,128 + movdqa XMMWORD[48+rsp],xmm8 + jmp NEAR $L$oop_tail4x + +ALIGN 32 +$L$192_or_more4x: + movdqu xmm6,XMMWORD[rsi] + movdqu xmm11,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] + movdqu xmm7,XMMWORD[48+rsi] + pxor xmm6,XMMWORD[rsp] + pxor xmm11,xmm12 + pxor xmm2,xmm4 + pxor xmm7,xmm0 + + movdqu XMMWORD[rdi],xmm6 + movdqu xmm6,XMMWORD[64+rsi] + movdqu XMMWORD[16+rdi],xmm11 + movdqu xmm11,XMMWORD[80+rsi] + movdqu XMMWORD[32+rdi],xmm2 + movdqu xmm2,XMMWORD[96+rsi] + movdqu XMMWORD[48+rdi],xmm7 + movdqu xmm7,XMMWORD[112+rsi] + lea rsi,[128+rsi] + pxor xmm6,XMMWORD[16+rsp] + pxor xmm11,xmm13 + pxor xmm2,xmm5 + pxor xmm7,xmm1 + + movdqu XMMWORD[64+rdi],xmm6 + movdqu xmm6,XMMWORD[rsi] + movdqu XMMWORD[80+rdi],xmm11 + movdqu xmm11,XMMWORD[16+rsi] + movdqu XMMWORD[96+rdi],xmm2 + movdqu xmm2,XMMWORD[32+rsi] + movdqu XMMWORD[112+rdi],xmm7 + lea rdi,[128+rdi] + movdqu xmm7,XMMWORD[48+rsi] + pxor xmm6,XMMWORD[32+rsp] + pxor xmm11,xmm10 + pxor xmm2,xmm14 + pxor xmm7,xmm8 + movdqu XMMWORD[rdi],xmm6 + movdqu XMMWORD[16+rdi],xmm11 + movdqu XMMWORD[32+rdi],xmm2 + movdqu XMMWORD[48+rdi],xmm7 + je NEAR $L$done4x + + movdqa xmm6,XMMWORD[48+rsp] + lea rsi,[64+rsi] + xor r10,r10 + movdqa XMMWORD[rsp],xmm6 + movdqa XMMWORD[16+rsp],xmm15 + lea rdi,[64+rdi] + movdqa XMMWORD[32+rsp],xmm9 + sub rdx,192 + movdqa XMMWORD[48+rsp],xmm3 + +$L$oop_tail4x: + movzx eax,BYTE[r10*1+rsi] + movzx ecx,BYTE[r10*1+rsp] + lea r10,[1+r10] + xor eax,ecx + mov BYTE[((-1))+r10*1+rdi],al + dec rdx + jnz NEAR $L$oop_tail4x + +$L$done4x: + movaps xmm6,XMMWORD[((-168))+r9] + movaps xmm7,XMMWORD[((-152))+r9] + movaps xmm8,XMMWORD[((-136))+r9] + movaps xmm9,XMMWORD[((-120))+r9] + movaps xmm10,XMMWORD[((-104))+r9] + movaps xmm11,XMMWORD[((-88))+r9] + movaps xmm12,XMMWORD[((-72))+r9] + movaps xmm13,XMMWORD[((-56))+r9] + movaps xmm14,XMMWORD[((-40))+r9] + movaps xmm15,XMMWORD[((-24))+r9] + lea rsp,[r9] + +$L$4x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ChaCha20_ctr32_ssse3_4x: +global ChaCha20_ctr32_avx2 + +ALIGN 32 +ChaCha20_ctr32_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ChaCha20_ctr32_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +_CET_ENDBR + mov r9,rsp + + sub rsp,0x280+168 + and rsp,-32 + movaps XMMWORD[(-168)+r9],xmm6 + movaps XMMWORD[(-152)+r9],xmm7 + movaps XMMWORD[(-136)+r9],xmm8 + movaps XMMWORD[(-120)+r9],xmm9 + movaps XMMWORD[(-104)+r9],xmm10 + movaps XMMWORD[(-88)+r9],xmm11 + movaps XMMWORD[(-72)+r9],xmm12 + movaps XMMWORD[(-56)+r9],xmm13 + movaps XMMWORD[(-40)+r9],xmm14 + movaps XMMWORD[(-24)+r9],xmm15 +$L$8x_body: + vzeroupper + + + + + + + + + + + vbroadcasti128 ymm11,XMMWORD[$L$sigma] + vbroadcasti128 ymm3,XMMWORD[rcx] + vbroadcasti128 ymm15,XMMWORD[16+rcx] + vbroadcasti128 ymm7,XMMWORD[r8] + lea rcx,[256+rsp] + lea rax,[512+rsp] + lea r10,[$L$rot16] + lea r11,[$L$rot24] + + vpshufd ymm8,ymm11,0x00 + vpshufd ymm9,ymm11,0x55 + vmovdqa YMMWORD[(128-256)+rcx],ymm8 + vpshufd ymm10,ymm11,0xaa + vmovdqa YMMWORD[(160-256)+rcx],ymm9 + vpshufd ymm11,ymm11,0xff + vmovdqa YMMWORD[(192-256)+rcx],ymm10 + vmovdqa YMMWORD[(224-256)+rcx],ymm11 + + vpshufd ymm0,ymm3,0x00 + vpshufd ymm1,ymm3,0x55 + vmovdqa YMMWORD[(256-256)+rcx],ymm0 + vpshufd ymm2,ymm3,0xaa + vmovdqa YMMWORD[(288-256)+rcx],ymm1 + vpshufd ymm3,ymm3,0xff + vmovdqa YMMWORD[(320-256)+rcx],ymm2 + vmovdqa YMMWORD[(352-256)+rcx],ymm3 + + vpshufd ymm12,ymm15,0x00 + vpshufd ymm13,ymm15,0x55 + vmovdqa YMMWORD[(384-512)+rax],ymm12 + vpshufd ymm14,ymm15,0xaa + vmovdqa YMMWORD[(416-512)+rax],ymm13 + vpshufd ymm15,ymm15,0xff + vmovdqa YMMWORD[(448-512)+rax],ymm14 + vmovdqa YMMWORD[(480-512)+rax],ymm15 + + vpshufd ymm4,ymm7,0x00 + vpshufd ymm5,ymm7,0x55 + vpaddd ymm4,ymm4,YMMWORD[$L$incy] + vpshufd ymm6,ymm7,0xaa + vmovdqa YMMWORD[(544-512)+rax],ymm5 + vpshufd ymm7,ymm7,0xff + vmovdqa YMMWORD[(576-512)+rax],ymm6 + vmovdqa YMMWORD[(608-512)+rax],ymm7 + + jmp NEAR $L$oop_enter8x + +ALIGN 32 +$L$oop_outer8x: + vmovdqa ymm8,YMMWORD[((128-256))+rcx] + vmovdqa ymm9,YMMWORD[((160-256))+rcx] + vmovdqa ymm10,YMMWORD[((192-256))+rcx] + vmovdqa ymm11,YMMWORD[((224-256))+rcx] + vmovdqa ymm0,YMMWORD[((256-256))+rcx] + vmovdqa ymm1,YMMWORD[((288-256))+rcx] + vmovdqa ymm2,YMMWORD[((320-256))+rcx] + vmovdqa ymm3,YMMWORD[((352-256))+rcx] + vmovdqa ymm12,YMMWORD[((384-512))+rax] + vmovdqa ymm13,YMMWORD[((416-512))+rax] + vmovdqa ymm14,YMMWORD[((448-512))+rax] + vmovdqa ymm15,YMMWORD[((480-512))+rax] + vmovdqa ymm4,YMMWORD[((512-512))+rax] + vmovdqa ymm5,YMMWORD[((544-512))+rax] + vmovdqa ymm6,YMMWORD[((576-512))+rax] + vmovdqa ymm7,YMMWORD[((608-512))+rax] + vpaddd ymm4,ymm4,YMMWORD[$L$eight] + +$L$oop_enter8x: + vmovdqa YMMWORD[64+rsp],ymm14 + vmovdqa YMMWORD[96+rsp],ymm15 + vbroadcasti128 ymm15,XMMWORD[r10] + vmovdqa YMMWORD[(512-512)+rax],ymm4 + mov eax,10 + jmp NEAR $L$oop8x + +ALIGN 32 +$L$oop8x: + vpaddd ymm8,ymm8,ymm0 + vpxor ymm4,ymm8,ymm4 + vpshufb ymm4,ymm4,ymm15 + vpaddd ymm9,ymm9,ymm1 + vpxor ymm5,ymm9,ymm5 + vpshufb ymm5,ymm5,ymm15 + vpaddd ymm12,ymm12,ymm4 + vpxor ymm0,ymm12,ymm0 + vpslld ymm14,ymm0,12 + vpsrld ymm0,ymm0,20 + vpor ymm0,ymm14,ymm0 + vbroadcasti128 ymm14,XMMWORD[r11] + vpaddd ymm13,ymm13,ymm5 + vpxor ymm1,ymm13,ymm1 + vpslld ymm15,ymm1,12 + vpsrld ymm1,ymm1,20 + vpor ymm1,ymm15,ymm1 + vpaddd ymm8,ymm8,ymm0 + vpxor ymm4,ymm8,ymm4 + vpshufb ymm4,ymm4,ymm14 + vpaddd ymm9,ymm9,ymm1 + vpxor ymm5,ymm9,ymm5 + vpshufb ymm5,ymm5,ymm14 + vpaddd ymm12,ymm12,ymm4 + vpxor ymm0,ymm12,ymm0 + vpslld ymm15,ymm0,7 + vpsrld ymm0,ymm0,25 + vpor ymm0,ymm15,ymm0 + vbroadcasti128 ymm15,XMMWORD[r10] + vpaddd ymm13,ymm13,ymm5 + vpxor ymm1,ymm13,ymm1 + vpslld ymm14,ymm1,7 + vpsrld ymm1,ymm1,25 + vpor ymm1,ymm14,ymm1 + vmovdqa YMMWORD[rsp],ymm12 + vmovdqa YMMWORD[32+rsp],ymm13 + vmovdqa ymm12,YMMWORD[64+rsp] + vmovdqa ymm13,YMMWORD[96+rsp] + vpaddd ymm10,ymm10,ymm2 + vpxor ymm6,ymm10,ymm6 + vpshufb ymm6,ymm6,ymm15 + vpaddd ymm11,ymm11,ymm3 + vpxor ymm7,ymm11,ymm7 + vpshufb ymm7,ymm7,ymm15 + vpaddd ymm12,ymm12,ymm6 + vpxor ymm2,ymm12,ymm2 + vpslld ymm14,ymm2,12 + vpsrld ymm2,ymm2,20 + vpor ymm2,ymm14,ymm2 + vbroadcasti128 ymm14,XMMWORD[r11] + vpaddd ymm13,ymm13,ymm7 + vpxor ymm3,ymm13,ymm3 + vpslld ymm15,ymm3,12 + vpsrld ymm3,ymm3,20 + vpor ymm3,ymm15,ymm3 + vpaddd ymm10,ymm10,ymm2 + vpxor ymm6,ymm10,ymm6 + vpshufb ymm6,ymm6,ymm14 + vpaddd ymm11,ymm11,ymm3 + vpxor ymm7,ymm11,ymm7 + vpshufb ymm7,ymm7,ymm14 + vpaddd ymm12,ymm12,ymm6 + vpxor ymm2,ymm12,ymm2 + vpslld ymm15,ymm2,7 + vpsrld ymm2,ymm2,25 + vpor ymm2,ymm15,ymm2 + vbroadcasti128 ymm15,XMMWORD[r10] + vpaddd ymm13,ymm13,ymm7 + vpxor ymm3,ymm13,ymm3 + vpslld ymm14,ymm3,7 + vpsrld ymm3,ymm3,25 + vpor ymm3,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm1 + vpxor ymm7,ymm8,ymm7 + vpshufb ymm7,ymm7,ymm15 + vpaddd ymm9,ymm9,ymm2 + vpxor ymm4,ymm9,ymm4 + vpshufb ymm4,ymm4,ymm15 + vpaddd ymm12,ymm12,ymm7 + vpxor ymm1,ymm12,ymm1 + vpslld ymm14,ymm1,12 + vpsrld ymm1,ymm1,20 + vpor ymm1,ymm14,ymm1 + vbroadcasti128 ymm14,XMMWORD[r11] + vpaddd ymm13,ymm13,ymm4 + vpxor ymm2,ymm13,ymm2 + vpslld ymm15,ymm2,12 + vpsrld ymm2,ymm2,20 + vpor ymm2,ymm15,ymm2 + vpaddd ymm8,ymm8,ymm1 + vpxor ymm7,ymm8,ymm7 + vpshufb ymm7,ymm7,ymm14 + vpaddd ymm9,ymm9,ymm2 + vpxor ymm4,ymm9,ymm4 + vpshufb ymm4,ymm4,ymm14 + vpaddd ymm12,ymm12,ymm7 + vpxor ymm1,ymm12,ymm1 + vpslld ymm15,ymm1,7 + vpsrld ymm1,ymm1,25 + vpor ymm1,ymm15,ymm1 + vbroadcasti128 ymm15,XMMWORD[r10] + vpaddd ymm13,ymm13,ymm4 + vpxor ymm2,ymm13,ymm2 + vpslld ymm14,ymm2,7 + vpsrld ymm2,ymm2,25 + vpor ymm2,ymm14,ymm2 + vmovdqa YMMWORD[64+rsp],ymm12 + vmovdqa YMMWORD[96+rsp],ymm13 + vmovdqa ymm12,YMMWORD[rsp] + vmovdqa ymm13,YMMWORD[32+rsp] + vpaddd ymm10,ymm10,ymm3 + vpxor ymm5,ymm10,ymm5 + vpshufb ymm5,ymm5,ymm15 + vpaddd ymm11,ymm11,ymm0 + vpxor ymm6,ymm11,ymm6 + vpshufb ymm6,ymm6,ymm15 + vpaddd ymm12,ymm12,ymm5 + vpxor ymm3,ymm12,ymm3 + vpslld ymm14,ymm3,12 + vpsrld ymm3,ymm3,20 + vpor ymm3,ymm14,ymm3 + vbroadcasti128 ymm14,XMMWORD[r11] + vpaddd ymm13,ymm13,ymm6 + vpxor ymm0,ymm13,ymm0 + vpslld ymm15,ymm0,12 + vpsrld ymm0,ymm0,20 + vpor ymm0,ymm15,ymm0 + vpaddd ymm10,ymm10,ymm3 + vpxor ymm5,ymm10,ymm5 + vpshufb ymm5,ymm5,ymm14 + vpaddd ymm11,ymm11,ymm0 + vpxor ymm6,ymm11,ymm6 + vpshufb ymm6,ymm6,ymm14 + vpaddd ymm12,ymm12,ymm5 + vpxor ymm3,ymm12,ymm3 + vpslld ymm15,ymm3,7 + vpsrld ymm3,ymm3,25 + vpor ymm3,ymm15,ymm3 + vbroadcasti128 ymm15,XMMWORD[r10] + vpaddd ymm13,ymm13,ymm6 + vpxor ymm0,ymm13,ymm0 + vpslld ymm14,ymm0,7 + vpsrld ymm0,ymm0,25 + vpor ymm0,ymm14,ymm0 + dec eax + jnz NEAR $L$oop8x + + lea rax,[512+rsp] + vpaddd ymm8,ymm8,YMMWORD[((128-256))+rcx] + vpaddd ymm9,ymm9,YMMWORD[((160-256))+rcx] + vpaddd ymm10,ymm10,YMMWORD[((192-256))+rcx] + vpaddd ymm11,ymm11,YMMWORD[((224-256))+rcx] + + vpunpckldq ymm14,ymm8,ymm9 + vpunpckldq ymm15,ymm10,ymm11 + vpunpckhdq ymm8,ymm8,ymm9 + vpunpckhdq ymm10,ymm10,ymm11 + vpunpcklqdq ymm9,ymm14,ymm15 + vpunpckhqdq ymm14,ymm14,ymm15 + vpunpcklqdq ymm11,ymm8,ymm10 + vpunpckhqdq ymm8,ymm8,ymm10 + vpaddd ymm0,ymm0,YMMWORD[((256-256))+rcx] + vpaddd ymm1,ymm1,YMMWORD[((288-256))+rcx] + vpaddd ymm2,ymm2,YMMWORD[((320-256))+rcx] + vpaddd ymm3,ymm3,YMMWORD[((352-256))+rcx] + + vpunpckldq ymm10,ymm0,ymm1 + vpunpckldq ymm15,ymm2,ymm3 + vpunpckhdq ymm0,ymm0,ymm1 + vpunpckhdq ymm2,ymm2,ymm3 + vpunpcklqdq ymm1,ymm10,ymm15 + vpunpckhqdq ymm10,ymm10,ymm15 + vpunpcklqdq ymm3,ymm0,ymm2 + vpunpckhqdq ymm0,ymm0,ymm2 + vperm2i128 ymm15,ymm9,ymm1,0x20 + vperm2i128 ymm1,ymm9,ymm1,0x31 + vperm2i128 ymm9,ymm14,ymm10,0x20 + vperm2i128 ymm10,ymm14,ymm10,0x31 + vperm2i128 ymm14,ymm11,ymm3,0x20 + vperm2i128 ymm3,ymm11,ymm3,0x31 + vperm2i128 ymm11,ymm8,ymm0,0x20 + vperm2i128 ymm0,ymm8,ymm0,0x31 + vmovdqa YMMWORD[rsp],ymm15 + vmovdqa YMMWORD[32+rsp],ymm9 + vmovdqa ymm15,YMMWORD[64+rsp] + vmovdqa ymm9,YMMWORD[96+rsp] + + vpaddd ymm12,ymm12,YMMWORD[((384-512))+rax] + vpaddd ymm13,ymm13,YMMWORD[((416-512))+rax] + vpaddd ymm15,ymm15,YMMWORD[((448-512))+rax] + vpaddd ymm9,ymm9,YMMWORD[((480-512))+rax] + + vpunpckldq ymm2,ymm12,ymm13 + vpunpckldq ymm8,ymm15,ymm9 + vpunpckhdq ymm12,ymm12,ymm13 + vpunpckhdq ymm15,ymm15,ymm9 + vpunpcklqdq ymm13,ymm2,ymm8 + vpunpckhqdq ymm2,ymm2,ymm8 + vpunpcklqdq ymm9,ymm12,ymm15 + vpunpckhqdq ymm12,ymm12,ymm15 + vpaddd ymm4,ymm4,YMMWORD[((512-512))+rax] + vpaddd ymm5,ymm5,YMMWORD[((544-512))+rax] + vpaddd ymm6,ymm6,YMMWORD[((576-512))+rax] + vpaddd ymm7,ymm7,YMMWORD[((608-512))+rax] + + vpunpckldq ymm15,ymm4,ymm5 + vpunpckldq ymm8,ymm6,ymm7 + vpunpckhdq ymm4,ymm4,ymm5 + vpunpckhdq ymm6,ymm6,ymm7 + vpunpcklqdq ymm5,ymm15,ymm8 + vpunpckhqdq ymm15,ymm15,ymm8 + vpunpcklqdq ymm7,ymm4,ymm6 + vpunpckhqdq ymm4,ymm4,ymm6 + vperm2i128 ymm8,ymm13,ymm5,0x20 + vperm2i128 ymm5,ymm13,ymm5,0x31 + vperm2i128 ymm13,ymm2,ymm15,0x20 + vperm2i128 ymm15,ymm2,ymm15,0x31 + vperm2i128 ymm2,ymm9,ymm7,0x20 + vperm2i128 ymm7,ymm9,ymm7,0x31 + vperm2i128 ymm9,ymm12,ymm4,0x20 + vperm2i128 ymm4,ymm12,ymm4,0x31 + vmovdqa ymm6,YMMWORD[rsp] + vmovdqa ymm12,YMMWORD[32+rsp] + + cmp rdx,64*8 + jb NEAR $L$tail8x + + vpxor ymm6,ymm6,YMMWORD[rsi] + vpxor ymm8,ymm8,YMMWORD[32+rsi] + vpxor ymm1,ymm1,YMMWORD[64+rsi] + vpxor ymm5,ymm5,YMMWORD[96+rsi] + lea rsi,[128+rsi] + vmovdqu YMMWORD[rdi],ymm6 + vmovdqu YMMWORD[32+rdi],ymm8 + vmovdqu YMMWORD[64+rdi],ymm1 + vmovdqu YMMWORD[96+rdi],ymm5 + lea rdi,[128+rdi] + + vpxor ymm12,ymm12,YMMWORD[rsi] + vpxor ymm13,ymm13,YMMWORD[32+rsi] + vpxor ymm10,ymm10,YMMWORD[64+rsi] + vpxor ymm15,ymm15,YMMWORD[96+rsi] + lea rsi,[128+rsi] + vmovdqu YMMWORD[rdi],ymm12 + vmovdqu YMMWORD[32+rdi],ymm13 + vmovdqu YMMWORD[64+rdi],ymm10 + vmovdqu YMMWORD[96+rdi],ymm15 + lea rdi,[128+rdi] + + vpxor ymm14,ymm14,YMMWORD[rsi] + vpxor ymm2,ymm2,YMMWORD[32+rsi] + vpxor ymm3,ymm3,YMMWORD[64+rsi] + vpxor ymm7,ymm7,YMMWORD[96+rsi] + lea rsi,[128+rsi] + vmovdqu YMMWORD[rdi],ymm14 + vmovdqu YMMWORD[32+rdi],ymm2 + vmovdqu YMMWORD[64+rdi],ymm3 + vmovdqu YMMWORD[96+rdi],ymm7 + lea rdi,[128+rdi] + + vpxor ymm11,ymm11,YMMWORD[rsi] + vpxor ymm9,ymm9,YMMWORD[32+rsi] + vpxor ymm0,ymm0,YMMWORD[64+rsi] + vpxor ymm4,ymm4,YMMWORD[96+rsi] + lea rsi,[128+rsi] + vmovdqu YMMWORD[rdi],ymm11 + vmovdqu YMMWORD[32+rdi],ymm9 + vmovdqu YMMWORD[64+rdi],ymm0 + vmovdqu YMMWORD[96+rdi],ymm4 + lea rdi,[128+rdi] + + sub rdx,64*8 + jnz NEAR $L$oop_outer8x + + jmp NEAR $L$done8x + +$L$tail8x: + cmp rdx,448 + jae NEAR $L$448_or_more8x + cmp rdx,384 + jae NEAR $L$384_or_more8x + cmp rdx,320 + jae NEAR $L$320_or_more8x + cmp rdx,256 + jae NEAR $L$256_or_more8x + cmp rdx,192 + jae NEAR $L$192_or_more8x + cmp rdx,128 + jae NEAR $L$128_or_more8x + cmp rdx,64 + jae NEAR $L$64_or_more8x + + xor r10,r10 + vmovdqa YMMWORD[rsp],ymm6 + vmovdqa YMMWORD[32+rsp],ymm8 + jmp NEAR $L$oop_tail8x + +ALIGN 32 +$L$64_or_more8x: + vpxor ymm6,ymm6,YMMWORD[rsi] + vpxor ymm8,ymm8,YMMWORD[32+rsi] + vmovdqu YMMWORD[rdi],ymm6 + vmovdqu YMMWORD[32+rdi],ymm8 + je NEAR $L$done8x + + lea rsi,[64+rsi] + xor r10,r10 + vmovdqa YMMWORD[rsp],ymm1 + lea rdi,[64+rdi] + sub rdx,64 + vmovdqa YMMWORD[32+rsp],ymm5 + jmp NEAR $L$oop_tail8x + +ALIGN 32 +$L$128_or_more8x: + vpxor ymm6,ymm6,YMMWORD[rsi] + vpxor ymm8,ymm8,YMMWORD[32+rsi] + vpxor ymm1,ymm1,YMMWORD[64+rsi] + vpxor ymm5,ymm5,YMMWORD[96+rsi] + vmovdqu YMMWORD[rdi],ymm6 + vmovdqu YMMWORD[32+rdi],ymm8 + vmovdqu YMMWORD[64+rdi],ymm1 + vmovdqu YMMWORD[96+rdi],ymm5 + je NEAR $L$done8x + + lea rsi,[128+rsi] + xor r10,r10 + vmovdqa YMMWORD[rsp],ymm12 + lea rdi,[128+rdi] + sub rdx,128 + vmovdqa YMMWORD[32+rsp],ymm13 + jmp NEAR $L$oop_tail8x + +ALIGN 32 +$L$192_or_more8x: + vpxor ymm6,ymm6,YMMWORD[rsi] + vpxor ymm8,ymm8,YMMWORD[32+rsi] + vpxor ymm1,ymm1,YMMWORD[64+rsi] + vpxor ymm5,ymm5,YMMWORD[96+rsi] + vpxor ymm12,ymm12,YMMWORD[128+rsi] + vpxor ymm13,ymm13,YMMWORD[160+rsi] + vmovdqu YMMWORD[rdi],ymm6 + vmovdqu YMMWORD[32+rdi],ymm8 + vmovdqu YMMWORD[64+rdi],ymm1 + vmovdqu YMMWORD[96+rdi],ymm5 + vmovdqu YMMWORD[128+rdi],ymm12 + vmovdqu YMMWORD[160+rdi],ymm13 + je NEAR $L$done8x + + lea rsi,[192+rsi] + xor r10,r10 + vmovdqa YMMWORD[rsp],ymm10 + lea rdi,[192+rdi] + sub rdx,192 + vmovdqa YMMWORD[32+rsp],ymm15 + jmp NEAR $L$oop_tail8x + +ALIGN 32 +$L$256_or_more8x: + vpxor ymm6,ymm6,YMMWORD[rsi] + vpxor ymm8,ymm8,YMMWORD[32+rsi] + vpxor ymm1,ymm1,YMMWORD[64+rsi] + vpxor ymm5,ymm5,YMMWORD[96+rsi] + vpxor ymm12,ymm12,YMMWORD[128+rsi] + vpxor ymm13,ymm13,YMMWORD[160+rsi] + vpxor ymm10,ymm10,YMMWORD[192+rsi] + vpxor ymm15,ymm15,YMMWORD[224+rsi] + vmovdqu YMMWORD[rdi],ymm6 + vmovdqu YMMWORD[32+rdi],ymm8 + vmovdqu YMMWORD[64+rdi],ymm1 + vmovdqu YMMWORD[96+rdi],ymm5 + vmovdqu YMMWORD[128+rdi],ymm12 + vmovdqu YMMWORD[160+rdi],ymm13 + vmovdqu YMMWORD[192+rdi],ymm10 + vmovdqu YMMWORD[224+rdi],ymm15 + je NEAR $L$done8x + + lea rsi,[256+rsi] + xor r10,r10 + vmovdqa YMMWORD[rsp],ymm14 + lea rdi,[256+rdi] + sub rdx,256 + vmovdqa YMMWORD[32+rsp],ymm2 + jmp NEAR $L$oop_tail8x + +ALIGN 32 +$L$320_or_more8x: + vpxor ymm6,ymm6,YMMWORD[rsi] + vpxor ymm8,ymm8,YMMWORD[32+rsi] + vpxor ymm1,ymm1,YMMWORD[64+rsi] + vpxor ymm5,ymm5,YMMWORD[96+rsi] + vpxor ymm12,ymm12,YMMWORD[128+rsi] + vpxor ymm13,ymm13,YMMWORD[160+rsi] + vpxor ymm10,ymm10,YMMWORD[192+rsi] + vpxor ymm15,ymm15,YMMWORD[224+rsi] + vpxor ymm14,ymm14,YMMWORD[256+rsi] + vpxor ymm2,ymm2,YMMWORD[288+rsi] + vmovdqu YMMWORD[rdi],ymm6 + vmovdqu YMMWORD[32+rdi],ymm8 + vmovdqu YMMWORD[64+rdi],ymm1 + vmovdqu YMMWORD[96+rdi],ymm5 + vmovdqu YMMWORD[128+rdi],ymm12 + vmovdqu YMMWORD[160+rdi],ymm13 + vmovdqu YMMWORD[192+rdi],ymm10 + vmovdqu YMMWORD[224+rdi],ymm15 + vmovdqu YMMWORD[256+rdi],ymm14 + vmovdqu YMMWORD[288+rdi],ymm2 + je NEAR $L$done8x + + lea rsi,[320+rsi] + xor r10,r10 + vmovdqa YMMWORD[rsp],ymm3 + lea rdi,[320+rdi] + sub rdx,320 + vmovdqa YMMWORD[32+rsp],ymm7 + jmp NEAR $L$oop_tail8x + +ALIGN 32 +$L$384_or_more8x: + vpxor ymm6,ymm6,YMMWORD[rsi] + vpxor ymm8,ymm8,YMMWORD[32+rsi] + vpxor ymm1,ymm1,YMMWORD[64+rsi] + vpxor ymm5,ymm5,YMMWORD[96+rsi] + vpxor ymm12,ymm12,YMMWORD[128+rsi] + vpxor ymm13,ymm13,YMMWORD[160+rsi] + vpxor ymm10,ymm10,YMMWORD[192+rsi] + vpxor ymm15,ymm15,YMMWORD[224+rsi] + vpxor ymm14,ymm14,YMMWORD[256+rsi] + vpxor ymm2,ymm2,YMMWORD[288+rsi] + vpxor ymm3,ymm3,YMMWORD[320+rsi] + vpxor ymm7,ymm7,YMMWORD[352+rsi] + vmovdqu YMMWORD[rdi],ymm6 + vmovdqu YMMWORD[32+rdi],ymm8 + vmovdqu YMMWORD[64+rdi],ymm1 + vmovdqu YMMWORD[96+rdi],ymm5 + vmovdqu YMMWORD[128+rdi],ymm12 + vmovdqu YMMWORD[160+rdi],ymm13 + vmovdqu YMMWORD[192+rdi],ymm10 + vmovdqu YMMWORD[224+rdi],ymm15 + vmovdqu YMMWORD[256+rdi],ymm14 + vmovdqu YMMWORD[288+rdi],ymm2 + vmovdqu YMMWORD[320+rdi],ymm3 + vmovdqu YMMWORD[352+rdi],ymm7 + je NEAR $L$done8x + + lea rsi,[384+rsi] + xor r10,r10 + vmovdqa YMMWORD[rsp],ymm11 + lea rdi,[384+rdi] + sub rdx,384 + vmovdqa YMMWORD[32+rsp],ymm9 + jmp NEAR $L$oop_tail8x + +ALIGN 32 +$L$448_or_more8x: + vpxor ymm6,ymm6,YMMWORD[rsi] + vpxor ymm8,ymm8,YMMWORD[32+rsi] + vpxor ymm1,ymm1,YMMWORD[64+rsi] + vpxor ymm5,ymm5,YMMWORD[96+rsi] + vpxor ymm12,ymm12,YMMWORD[128+rsi] + vpxor ymm13,ymm13,YMMWORD[160+rsi] + vpxor ymm10,ymm10,YMMWORD[192+rsi] + vpxor ymm15,ymm15,YMMWORD[224+rsi] + vpxor ymm14,ymm14,YMMWORD[256+rsi] + vpxor ymm2,ymm2,YMMWORD[288+rsi] + vpxor ymm3,ymm3,YMMWORD[320+rsi] + vpxor ymm7,ymm7,YMMWORD[352+rsi] + vpxor ymm11,ymm11,YMMWORD[384+rsi] + vpxor ymm9,ymm9,YMMWORD[416+rsi] + vmovdqu YMMWORD[rdi],ymm6 + vmovdqu YMMWORD[32+rdi],ymm8 + vmovdqu YMMWORD[64+rdi],ymm1 + vmovdqu YMMWORD[96+rdi],ymm5 + vmovdqu YMMWORD[128+rdi],ymm12 + vmovdqu YMMWORD[160+rdi],ymm13 + vmovdqu YMMWORD[192+rdi],ymm10 + vmovdqu YMMWORD[224+rdi],ymm15 + vmovdqu YMMWORD[256+rdi],ymm14 + vmovdqu YMMWORD[288+rdi],ymm2 + vmovdqu YMMWORD[320+rdi],ymm3 + vmovdqu YMMWORD[352+rdi],ymm7 + vmovdqu YMMWORD[384+rdi],ymm11 + vmovdqu YMMWORD[416+rdi],ymm9 + je NEAR $L$done8x + + lea rsi,[448+rsi] + xor r10,r10 + vmovdqa YMMWORD[rsp],ymm0 + lea rdi,[448+rdi] + sub rdx,448 + vmovdqa YMMWORD[32+rsp],ymm4 + +$L$oop_tail8x: + movzx eax,BYTE[r10*1+rsi] + movzx ecx,BYTE[r10*1+rsp] + lea r10,[1+r10] + xor eax,ecx + mov BYTE[((-1))+r10*1+rdi],al + dec rdx + jnz NEAR $L$oop_tail8x + +$L$done8x: + vzeroall + movaps xmm6,XMMWORD[((-168))+r9] + movaps xmm7,XMMWORD[((-152))+r9] + movaps xmm8,XMMWORD[((-136))+r9] + movaps xmm9,XMMWORD[((-120))+r9] + movaps xmm10,XMMWORD[((-104))+r9] + movaps xmm11,XMMWORD[((-88))+r9] + movaps xmm12,XMMWORD[((-72))+r9] + movaps xmm13,XMMWORD[((-56))+r9] + movaps xmm14,XMMWORD[((-40))+r9] + movaps xmm15,XMMWORD[((-24))+r9] + lea rsp,[r9] + +$L$8x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$SEH_end_ChaCha20_ctr32_avx2: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + lea r10,[$L$ctr32_body] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + lea r10,[$L$no_data] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rax,[((64+24+48))+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + ret + + + +ALIGN 16 +ssse3_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[192+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[((-40))+rax] + lea rdi,[512+r8] + mov ecx,4 + DD 0xa548f3fc + + jmp NEAR $L$common_seh_tail + + + +ALIGN 16 +full_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[192+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[((-168))+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + + jmp NEAR $L$common_seh_tail + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_ChaCha20_ctr32_nohw wrt ..imagebase + DD $L$SEH_end_ChaCha20_ctr32_nohw wrt ..imagebase + DD $L$SEH_info_ChaCha20_ctr32_nohw wrt ..imagebase + + DD $L$SEH_begin_ChaCha20_ctr32_ssse3 wrt ..imagebase + DD $L$SEH_end_ChaCha20_ctr32_ssse3 wrt ..imagebase + DD $L$SEH_info_ChaCha20_ctr32_ssse3 wrt ..imagebase + + DD $L$SEH_begin_ChaCha20_ctr32_ssse3_4x wrt ..imagebase + DD $L$SEH_end_ChaCha20_ctr32_ssse3_4x wrt ..imagebase + DD $L$SEH_info_ChaCha20_ctr32_ssse3_4x wrt ..imagebase + DD $L$SEH_begin_ChaCha20_ctr32_avx2 wrt ..imagebase + DD $L$SEH_end_ChaCha20_ctr32_avx2 wrt ..imagebase + DD $L$SEH_info_ChaCha20_ctr32_avx2 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_ChaCha20_ctr32_nohw: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + +$L$SEH_info_ChaCha20_ctr32_ssse3: + DB 9,0,0,0 + DD ssse3_handler wrt ..imagebase + DD $L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase + +$L$SEH_info_ChaCha20_ctr32_ssse3_4x: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase +$L$SEH_info_ChaCha20_ctr32_avx2: + DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-apple.S b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-apple.S new file mode 100644 index 0000000000..04a1e22304 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-apple.S @@ -0,0 +1,3009 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include +.section __TEXT,__const + +.align 7 +Lchacha20_consts: +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +Linc: +.long 1,2,3,4 +Lrol8: +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +Lclamp: +.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC + +.text + + +.align 6 +Lpoly_hash_ad_internal: +.cfi_startproc + cbnz x4, Lpoly_hash_intro + ret + +Lpoly_hash_intro: + cmp x4, #16 + b.lt Lpoly_hash_ad_tail + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #16 + b Lpoly_hash_ad_internal + +Lpoly_hash_ad_tail: + cbz x4, Lpoly_hash_ad_ret + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the AAD + sub x4, x4, #1 + +Lpoly_hash_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, x4] + mov v20.b[0], w11 + subs x4, x4, #1 + b.ge Lpoly_hash_tail_16_compose + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +Lpoly_hash_ad_ret: + ret +.cfi_endproc + + +///////////////////////////////// +// +// void chacha20_poly1305_seal(uint8_t *pt, uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, union open_data *seal_data); +// +.globl _chacha20_poly1305_seal +.private_extern _chacha20_poly1305_seal + +.align 6 +_chacha20_poly1305_seal: + AARCH64_SIGN_LINK_REGISTER +.cfi_startproc + stp x29, x30, [sp, #-80]! +.cfi_def_cfa_offset 80 +.cfi_offset w30, -72 +.cfi_offset w29, -80 + mov x29, sp + // We probably could do .cfi_def_cfa w29, 80 at this point, but since + // we don't actually use the frame pointer like that, it's probably not + // worth bothering. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] +.cfi_offset b15, -8 +.cfi_offset b14, -16 +.cfi_offset b13, -24 +.cfi_offset b12, -32 +.cfi_offset b11, -40 +.cfi_offset b10, -48 +.cfi_offset b9, -56 +.cfi_offset b8, -64 + + adrp x11, Lchacha20_consts@PAGE + add x11, x11, Lchacha20_consts@PAGEOFF + + ld1 {v24.16b - v27.16b}, [x11] // Load the CONSTS, INC, ROL8 and CLAMP values + ld1 {v28.16b - v30.16b}, [x5] + + mov x15, #1 // Prepare the Poly1305 state + mov x8, #0 + mov x9, #0 + mov x10, #0 + + ldr x12, [x5, #56] // The total cipher text length includes extra_in_len + add x12, x12, x2 + mov v31.d[0], x4 // Store the input and aad lengths + mov v31.d[1], x12 + + cmp x2, #128 + b.le Lseal_128 // Optimization for smaller buffers + + // Initially we prepare 5 ChaCha20 blocks. Four to encrypt up to 4 blocks (256 bytes) of plaintext, + // and one for the Poly1305 R and S keys. The first four blocks (A0-A3..D0-D3) are computed vertically, + // the fifth block (A4-D4) horizontally. + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + sub x5, x5, #32 + + mov x6, #10 + +.align 5 +Lseal_init_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x6, x6, #1 + b.hi Lseal_init_rounds + + add v15.4s, v15.4s, v25.4s + mov x11, #4 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + and v4.16b, v4.16b, v27.16b + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + mov x16, v4.d[0] // Move the R key to GPRs + mov x17, v4.d[1] + mov v27.16b, v9.16b // Store the S key + + bl Lpoly_hash_ad_internal + + mov x3, x0 + cmp x2, #256 + b.le Lseal_tail + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #256 + + mov x6, #4 // In the first run of the loop we need to hash 256 bytes, therefore we hash one block for the first 4 rounds + mov x7, #6 // and two blocks for the remaining 6, for a total of (1 * 4 + 2 * 6) * 16 = 256 + +Lseal_main_loop: + adrp x11, Lchacha20_consts@PAGE + add x11, x11, Lchacha20_consts@PAGEOFF + + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + sub x5, x5, #32 +.align 5 +Lseal_main_loop_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x6, x6, #1 + b.ge Lseal_main_loop_rounds + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + subs x7, x7, #1 + b.gt Lseal_main_loop_rounds + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + add v15.4s, v15.4s, v25.4s + mov x11, #5 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + add v14.4s, v14.4s, v29.4s + add v19.4s, v19.4s, v30.4s + + cmp x2, #320 + b.le Lseal_tail + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v9.16b + eor v22.16b, v22.16b, v14.16b + eor v23.16b, v23.16b, v19.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #320 + + mov x6, #0 + mov x7, #10 // For the remainder of the loop we always hash and encrypt 320 bytes per iteration + + b Lseal_main_loop + +Lseal_tail: + // This part of the function handles the storage and authentication of the last [0,320) bytes + // We assume A0-A4 ... D0-D4 hold at least inl (320 max) bytes of the stream data. + cmp x2, #64 + b.lt Lseal_tail_64 + + // Store and authenticate 64B blocks per iteration + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v22.d[0] + mov x12, v22.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v23.d[0] + mov x12, v23.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + st1 {v20.16b - v23.16b}, [x0], #64 + sub x2, x2, #64 + + // Shift the state left by 64 bytes for the next iteration of the loop + mov v0.16b, v1.16b + mov v5.16b, v6.16b + mov v10.16b, v11.16b + mov v15.16b, v16.16b + + mov v1.16b, v2.16b + mov v6.16b, v7.16b + mov v11.16b, v12.16b + mov v16.16b, v17.16b + + mov v2.16b, v3.16b + mov v7.16b, v8.16b + mov v12.16b, v13.16b + mov v17.16b, v18.16b + + mov v3.16b, v4.16b + mov v8.16b, v9.16b + mov v13.16b, v14.16b + mov v18.16b, v19.16b + + b Lseal_tail + +Lseal_tail_64: + ldp x3, x4, [x5, #48] // extra_in_len and extra_in_ptr + + // Here we handle the last [0,64) bytes of plaintext + cmp x2, #16 + b.lt Lseal_tail_16 + // Each iteration encrypt and authenticate a 16B block + ld1 {v20.16b}, [x1], #16 + eor v20.16b, v20.16b, v0.16b + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + st1 {v20.16b}, [x0], #16 + + sub x2, x2, #16 + + // Shift the state left by 16 bytes for the next iteration of the loop + mov v0.16b, v5.16b + mov v5.16b, v10.16b + mov v10.16b, v15.16b + + b Lseal_tail_64 + +Lseal_tail_16: + // Here we handle the last [0,16) bytes of ciphertext that require a padded block + cbz x2, Lseal_hash_extra + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the plaintext/extra in + eor v21.16b, v21.16b, v21.16b // Use T1 to generate an AND mask that will only mask the ciphertext bytes + not v22.16b, v20.16b + + mov x6, x2 + add x1, x1, x2 + + cbz x4, Lseal_tail_16_compose // No extra data to pad with, zero padding + + mov x7, #16 // We need to load some extra_in first for padding + sub x7, x7, x2 + cmp x4, x7 + csel x7, x4, x7, lt // Load the minimum of extra_in_len and the amount needed to fill the register + mov x12, x7 + add x3, x3, x7 + sub x4, x4, x7 + +Lseal_tail16_compose_extra_in: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, #-1]! + mov v20.b[0], w11 + subs x7, x7, #1 + b.gt Lseal_tail16_compose_extra_in + + add x3, x3, x12 + +Lseal_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x1, #-1]! + mov v20.b[0], w11 + ext v21.16b, v22.16b, v21.16b, #15 + subs x2, x2, #1 + b.gt Lseal_tail_16_compose + + and v0.16b, v0.16b, v21.16b + eor v20.16b, v20.16b, v0.16b + mov v21.16b, v20.16b + +Lseal_tail_16_store: + umov w11, v20.b[0] + strb w11, [x0], #1 + ext v20.16b, v20.16b, v20.16b, #1 + subs x6, x6, #1 + b.gt Lseal_tail_16_store + + // Hash in the final ct block concatenated with extra_in + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +Lseal_hash_extra: + cbz x4, Lseal_finalize + +Lseal_hash_extra_loop: + cmp x4, #16 + b.lt Lseal_hash_extra_tail + ld1 {v20.16b}, [x3], #16 + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #16 + b Lseal_hash_extra_loop + +Lseal_hash_extra_tail: + cbz x4, Lseal_finalize + eor v20.16b, v20.16b, v20.16b // Use T0 to load the remaining extra ciphertext + add x3, x3, x4 + +Lseal_hash_extra_load: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, #-1]! + mov v20.b[0], w11 + subs x4, x4, #1 + b.gt Lseal_hash_extra_load + + // Hash in the final padded extra_in blcok + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +Lseal_finalize: + mov x11, v31.d[0] + mov x12, v31.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + // Final reduction step + sub x12, xzr, x15 + orr x13, xzr, #3 + subs x11, x8, #-5 + sbcs x12, x9, x12 + sbcs x13, x10, x13 + csel x8, x11, x8, cs + csel x9, x12, x9, cs + csel x10, x13, x10, cs + mov x11, v27.d[0] + mov x12, v27.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + + stp x8, x9, [x5] + + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] +.cfi_restore b15 +.cfi_restore b14 +.cfi_restore b13 +.cfi_restore b12 +.cfi_restore b11 +.cfi_restore b10 +.cfi_restore b9 +.cfi_restore b8 + ldp x29, x30, [sp], 80 +.cfi_restore w29 +.cfi_restore w30 +.cfi_def_cfa_offset 0 + AARCH64_VALIDATE_LINK_REGISTER + ret + +Lseal_128: + // On some architectures preparing 5 blocks for small buffers is wasteful + eor v25.16b, v25.16b, v25.16b + mov x11, #1 + mov v25.s[0], w11 + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v17.16b, v30.16b + add v15.4s, v17.4s, v25.4s + add v16.4s, v15.4s, v25.4s + + mov x6, #10 + +Lseal_128_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x6, x6, #1 + b.hi Lseal_128_rounds + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + + // Only the first 32 bytes of the third block (counter = 0) are needed, + // so skip updating v12 and v17. + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + + add v30.4s, v30.4s, v25.4s + add v15.4s, v15.4s, v30.4s + add v30.4s, v30.4s, v25.4s + add v16.4s, v16.4s, v30.4s + + and v2.16b, v2.16b, v27.16b + mov x16, v2.d[0] // Move the R key to GPRs + mov x17, v2.d[1] + mov v27.16b, v7.16b // Store the S key + + bl Lpoly_hash_ad_internal + b Lseal_tail +.cfi_endproc + + +///////////////////////////////// +// +// void chacha20_poly1305_open(uint8_t *pt, uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, union open_data *aead_data); +// +.globl _chacha20_poly1305_open +.private_extern _chacha20_poly1305_open + +.align 6 +_chacha20_poly1305_open: + AARCH64_SIGN_LINK_REGISTER +.cfi_startproc + stp x29, x30, [sp, #-80]! +.cfi_def_cfa_offset 80 +.cfi_offset w30, -72 +.cfi_offset w29, -80 + mov x29, sp + // We probably could do .cfi_def_cfa w29, 80 at this point, but since + // we don't actually use the frame pointer like that, it's probably not + // worth bothering. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] +.cfi_offset b15, -8 +.cfi_offset b14, -16 +.cfi_offset b13, -24 +.cfi_offset b12, -32 +.cfi_offset b11, -40 +.cfi_offset b10, -48 +.cfi_offset b9, -56 +.cfi_offset b8, -64 + + adrp x11, Lchacha20_consts@PAGE + add x11, x11, Lchacha20_consts@PAGEOFF + + ld1 {v24.16b - v27.16b}, [x11] // Load the CONSTS, INC, ROL8 and CLAMP values + ld1 {v28.16b - v30.16b}, [x5] + + mov x15, #1 // Prepare the Poly1305 state + mov x8, #0 + mov x9, #0 + mov x10, #0 + + mov v31.d[0], x4 // Store the input and aad lengths + mov v31.d[1], x2 + + cmp x2, #128 + b.le Lopen_128 // Optimization for smaller buffers + + // Initially we prepare a single ChaCha20 block for the Poly1305 R and S keys + mov v0.16b, v24.16b + mov v5.16b, v28.16b + mov v10.16b, v29.16b + mov v15.16b, v30.16b + + mov x6, #10 + +.align 5 +Lopen_init_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + subs x6, x6, #1 + b.hi Lopen_init_rounds + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + + and v0.16b, v0.16b, v27.16b + mov x16, v0.d[0] // Move the R key to GPRs + mov x17, v0.d[1] + mov v27.16b, v5.16b // Store the S key + + bl Lpoly_hash_ad_internal + +Lopen_ad_done: + mov x3, x1 + +// Each iteration of the loop hash 320 bytes, and prepare stream for 320 bytes +Lopen_main_loop: + + cmp x2, #192 + b.lt Lopen_tail + + adrp x11, Lchacha20_consts@PAGE + add x11, x11, Lchacha20_consts@PAGEOFF + + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + sub x5, x5, #32 + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + lsr x4, x2, #4 // How many whole blocks we have to hash, will always be at least 12 + sub x4, x4, #10 + + mov x7, #10 + subs x6, x7, x4 + subs x6, x7, x4 // itr1 can be negative if we have more than 320 bytes to hash + csel x7, x7, x4, le // if itr1 is zero or less, itr2 should be 10 to indicate all 10 rounds are full + + cbz x7, Lopen_main_loop_rounds_short + +.align 5 +Lopen_main_loop_rounds: + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most +Lopen_main_loop_rounds_short: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x7, x7, #1 + b.gt Lopen_main_loop_rounds + subs x6, x6, #1 + b.ge Lopen_main_loop_rounds_short + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + add v15.4s, v15.4s, v25.4s + mov x11, #5 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + add v14.4s, v14.4s, v29.4s + add v19.4s, v19.4s, v30.4s + + // We can always safely store 192 bytes + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #192 + + mov v0.16b, v3.16b + mov v5.16b, v8.16b + mov v10.16b, v13.16b + mov v15.16b, v18.16b + + cmp x2, #64 + b.lt Lopen_tail_64_store + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + + mov v0.16b, v4.16b + mov v5.16b, v9.16b + mov v10.16b, v14.16b + mov v15.16b, v19.16b + + cmp x2, #64 + b.lt Lopen_tail_64_store + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v9.16b + eor v22.16b, v22.16b, v14.16b + eor v23.16b, v23.16b, v19.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + b Lopen_main_loop + +Lopen_tail: + + cbz x2, Lopen_finalize + + lsr x4, x2, #4 // How many whole blocks we have to hash + + cmp x2, #64 + b.le Lopen_tail_64 + cmp x2, #128 + b.le Lopen_tail_128 + +Lopen_tail_192: + // We need three more blocks + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v15.16b, v30.16b + mov v16.16b, v30.16b + mov v17.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + eor v21.16b, v21.16b, v21.16b + ins v23.s[0], v25.s[0] + ins v21.d[0], x15 + + add v22.4s, v23.4s, v21.4s + add v21.4s, v22.4s, v21.4s + + add v15.4s, v15.4s, v21.4s + add v16.4s, v16.4s, v23.4s + add v17.4s, v17.4s, v22.4s + + mov x7, #10 + subs x6, x7, x4 // itr1 can be negative if we have more than 160 bytes to hash + csel x7, x7, x4, le // if itr1 is zero or less, itr2 should be 10 to indicate all 10 rounds are hashing + sub x4, x4, x7 + + cbz x7, Lopen_tail_192_rounds_no_hash + +Lopen_tail_192_rounds: + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most +Lopen_tail_192_rounds_no_hash: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x7, x7, #1 + b.gt Lopen_tail_192_rounds + subs x6, x6, #1 + b.ge Lopen_tail_192_rounds_no_hash + + // We hashed 160 bytes at most, may still have 32 bytes left +Lopen_tail_192_hash: + cbz x4, Lopen_tail_192_hash_done + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #1 + b Lopen_tail_192_hash + +Lopen_tail_192_hash_done: + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + add v12.4s, v12.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v16.4s, v16.4s, v30.4s + add v17.4s, v17.4s, v30.4s + + add v15.4s, v15.4s, v21.4s + add v16.4s, v16.4s, v23.4s + add v17.4s, v17.4s, v22.4s + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #128 + b Lopen_tail_64_store + +Lopen_tail_128: + // We need two more blocks + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v15.16b, v30.16b + mov v16.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + eor v22.16b, v22.16b, v22.16b + ins v23.s[0], v25.s[0] + ins v22.d[0], x15 + add v22.4s, v22.4s, v23.4s + + add v15.4s, v15.4s, v22.4s + add v16.4s, v16.4s, v23.4s + + mov x6, #10 + sub x6, x6, x4 + +Lopen_tail_128_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v1.4s, v1.4s, v6.4s + eor v16.16b, v16.16b, v1.16b + rev32 v16.8h, v16.8h + + add v11.4s, v11.4s, v16.4s + eor v6.16b, v6.16b, v11.16b + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + add v1.4s, v1.4s, v20.4s + eor v16.16b, v16.16b, v1.16b + tbl v16.16b, {v16.16b}, v26.16b + + add v11.4s, v11.4s, v16.4s + eor v20.16b, v20.16b, v11.16b + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + ext v6.16b, v6.16b, v6.16b, #4 + ext v11.16b, v11.16b, v11.16b, #8 + ext v16.16b, v16.16b, v16.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + add v1.4s, v1.4s, v6.4s + eor v16.16b, v16.16b, v1.16b + rev32 v16.8h, v16.8h + + add v11.4s, v11.4s, v16.4s + eor v6.16b, v6.16b, v11.16b + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + add v1.4s, v1.4s, v20.4s + eor v16.16b, v16.16b, v1.16b + tbl v16.16b, {v16.16b}, v26.16b + + add v11.4s, v11.4s, v16.4s + eor v20.16b, v20.16b, v11.16b + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + ext v6.16b, v6.16b, v6.16b, #12 + ext v11.16b, v11.16b, v11.16b, #8 + ext v16.16b, v16.16b, v16.16b, #4 + subs x6, x6, #1 + b.gt Lopen_tail_128_rounds + cbz x4, Lopen_tail_128_rounds_done + subs x4, x4, #1 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + b Lopen_tail_128_rounds + +Lopen_tail_128_rounds_done: + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v16.4s, v16.4s, v30.4s + add v15.4s, v15.4s, v22.4s + add v16.4s, v16.4s, v23.4s + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + sub x2, x2, #64 + + b Lopen_tail_64_store + +Lopen_tail_64: + // We just need a single block + mov v0.16b, v24.16b + mov v5.16b, v28.16b + mov v10.16b, v29.16b + mov v15.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + ins v23.s[0], v25.s[0] + add v15.4s, v15.4s, v23.4s + + mov x6, #10 + sub x6, x6, x4 + +Lopen_tail_64_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + subs x6, x6, #1 + b.gt Lopen_tail_64_rounds + cbz x4, Lopen_tail_64_rounds_done + subs x4, x4, #1 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + b Lopen_tail_64_rounds + +Lopen_tail_64_rounds_done: + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v15.4s, v15.4s, v23.4s + +Lopen_tail_64_store: + cmp x2, #16 + b.lt Lopen_tail_16 + + ld1 {v20.16b}, [x1], #16 + eor v20.16b, v20.16b, v0.16b + st1 {v20.16b}, [x0], #16 + mov v0.16b, v5.16b + mov v5.16b, v10.16b + mov v10.16b, v15.16b + sub x2, x2, #16 + b Lopen_tail_64_store + +Lopen_tail_16: + // Here we handle the last [0,16) bytes that require a padded block + cbz x2, Lopen_finalize + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the ciphertext + eor v21.16b, v21.16b, v21.16b // Use T1 to generate an AND mask + not v22.16b, v20.16b + + add x7, x1, x2 + mov x6, x2 + +Lopen_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x7, #-1]! + mov v20.b[0], w11 + ext v21.16b, v22.16b, v21.16b, #15 + subs x2, x2, #1 + b.gt Lopen_tail_16_compose + + and v20.16b, v20.16b, v21.16b + // Hash in the final padded block + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + eor v20.16b, v20.16b, v0.16b + +Lopen_tail_16_store: + umov w11, v20.b[0] + strb w11, [x0], #1 + ext v20.16b, v20.16b, v20.16b, #1 + subs x6, x6, #1 + b.gt Lopen_tail_16_store + +Lopen_finalize: + mov x11, v31.d[0] + mov x12, v31.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + // Final reduction step + sub x12, xzr, x15 + orr x13, xzr, #3 + subs x11, x8, #-5 + sbcs x12, x9, x12 + sbcs x13, x10, x13 + csel x8, x11, x8, cs + csel x9, x12, x9, cs + csel x10, x13, x10, cs + mov x11, v27.d[0] + mov x12, v27.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + + stp x8, x9, [x5] + + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] +.cfi_restore b15 +.cfi_restore b14 +.cfi_restore b13 +.cfi_restore b12 +.cfi_restore b11 +.cfi_restore b10 +.cfi_restore b9 +.cfi_restore b8 + ldp x29, x30, [sp], 80 +.cfi_restore w29 +.cfi_restore w30 +.cfi_def_cfa_offset 0 + AARCH64_VALIDATE_LINK_REGISTER + ret + +Lopen_128: + // On some architectures preparing 5 blocks for small buffers is wasteful + eor v25.16b, v25.16b, v25.16b + mov x11, #1 + mov v25.s[0], w11 + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v17.16b, v30.16b + add v15.4s, v17.4s, v25.4s + add v16.4s, v15.4s, v25.4s + + mov x6, #10 + +Lopen_128_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x6, x6, #1 + b.hi Lopen_128_rounds + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + + add v30.4s, v30.4s, v25.4s + add v15.4s, v15.4s, v30.4s + add v30.4s, v30.4s, v25.4s + add v16.4s, v16.4s, v30.4s + + and v2.16b, v2.16b, v27.16b + mov x16, v2.d[0] // Move the R key to GPRs + mov x17, v2.d[1] + mov v27.16b, v7.16b // Store the S key + + bl Lpoly_hash_ad_internal + +Lopen_128_store: + cmp x2, #64 + b.lt Lopen_128_store_64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v22.d[0] + mov x12, v22.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v23.d[0] + mov x12, v23.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + + mov v0.16b, v1.16b + mov v5.16b, v6.16b + mov v10.16b, v11.16b + mov v15.16b, v16.16b + +Lopen_128_store_64: + + lsr x4, x2, #4 + mov x3, x1 + +Lopen_128_hash_64: + cbz x4, Lopen_tail_64_store + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #1 + b Lopen_128_hash_64 +.cfi_endproc + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-linux.S b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-linux.S new file mode 100644 index 0000000000..7d2db8db26 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-linux.S @@ -0,0 +1,3009 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include +.section .rodata + +.align 7 +.Lchacha20_consts: +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +.Linc: +.long 1,2,3,4 +.Lrol8: +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +.Lclamp: +.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC + +.text + +.type .Lpoly_hash_ad_internal,%function +.align 6 +.Lpoly_hash_ad_internal: +.cfi_startproc + cbnz x4, .Lpoly_hash_intro + ret + +.Lpoly_hash_intro: + cmp x4, #16 + b.lt .Lpoly_hash_ad_tail + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #16 + b .Lpoly_hash_ad_internal + +.Lpoly_hash_ad_tail: + cbz x4, .Lpoly_hash_ad_ret + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the AAD + sub x4, x4, #1 + +.Lpoly_hash_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, x4] + mov v20.b[0], w11 + subs x4, x4, #1 + b.ge .Lpoly_hash_tail_16_compose + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +.Lpoly_hash_ad_ret: + ret +.cfi_endproc +.size .Lpoly_hash_ad_internal, .-.Lpoly_hash_ad_internal + +///////////////////////////////// +// +// void chacha20_poly1305_seal(uint8_t *pt, uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, union open_data *seal_data); +// +.globl chacha20_poly1305_seal +.hidden chacha20_poly1305_seal +.type chacha20_poly1305_seal,%function +.align 6 +chacha20_poly1305_seal: + AARCH64_SIGN_LINK_REGISTER +.cfi_startproc + stp x29, x30, [sp, #-80]! +.cfi_def_cfa_offset 80 +.cfi_offset w30, -72 +.cfi_offset w29, -80 + mov x29, sp + // We probably could do .cfi_def_cfa w29, 80 at this point, but since + // we don't actually use the frame pointer like that, it's probably not + // worth bothering. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] +.cfi_offset b15, -8 +.cfi_offset b14, -16 +.cfi_offset b13, -24 +.cfi_offset b12, -32 +.cfi_offset b11, -40 +.cfi_offset b10, -48 +.cfi_offset b9, -56 +.cfi_offset b8, -64 + + adrp x11, .Lchacha20_consts + add x11, x11, :lo12:.Lchacha20_consts + + ld1 {v24.16b - v27.16b}, [x11] // .Load the CONSTS, INC, ROL8 and CLAMP values + ld1 {v28.16b - v30.16b}, [x5] + + mov x15, #1 // Prepare the Poly1305 state + mov x8, #0 + mov x9, #0 + mov x10, #0 + + ldr x12, [x5, #56] // The total cipher text length includes extra_in_len + add x12, x12, x2 + mov v31.d[0], x4 // Store the input and aad lengths + mov v31.d[1], x12 + + cmp x2, #128 + b.le .Lseal_128 // Optimization for smaller buffers + + // Initially we prepare 5 ChaCha20 blocks. Four to encrypt up to 4 blocks (256 bytes) of plaintext, + // and one for the Poly1305 R and S keys. The first four blocks (A0-A3..D0-D3) are computed vertically, + // the fifth block (A4-D4) horizontally. + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + sub x5, x5, #32 + + mov x6, #10 + +.align 5 +.Lseal_init_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x6, x6, #1 + b.hi .Lseal_init_rounds + + add v15.4s, v15.4s, v25.4s + mov x11, #4 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + and v4.16b, v4.16b, v27.16b + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + mov x16, v4.d[0] // Move the R key to GPRs + mov x17, v4.d[1] + mov v27.16b, v9.16b // Store the S key + + bl .Lpoly_hash_ad_internal + + mov x3, x0 + cmp x2, #256 + b.le .Lseal_tail + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #256 + + mov x6, #4 // In the first run of the loop we need to hash 256 bytes, therefore we hash one block for the first 4 rounds + mov x7, #6 // and two blocks for the remaining 6, for a total of (1 * 4 + 2 * 6) * 16 = 256 + +.Lseal_main_loop: + adrp x11, .Lchacha20_consts + add x11, x11, :lo12:.Lchacha20_consts + + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + sub x5, x5, #32 +.align 5 +.Lseal_main_loop_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x6, x6, #1 + b.ge .Lseal_main_loop_rounds + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + subs x7, x7, #1 + b.gt .Lseal_main_loop_rounds + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + add v15.4s, v15.4s, v25.4s + mov x11, #5 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + add v14.4s, v14.4s, v29.4s + add v19.4s, v19.4s, v30.4s + + cmp x2, #320 + b.le .Lseal_tail + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v9.16b + eor v22.16b, v22.16b, v14.16b + eor v23.16b, v23.16b, v19.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #320 + + mov x6, #0 + mov x7, #10 // For the remainder of the loop we always hash and encrypt 320 bytes per iteration + + b .Lseal_main_loop + +.Lseal_tail: + // This part of the function handles the storage and authentication of the last [0,320) bytes + // We assume A0-A4 ... D0-D4 hold at least inl (320 max) bytes of the stream data. + cmp x2, #64 + b.lt .Lseal_tail_64 + + // Store and authenticate 64B blocks per iteration + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v22.d[0] + mov x12, v22.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v23.d[0] + mov x12, v23.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + st1 {v20.16b - v23.16b}, [x0], #64 + sub x2, x2, #64 + + // Shift the state left by 64 bytes for the next iteration of the loop + mov v0.16b, v1.16b + mov v5.16b, v6.16b + mov v10.16b, v11.16b + mov v15.16b, v16.16b + + mov v1.16b, v2.16b + mov v6.16b, v7.16b + mov v11.16b, v12.16b + mov v16.16b, v17.16b + + mov v2.16b, v3.16b + mov v7.16b, v8.16b + mov v12.16b, v13.16b + mov v17.16b, v18.16b + + mov v3.16b, v4.16b + mov v8.16b, v9.16b + mov v13.16b, v14.16b + mov v18.16b, v19.16b + + b .Lseal_tail + +.Lseal_tail_64: + ldp x3, x4, [x5, #48] // extra_in_len and extra_in_ptr + + // Here we handle the last [0,64) bytes of plaintext + cmp x2, #16 + b.lt .Lseal_tail_16 + // Each iteration encrypt and authenticate a 16B block + ld1 {v20.16b}, [x1], #16 + eor v20.16b, v20.16b, v0.16b + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + st1 {v20.16b}, [x0], #16 + + sub x2, x2, #16 + + // Shift the state left by 16 bytes for the next iteration of the loop + mov v0.16b, v5.16b + mov v5.16b, v10.16b + mov v10.16b, v15.16b + + b .Lseal_tail_64 + +.Lseal_tail_16: + // Here we handle the last [0,16) bytes of ciphertext that require a padded block + cbz x2, .Lseal_hash_extra + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the plaintext/extra in + eor v21.16b, v21.16b, v21.16b // Use T1 to generate an AND mask that will only mask the ciphertext bytes + not v22.16b, v20.16b + + mov x6, x2 + add x1, x1, x2 + + cbz x4, .Lseal_tail_16_compose // No extra data to pad with, zero padding + + mov x7, #16 // We need to load some extra_in first for padding + sub x7, x7, x2 + cmp x4, x7 + csel x7, x4, x7, lt // .Load the minimum of extra_in_len and the amount needed to fill the register + mov x12, x7 + add x3, x3, x7 + sub x4, x4, x7 + +.Lseal_tail16_compose_extra_in: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, #-1]! + mov v20.b[0], w11 + subs x7, x7, #1 + b.gt .Lseal_tail16_compose_extra_in + + add x3, x3, x12 + +.Lseal_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x1, #-1]! + mov v20.b[0], w11 + ext v21.16b, v22.16b, v21.16b, #15 + subs x2, x2, #1 + b.gt .Lseal_tail_16_compose + + and v0.16b, v0.16b, v21.16b + eor v20.16b, v20.16b, v0.16b + mov v21.16b, v20.16b + +.Lseal_tail_16_store: + umov w11, v20.b[0] + strb w11, [x0], #1 + ext v20.16b, v20.16b, v20.16b, #1 + subs x6, x6, #1 + b.gt .Lseal_tail_16_store + + // Hash in the final ct block concatenated with extra_in + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +.Lseal_hash_extra: + cbz x4, .Lseal_finalize + +.Lseal_hash_extra_loop: + cmp x4, #16 + b.lt .Lseal_hash_extra_tail + ld1 {v20.16b}, [x3], #16 + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #16 + b .Lseal_hash_extra_loop + +.Lseal_hash_extra_tail: + cbz x4, .Lseal_finalize + eor v20.16b, v20.16b, v20.16b // Use T0 to load the remaining extra ciphertext + add x3, x3, x4 + +.Lseal_hash_extra_load: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, #-1]! + mov v20.b[0], w11 + subs x4, x4, #1 + b.gt .Lseal_hash_extra_load + + // Hash in the final padded extra_in blcok + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +.Lseal_finalize: + mov x11, v31.d[0] + mov x12, v31.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + // Final reduction step + sub x12, xzr, x15 + orr x13, xzr, #3 + subs x11, x8, #-5 + sbcs x12, x9, x12 + sbcs x13, x10, x13 + csel x8, x11, x8, cs + csel x9, x12, x9, cs + csel x10, x13, x10, cs + mov x11, v27.d[0] + mov x12, v27.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + + stp x8, x9, [x5] + + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] +.cfi_restore b15 +.cfi_restore b14 +.cfi_restore b13 +.cfi_restore b12 +.cfi_restore b11 +.cfi_restore b10 +.cfi_restore b9 +.cfi_restore b8 + ldp x29, x30, [sp], 80 +.cfi_restore w29 +.cfi_restore w30 +.cfi_def_cfa_offset 0 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.Lseal_128: + // On some architectures preparing 5 blocks for small buffers is wasteful + eor v25.16b, v25.16b, v25.16b + mov x11, #1 + mov v25.s[0], w11 + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v17.16b, v30.16b + add v15.4s, v17.4s, v25.4s + add v16.4s, v15.4s, v25.4s + + mov x6, #10 + +.Lseal_128_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x6, x6, #1 + b.hi .Lseal_128_rounds + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + + // Only the first 32 bytes of the third block (counter = 0) are needed, + // so skip updating v12 and v17. + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + + add v30.4s, v30.4s, v25.4s + add v15.4s, v15.4s, v30.4s + add v30.4s, v30.4s, v25.4s + add v16.4s, v16.4s, v30.4s + + and v2.16b, v2.16b, v27.16b + mov x16, v2.d[0] // Move the R key to GPRs + mov x17, v2.d[1] + mov v27.16b, v7.16b // Store the S key + + bl .Lpoly_hash_ad_internal + b .Lseal_tail +.cfi_endproc +.size chacha20_poly1305_seal,.-chacha20_poly1305_seal + +///////////////////////////////// +// +// void chacha20_poly1305_open(uint8_t *pt, uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, union open_data *aead_data); +// +.globl chacha20_poly1305_open +.hidden chacha20_poly1305_open +.type chacha20_poly1305_open,%function +.align 6 +chacha20_poly1305_open: + AARCH64_SIGN_LINK_REGISTER +.cfi_startproc + stp x29, x30, [sp, #-80]! +.cfi_def_cfa_offset 80 +.cfi_offset w30, -72 +.cfi_offset w29, -80 + mov x29, sp + // We probably could do .cfi_def_cfa w29, 80 at this point, but since + // we don't actually use the frame pointer like that, it's probably not + // worth bothering. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] +.cfi_offset b15, -8 +.cfi_offset b14, -16 +.cfi_offset b13, -24 +.cfi_offset b12, -32 +.cfi_offset b11, -40 +.cfi_offset b10, -48 +.cfi_offset b9, -56 +.cfi_offset b8, -64 + + adrp x11, .Lchacha20_consts + add x11, x11, :lo12:.Lchacha20_consts + + ld1 {v24.16b - v27.16b}, [x11] // .Load the CONSTS, INC, ROL8 and CLAMP values + ld1 {v28.16b - v30.16b}, [x5] + + mov x15, #1 // Prepare the Poly1305 state + mov x8, #0 + mov x9, #0 + mov x10, #0 + + mov v31.d[0], x4 // Store the input and aad lengths + mov v31.d[1], x2 + + cmp x2, #128 + b.le .Lopen_128 // Optimization for smaller buffers + + // Initially we prepare a single ChaCha20 block for the Poly1305 R and S keys + mov v0.16b, v24.16b + mov v5.16b, v28.16b + mov v10.16b, v29.16b + mov v15.16b, v30.16b + + mov x6, #10 + +.align 5 +.Lopen_init_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + subs x6, x6, #1 + b.hi .Lopen_init_rounds + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + + and v0.16b, v0.16b, v27.16b + mov x16, v0.d[0] // Move the R key to GPRs + mov x17, v0.d[1] + mov v27.16b, v5.16b // Store the S key + + bl .Lpoly_hash_ad_internal + +.Lopen_ad_done: + mov x3, x1 + +// Each iteration of the loop hash 320 bytes, and prepare stream for 320 bytes +.Lopen_main_loop: + + cmp x2, #192 + b.lt .Lopen_tail + + adrp x11, .Lchacha20_consts + add x11, x11, :lo12:.Lchacha20_consts + + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + sub x5, x5, #32 + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + lsr x4, x2, #4 // How many whole blocks we have to hash, will always be at least 12 + sub x4, x4, #10 + + mov x7, #10 + subs x6, x7, x4 + subs x6, x7, x4 // itr1 can be negative if we have more than 320 bytes to hash + csel x7, x7, x4, le // if itr1 is zero or less, itr2 should be 10 to indicate all 10 rounds are full + + cbz x7, .Lopen_main_loop_rounds_short + +.align 5 +.Lopen_main_loop_rounds: + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most +.Lopen_main_loop_rounds_short: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x7, x7, #1 + b.gt .Lopen_main_loop_rounds + subs x6, x6, #1 + b.ge .Lopen_main_loop_rounds_short + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + add v15.4s, v15.4s, v25.4s + mov x11, #5 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + add v14.4s, v14.4s, v29.4s + add v19.4s, v19.4s, v30.4s + + // We can always safely store 192 bytes + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #192 + + mov v0.16b, v3.16b + mov v5.16b, v8.16b + mov v10.16b, v13.16b + mov v15.16b, v18.16b + + cmp x2, #64 + b.lt .Lopen_tail_64_store + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + + mov v0.16b, v4.16b + mov v5.16b, v9.16b + mov v10.16b, v14.16b + mov v15.16b, v19.16b + + cmp x2, #64 + b.lt .Lopen_tail_64_store + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v9.16b + eor v22.16b, v22.16b, v14.16b + eor v23.16b, v23.16b, v19.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + b .Lopen_main_loop + +.Lopen_tail: + + cbz x2, .Lopen_finalize + + lsr x4, x2, #4 // How many whole blocks we have to hash + + cmp x2, #64 + b.le .Lopen_tail_64 + cmp x2, #128 + b.le .Lopen_tail_128 + +.Lopen_tail_192: + // We need three more blocks + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v15.16b, v30.16b + mov v16.16b, v30.16b + mov v17.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + eor v21.16b, v21.16b, v21.16b + ins v23.s[0], v25.s[0] + ins v21.d[0], x15 + + add v22.4s, v23.4s, v21.4s + add v21.4s, v22.4s, v21.4s + + add v15.4s, v15.4s, v21.4s + add v16.4s, v16.4s, v23.4s + add v17.4s, v17.4s, v22.4s + + mov x7, #10 + subs x6, x7, x4 // itr1 can be negative if we have more than 160 bytes to hash + csel x7, x7, x4, le // if itr1 is zero or less, itr2 should be 10 to indicate all 10 rounds are hashing + sub x4, x4, x7 + + cbz x7, .Lopen_tail_192_rounds_no_hash + +.Lopen_tail_192_rounds: + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most +.Lopen_tail_192_rounds_no_hash: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x7, x7, #1 + b.gt .Lopen_tail_192_rounds + subs x6, x6, #1 + b.ge .Lopen_tail_192_rounds_no_hash + + // We hashed 160 bytes at most, may still have 32 bytes left +.Lopen_tail_192_hash: + cbz x4, .Lopen_tail_192_hash_done + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #1 + b .Lopen_tail_192_hash + +.Lopen_tail_192_hash_done: + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + add v12.4s, v12.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v16.4s, v16.4s, v30.4s + add v17.4s, v17.4s, v30.4s + + add v15.4s, v15.4s, v21.4s + add v16.4s, v16.4s, v23.4s + add v17.4s, v17.4s, v22.4s + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #128 + b .Lopen_tail_64_store + +.Lopen_tail_128: + // We need two more blocks + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v15.16b, v30.16b + mov v16.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + eor v22.16b, v22.16b, v22.16b + ins v23.s[0], v25.s[0] + ins v22.d[0], x15 + add v22.4s, v22.4s, v23.4s + + add v15.4s, v15.4s, v22.4s + add v16.4s, v16.4s, v23.4s + + mov x6, #10 + sub x6, x6, x4 + +.Lopen_tail_128_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v1.4s, v1.4s, v6.4s + eor v16.16b, v16.16b, v1.16b + rev32 v16.8h, v16.8h + + add v11.4s, v11.4s, v16.4s + eor v6.16b, v6.16b, v11.16b + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + add v1.4s, v1.4s, v20.4s + eor v16.16b, v16.16b, v1.16b + tbl v16.16b, {v16.16b}, v26.16b + + add v11.4s, v11.4s, v16.4s + eor v20.16b, v20.16b, v11.16b + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + ext v6.16b, v6.16b, v6.16b, #4 + ext v11.16b, v11.16b, v11.16b, #8 + ext v16.16b, v16.16b, v16.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + add v1.4s, v1.4s, v6.4s + eor v16.16b, v16.16b, v1.16b + rev32 v16.8h, v16.8h + + add v11.4s, v11.4s, v16.4s + eor v6.16b, v6.16b, v11.16b + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + add v1.4s, v1.4s, v20.4s + eor v16.16b, v16.16b, v1.16b + tbl v16.16b, {v16.16b}, v26.16b + + add v11.4s, v11.4s, v16.4s + eor v20.16b, v20.16b, v11.16b + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + ext v6.16b, v6.16b, v6.16b, #12 + ext v11.16b, v11.16b, v11.16b, #8 + ext v16.16b, v16.16b, v16.16b, #4 + subs x6, x6, #1 + b.gt .Lopen_tail_128_rounds + cbz x4, .Lopen_tail_128_rounds_done + subs x4, x4, #1 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + b .Lopen_tail_128_rounds + +.Lopen_tail_128_rounds_done: + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v16.4s, v16.4s, v30.4s + add v15.4s, v15.4s, v22.4s + add v16.4s, v16.4s, v23.4s + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + sub x2, x2, #64 + + b .Lopen_tail_64_store + +.Lopen_tail_64: + // We just need a single block + mov v0.16b, v24.16b + mov v5.16b, v28.16b + mov v10.16b, v29.16b + mov v15.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + ins v23.s[0], v25.s[0] + add v15.4s, v15.4s, v23.4s + + mov x6, #10 + sub x6, x6, x4 + +.Lopen_tail_64_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + subs x6, x6, #1 + b.gt .Lopen_tail_64_rounds + cbz x4, .Lopen_tail_64_rounds_done + subs x4, x4, #1 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + b .Lopen_tail_64_rounds + +.Lopen_tail_64_rounds_done: + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v15.4s, v15.4s, v23.4s + +.Lopen_tail_64_store: + cmp x2, #16 + b.lt .Lopen_tail_16 + + ld1 {v20.16b}, [x1], #16 + eor v20.16b, v20.16b, v0.16b + st1 {v20.16b}, [x0], #16 + mov v0.16b, v5.16b + mov v5.16b, v10.16b + mov v10.16b, v15.16b + sub x2, x2, #16 + b .Lopen_tail_64_store + +.Lopen_tail_16: + // Here we handle the last [0,16) bytes that require a padded block + cbz x2, .Lopen_finalize + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the ciphertext + eor v21.16b, v21.16b, v21.16b // Use T1 to generate an AND mask + not v22.16b, v20.16b + + add x7, x1, x2 + mov x6, x2 + +.Lopen_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x7, #-1]! + mov v20.b[0], w11 + ext v21.16b, v22.16b, v21.16b, #15 + subs x2, x2, #1 + b.gt .Lopen_tail_16_compose + + and v20.16b, v20.16b, v21.16b + // Hash in the final padded block + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + eor v20.16b, v20.16b, v0.16b + +.Lopen_tail_16_store: + umov w11, v20.b[0] + strb w11, [x0], #1 + ext v20.16b, v20.16b, v20.16b, #1 + subs x6, x6, #1 + b.gt .Lopen_tail_16_store + +.Lopen_finalize: + mov x11, v31.d[0] + mov x12, v31.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + // Final reduction step + sub x12, xzr, x15 + orr x13, xzr, #3 + subs x11, x8, #-5 + sbcs x12, x9, x12 + sbcs x13, x10, x13 + csel x8, x11, x8, cs + csel x9, x12, x9, cs + csel x10, x13, x10, cs + mov x11, v27.d[0] + mov x12, v27.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + + stp x8, x9, [x5] + + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] +.cfi_restore b15 +.cfi_restore b14 +.cfi_restore b13 +.cfi_restore b12 +.cfi_restore b11 +.cfi_restore b10 +.cfi_restore b9 +.cfi_restore b8 + ldp x29, x30, [sp], 80 +.cfi_restore w29 +.cfi_restore w30 +.cfi_def_cfa_offset 0 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.Lopen_128: + // On some architectures preparing 5 blocks for small buffers is wasteful + eor v25.16b, v25.16b, v25.16b + mov x11, #1 + mov v25.s[0], w11 + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v17.16b, v30.16b + add v15.4s, v17.4s, v25.4s + add v16.4s, v15.4s, v25.4s + + mov x6, #10 + +.Lopen_128_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x6, x6, #1 + b.hi .Lopen_128_rounds + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + + add v30.4s, v30.4s, v25.4s + add v15.4s, v15.4s, v30.4s + add v30.4s, v30.4s, v25.4s + add v16.4s, v16.4s, v30.4s + + and v2.16b, v2.16b, v27.16b + mov x16, v2.d[0] // Move the R key to GPRs + mov x17, v2.d[1] + mov v27.16b, v7.16b // Store the S key + + bl .Lpoly_hash_ad_internal + +.Lopen_128_store: + cmp x2, #64 + b.lt .Lopen_128_store_64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v22.d[0] + mov x12, v22.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v23.d[0] + mov x12, v23.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + + mov v0.16b, v1.16b + mov v5.16b, v6.16b + mov v10.16b, v11.16b + mov v15.16b, v16.16b + +.Lopen_128_store_64: + + lsr x4, x2, #4 + mov x3, x1 + +.Lopen_128_hash_64: + cbz x4, .Lopen_tail_64_store + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #1 + b .Lopen_128_hash_64 +.cfi_endproc +.size chacha20_poly1305_open,.-chacha20_poly1305_open +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-win.S b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-win.S new file mode 100644 index 0000000000..3314f2c5d4 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_armv8-win.S @@ -0,0 +1,3015 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include +.section .rodata + +.align 7 +Lchacha20_consts: +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +Linc: +.long 1,2,3,4 +Lrol8: +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +Lclamp: +.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC + +.text + +.def Lpoly_hash_ad_internal + .type 32 +.endef +.align 6 +Lpoly_hash_ad_internal: +.cfi_startproc + cbnz x4, Lpoly_hash_intro + ret + +Lpoly_hash_intro: + cmp x4, #16 + b.lt Lpoly_hash_ad_tail + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #16 + b Lpoly_hash_ad_internal + +Lpoly_hash_ad_tail: + cbz x4, Lpoly_hash_ad_ret + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the AAD + sub x4, x4, #1 + +Lpoly_hash_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, x4] + mov v20.b[0], w11 + subs x4, x4, #1 + b.ge Lpoly_hash_tail_16_compose + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +Lpoly_hash_ad_ret: + ret +.cfi_endproc + + +///////////////////////////////// +// +// void chacha20_poly1305_seal(uint8_t *pt, uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, union open_data *seal_data); +// +.globl chacha20_poly1305_seal + +.def chacha20_poly1305_seal + .type 32 +.endef +.align 6 +chacha20_poly1305_seal: + AARCH64_SIGN_LINK_REGISTER +.cfi_startproc + stp x29, x30, [sp, #-80]! +.cfi_def_cfa_offset 80 +.cfi_offset w30, -72 +.cfi_offset w29, -80 + mov x29, sp + // We probably could do .cfi_def_cfa w29, 80 at this point, but since + // we don't actually use the frame pointer like that, it's probably not + // worth bothering. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] +.cfi_offset b15, -8 +.cfi_offset b14, -16 +.cfi_offset b13, -24 +.cfi_offset b12, -32 +.cfi_offset b11, -40 +.cfi_offset b10, -48 +.cfi_offset b9, -56 +.cfi_offset b8, -64 + + adrp x11, Lchacha20_consts + add x11, x11, :lo12:Lchacha20_consts + + ld1 {v24.16b - v27.16b}, [x11] // Load the CONSTS, INC, ROL8 and CLAMP values + ld1 {v28.16b - v30.16b}, [x5] + + mov x15, #1 // Prepare the Poly1305 state + mov x8, #0 + mov x9, #0 + mov x10, #0 + + ldr x12, [x5, #56] // The total cipher text length includes extra_in_len + add x12, x12, x2 + mov v31.d[0], x4 // Store the input and aad lengths + mov v31.d[1], x12 + + cmp x2, #128 + b.le Lseal_128 // Optimization for smaller buffers + + // Initially we prepare 5 ChaCha20 blocks. Four to encrypt up to 4 blocks (256 bytes) of plaintext, + // and one for the Poly1305 R and S keys. The first four blocks (A0-A3..D0-D3) are computed vertically, + // the fifth block (A4-D4) horizontally. + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + sub x5, x5, #32 + + mov x6, #10 + +.align 5 +Lseal_init_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x6, x6, #1 + b.hi Lseal_init_rounds + + add v15.4s, v15.4s, v25.4s + mov x11, #4 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + and v4.16b, v4.16b, v27.16b + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + mov x16, v4.d[0] // Move the R key to GPRs + mov x17, v4.d[1] + mov v27.16b, v9.16b // Store the S key + + bl Lpoly_hash_ad_internal + + mov x3, x0 + cmp x2, #256 + b.le Lseal_tail + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #256 + + mov x6, #4 // In the first run of the loop we need to hash 256 bytes, therefore we hash one block for the first 4 rounds + mov x7, #6 // and two blocks for the remaining 6, for a total of (1 * 4 + 2 * 6) * 16 = 256 + +Lseal_main_loop: + adrp x11, Lchacha20_consts + add x11, x11, :lo12:Lchacha20_consts + + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + sub x5, x5, #32 +.align 5 +Lseal_main_loop_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x6, x6, #1 + b.ge Lseal_main_loop_rounds + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + subs x7, x7, #1 + b.gt Lseal_main_loop_rounds + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + add v15.4s, v15.4s, v25.4s + mov x11, #5 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + add v14.4s, v14.4s, v29.4s + add v19.4s, v19.4s, v30.4s + + cmp x2, #320 + b.le Lseal_tail + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v9.16b + eor v22.16b, v22.16b, v14.16b + eor v23.16b, v23.16b, v19.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #320 + + mov x6, #0 + mov x7, #10 // For the remainder of the loop we always hash and encrypt 320 bytes per iteration + + b Lseal_main_loop + +Lseal_tail: + // This part of the function handles the storage and authentication of the last [0,320) bytes + // We assume A0-A4 ... D0-D4 hold at least inl (320 max) bytes of the stream data. + cmp x2, #64 + b.lt Lseal_tail_64 + + // Store and authenticate 64B blocks per iteration + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v22.d[0] + mov x12, v22.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v23.d[0] + mov x12, v23.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + st1 {v20.16b - v23.16b}, [x0], #64 + sub x2, x2, #64 + + // Shift the state left by 64 bytes for the next iteration of the loop + mov v0.16b, v1.16b + mov v5.16b, v6.16b + mov v10.16b, v11.16b + mov v15.16b, v16.16b + + mov v1.16b, v2.16b + mov v6.16b, v7.16b + mov v11.16b, v12.16b + mov v16.16b, v17.16b + + mov v2.16b, v3.16b + mov v7.16b, v8.16b + mov v12.16b, v13.16b + mov v17.16b, v18.16b + + mov v3.16b, v4.16b + mov v8.16b, v9.16b + mov v13.16b, v14.16b + mov v18.16b, v19.16b + + b Lseal_tail + +Lseal_tail_64: + ldp x3, x4, [x5, #48] // extra_in_len and extra_in_ptr + + // Here we handle the last [0,64) bytes of plaintext + cmp x2, #16 + b.lt Lseal_tail_16 + // Each iteration encrypt and authenticate a 16B block + ld1 {v20.16b}, [x1], #16 + eor v20.16b, v20.16b, v0.16b + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + st1 {v20.16b}, [x0], #16 + + sub x2, x2, #16 + + // Shift the state left by 16 bytes for the next iteration of the loop + mov v0.16b, v5.16b + mov v5.16b, v10.16b + mov v10.16b, v15.16b + + b Lseal_tail_64 + +Lseal_tail_16: + // Here we handle the last [0,16) bytes of ciphertext that require a padded block + cbz x2, Lseal_hash_extra + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the plaintext/extra in + eor v21.16b, v21.16b, v21.16b // Use T1 to generate an AND mask that will only mask the ciphertext bytes + not v22.16b, v20.16b + + mov x6, x2 + add x1, x1, x2 + + cbz x4, Lseal_tail_16_compose // No extra data to pad with, zero padding + + mov x7, #16 // We need to load some extra_in first for padding + sub x7, x7, x2 + cmp x4, x7 + csel x7, x4, x7, lt // Load the minimum of extra_in_len and the amount needed to fill the register + mov x12, x7 + add x3, x3, x7 + sub x4, x4, x7 + +Lseal_tail16_compose_extra_in: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, #-1]! + mov v20.b[0], w11 + subs x7, x7, #1 + b.gt Lseal_tail16_compose_extra_in + + add x3, x3, x12 + +Lseal_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x1, #-1]! + mov v20.b[0], w11 + ext v21.16b, v22.16b, v21.16b, #15 + subs x2, x2, #1 + b.gt Lseal_tail_16_compose + + and v0.16b, v0.16b, v21.16b + eor v20.16b, v20.16b, v0.16b + mov v21.16b, v20.16b + +Lseal_tail_16_store: + umov w11, v20.b[0] + strb w11, [x0], #1 + ext v20.16b, v20.16b, v20.16b, #1 + subs x6, x6, #1 + b.gt Lseal_tail_16_store + + // Hash in the final ct block concatenated with extra_in + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +Lseal_hash_extra: + cbz x4, Lseal_finalize + +Lseal_hash_extra_loop: + cmp x4, #16 + b.lt Lseal_hash_extra_tail + ld1 {v20.16b}, [x3], #16 + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #16 + b Lseal_hash_extra_loop + +Lseal_hash_extra_tail: + cbz x4, Lseal_finalize + eor v20.16b, v20.16b, v20.16b // Use T0 to load the remaining extra ciphertext + add x3, x3, x4 + +Lseal_hash_extra_load: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x3, #-1]! + mov v20.b[0], w11 + subs x4, x4, #1 + b.gt Lseal_hash_extra_load + + // Hash in the final padded extra_in blcok + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + +Lseal_finalize: + mov x11, v31.d[0] + mov x12, v31.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + // Final reduction step + sub x12, xzr, x15 + orr x13, xzr, #3 + subs x11, x8, #-5 + sbcs x12, x9, x12 + sbcs x13, x10, x13 + csel x8, x11, x8, cs + csel x9, x12, x9, cs + csel x10, x13, x10, cs + mov x11, v27.d[0] + mov x12, v27.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + + stp x8, x9, [x5] + + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] +.cfi_restore b15 +.cfi_restore b14 +.cfi_restore b13 +.cfi_restore b12 +.cfi_restore b11 +.cfi_restore b10 +.cfi_restore b9 +.cfi_restore b8 + ldp x29, x30, [sp], 80 +.cfi_restore w29 +.cfi_restore w30 +.cfi_def_cfa_offset 0 + AARCH64_VALIDATE_LINK_REGISTER + ret + +Lseal_128: + // On some architectures preparing 5 blocks for small buffers is wasteful + eor v25.16b, v25.16b, v25.16b + mov x11, #1 + mov v25.s[0], w11 + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v17.16b, v30.16b + add v15.4s, v17.4s, v25.4s + add v16.4s, v15.4s, v25.4s + + mov x6, #10 + +Lseal_128_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x6, x6, #1 + b.hi Lseal_128_rounds + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + + // Only the first 32 bytes of the third block (counter = 0) are needed, + // so skip updating v12 and v17. + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + + add v30.4s, v30.4s, v25.4s + add v15.4s, v15.4s, v30.4s + add v30.4s, v30.4s, v25.4s + add v16.4s, v16.4s, v30.4s + + and v2.16b, v2.16b, v27.16b + mov x16, v2.d[0] // Move the R key to GPRs + mov x17, v2.d[1] + mov v27.16b, v7.16b // Store the S key + + bl Lpoly_hash_ad_internal + b Lseal_tail +.cfi_endproc + + +///////////////////////////////// +// +// void chacha20_poly1305_open(uint8_t *pt, uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, union open_data *aead_data); +// +.globl chacha20_poly1305_open + +.def chacha20_poly1305_open + .type 32 +.endef +.align 6 +chacha20_poly1305_open: + AARCH64_SIGN_LINK_REGISTER +.cfi_startproc + stp x29, x30, [sp, #-80]! +.cfi_def_cfa_offset 80 +.cfi_offset w30, -72 +.cfi_offset w29, -80 + mov x29, sp + // We probably could do .cfi_def_cfa w29, 80 at this point, but since + // we don't actually use the frame pointer like that, it's probably not + // worth bothering. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] +.cfi_offset b15, -8 +.cfi_offset b14, -16 +.cfi_offset b13, -24 +.cfi_offset b12, -32 +.cfi_offset b11, -40 +.cfi_offset b10, -48 +.cfi_offset b9, -56 +.cfi_offset b8, -64 + + adrp x11, Lchacha20_consts + add x11, x11, :lo12:Lchacha20_consts + + ld1 {v24.16b - v27.16b}, [x11] // Load the CONSTS, INC, ROL8 and CLAMP values + ld1 {v28.16b - v30.16b}, [x5] + + mov x15, #1 // Prepare the Poly1305 state + mov x8, #0 + mov x9, #0 + mov x10, #0 + + mov v31.d[0], x4 // Store the input and aad lengths + mov v31.d[1], x2 + + cmp x2, #128 + b.le Lopen_128 // Optimization for smaller buffers + + // Initially we prepare a single ChaCha20 block for the Poly1305 R and S keys + mov v0.16b, v24.16b + mov v5.16b, v28.16b + mov v10.16b, v29.16b + mov v15.16b, v30.16b + + mov x6, #10 + +.align 5 +Lopen_init_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + subs x6, x6, #1 + b.hi Lopen_init_rounds + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + + and v0.16b, v0.16b, v27.16b + mov x16, v0.d[0] // Move the R key to GPRs + mov x17, v0.d[1] + mov v27.16b, v5.16b // Store the S key + + bl Lpoly_hash_ad_internal + +Lopen_ad_done: + mov x3, x1 + +// Each iteration of the loop hash 320 bytes, and prepare stream for 320 bytes +Lopen_main_loop: + + cmp x2, #192 + b.lt Lopen_tail + + adrp x11, Lchacha20_consts + add x11, x11, :lo12:Lchacha20_consts + + ld4r {v0.4s,v1.4s,v2.4s,v3.4s}, [x11] + mov v4.16b, v24.16b + + ld4r {v5.4s,v6.4s,v7.4s,v8.4s}, [x5], #16 + mov v9.16b, v28.16b + + ld4r {v10.4s,v11.4s,v12.4s,v13.4s}, [x5], #16 + mov v14.16b, v29.16b + + ld4r {v15.4s,v16.4s,v17.4s,v18.4s}, [x5] + sub x5, x5, #32 + add v15.4s, v15.4s, v25.4s + mov v19.16b, v30.16b + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + lsr x4, x2, #4 // How many whole blocks we have to hash, will always be at least 12 + sub x4, x4, #10 + + mov x7, #10 + subs x6, x7, x4 + subs x6, x7, x4 // itr1 can be negative if we have more than 320 bytes to hash + csel x7, x7, x4, le // if itr1 is zero or less, itr2 should be 10 to indicate all 10 rounds are full + + cbz x7, Lopen_main_loop_rounds_short + +.align 5 +Lopen_main_loop_rounds: + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most +Lopen_main_loop_rounds_short: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v9.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v18.8h, v18.8h + rev32 v19.8h, v19.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + eor v8.16b, v8.16b, v13.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v9.4s, #20 + sli v8.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + add v4.4s, v4.4s, v8.4s + + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v18.16b, {v18.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + add v13.4s, v13.4s, v18.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v14.16b + + ushr v9.4s, v8.4s, #25 + sli v9.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #4 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #12 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + add v0.4s, v0.4s, v6.4s + add v1.4s, v1.4s, v7.4s + add v2.4s, v2.4s, v8.4s + add v3.4s, v3.4s, v5.4s + add v4.4s, v4.4s, v9.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + rev32 v18.8h, v18.8h + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + rev32 v19.8h, v19.8h + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v6.16b, v6.16b, v12.16b + eor v7.16b, v7.16b, v13.16b + eor v8.16b, v8.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v9.16b, v9.16b, v14.16b + + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + ushr v7.4s, v8.4s, #20 + sli v7.4s, v8.4s, #12 + ushr v8.4s, v5.4s, #20 + sli v8.4s, v5.4s, #12 + ushr v5.4s, v9.4s, #20 + sli v5.4s, v9.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v8.4s + add v4.4s, v4.4s, v5.4s + + eor v18.16b, v18.16b, v0.16b + eor v15.16b, v15.16b, v1.16b + eor v16.16b, v16.16b, v2.16b + eor v17.16b, v17.16b, v3.16b + eor v19.16b, v19.16b, v4.16b + + tbl v18.16b, {v18.16b}, v26.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + tbl v19.16b, {v19.16b}, v26.16b + + add v12.4s, v12.4s, v18.4s + add v13.4s, v13.4s, v15.4s + add v10.4s, v10.4s, v16.4s + add v11.4s, v11.4s, v17.4s + add v14.4s, v14.4s, v19.4s + + eor v20.16b, v20.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v7.16b, v7.16b, v10.16b + eor v8.16b, v8.16b, v11.16b + eor v5.16b, v5.16b, v14.16b + + ushr v9.4s, v5.4s, #25 + sli v9.4s, v5.4s, #7 + ushr v5.4s, v8.4s, #25 + sli v5.4s, v8.4s, #7 + ushr v8.4s, v7.4s, #25 + sli v8.4s, v7.4s, #7 + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + + ext v9.16b, v9.16b, v9.16b, #12 + ext v14.16b, v14.16b, v14.16b, #8 + ext v19.16b, v19.16b, v19.16b, #4 + subs x7, x7, #1 + b.gt Lopen_main_loop_rounds + subs x6, x6, #1 + b.ge Lopen_main_loop_rounds_short + + eor v20.16b, v20.16b, v20.16b //zero + not v21.16b, v20.16b // -1 + sub v21.4s, v25.4s, v21.4s // Add +1 + ext v20.16b, v21.16b, v20.16b, #12 // Get the last element (counter) + add v19.4s, v19.4s, v20.4s + + add v15.4s, v15.4s, v25.4s + mov x11, #5 + dup v20.4s, w11 + add v25.4s, v25.4s, v20.4s + + zip1 v20.4s, v0.4s, v1.4s + zip2 v21.4s, v0.4s, v1.4s + zip1 v22.4s, v2.4s, v3.4s + zip2 v23.4s, v2.4s, v3.4s + + zip1 v0.2d, v20.2d, v22.2d + zip2 v1.2d, v20.2d, v22.2d + zip1 v2.2d, v21.2d, v23.2d + zip2 v3.2d, v21.2d, v23.2d + + zip1 v20.4s, v5.4s, v6.4s + zip2 v21.4s, v5.4s, v6.4s + zip1 v22.4s, v7.4s, v8.4s + zip2 v23.4s, v7.4s, v8.4s + + zip1 v5.2d, v20.2d, v22.2d + zip2 v6.2d, v20.2d, v22.2d + zip1 v7.2d, v21.2d, v23.2d + zip2 v8.2d, v21.2d, v23.2d + + zip1 v20.4s, v10.4s, v11.4s + zip2 v21.4s, v10.4s, v11.4s + zip1 v22.4s, v12.4s, v13.4s + zip2 v23.4s, v12.4s, v13.4s + + zip1 v10.2d, v20.2d, v22.2d + zip2 v11.2d, v20.2d, v22.2d + zip1 v12.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + + zip1 v20.4s, v15.4s, v16.4s + zip2 v21.4s, v15.4s, v16.4s + zip1 v22.4s, v17.4s, v18.4s + zip2 v23.4s, v17.4s, v18.4s + + zip1 v15.2d, v20.2d, v22.2d + zip2 v16.2d, v20.2d, v22.2d + zip1 v17.2d, v21.2d, v23.2d + zip2 v18.2d, v21.2d, v23.2d + + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + + add v1.4s, v1.4s, v24.4s + add v6.4s, v6.4s, v28.4s + add v11.4s, v11.4s, v29.4s + add v16.4s, v16.4s, v30.4s + + add v2.4s, v2.4s, v24.4s + add v7.4s, v7.4s, v28.4s + add v12.4s, v12.4s, v29.4s + add v17.4s, v17.4s, v30.4s + + add v3.4s, v3.4s, v24.4s + add v8.4s, v8.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v18.4s, v18.4s, v30.4s + + add v4.4s, v4.4s, v24.4s + add v9.4s, v9.4s, v28.4s + add v14.4s, v14.4s, v29.4s + add v19.4s, v19.4s, v30.4s + + // We can always safely store 192 bytes + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #192 + + mov v0.16b, v3.16b + mov v5.16b, v8.16b + mov v10.16b, v13.16b + mov v15.16b, v18.16b + + cmp x2, #64 + b.lt Lopen_tail_64_store + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v22.16b, v22.16b, v13.16b + eor v23.16b, v23.16b, v18.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + + mov v0.16b, v4.16b + mov v5.16b, v9.16b + mov v10.16b, v14.16b + mov v15.16b, v19.16b + + cmp x2, #64 + b.lt Lopen_tail_64_store + + ld1 {v20.16b - v23.16b}, [x1], #64 + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v9.16b + eor v22.16b, v22.16b, v14.16b + eor v23.16b, v23.16b, v19.16b + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + b Lopen_main_loop + +Lopen_tail: + + cbz x2, Lopen_finalize + + lsr x4, x2, #4 // How many whole blocks we have to hash + + cmp x2, #64 + b.le Lopen_tail_64 + cmp x2, #128 + b.le Lopen_tail_128 + +Lopen_tail_192: + // We need three more blocks + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v15.16b, v30.16b + mov v16.16b, v30.16b + mov v17.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + eor v21.16b, v21.16b, v21.16b + ins v23.s[0], v25.s[0] + ins v21.d[0], x15 + + add v22.4s, v23.4s, v21.4s + add v21.4s, v22.4s, v21.4s + + add v15.4s, v15.4s, v21.4s + add v16.4s, v16.4s, v23.4s + add v17.4s, v17.4s, v22.4s + + mov x7, #10 + subs x6, x7, x4 // itr1 can be negative if we have more than 160 bytes to hash + csel x7, x7, x4, le // if itr1 is zero or less, itr2 should be 10 to indicate all 10 rounds are hashing + sub x4, x4, x7 + + cbz x7, Lopen_tail_192_rounds_no_hash + +Lopen_tail_192_rounds: + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most +Lopen_tail_192_rounds_no_hash: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x7, x7, #1 + b.gt Lopen_tail_192_rounds + subs x6, x6, #1 + b.ge Lopen_tail_192_rounds_no_hash + + // We hashed 160 bytes at most, may still have 32 bytes left +Lopen_tail_192_hash: + cbz x4, Lopen_tail_192_hash_done + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #1 + b Lopen_tail_192_hash + +Lopen_tail_192_hash_done: + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + add v12.4s, v12.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v16.4s, v16.4s, v30.4s + add v17.4s, v17.4s, v30.4s + + add v15.4s, v15.4s, v21.4s + add v16.4s, v16.4s, v23.4s + add v17.4s, v17.4s, v22.4s + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v7.16b + eor v22.16b, v22.16b, v12.16b + eor v23.16b, v23.16b, v17.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #128 + b Lopen_tail_64_store + +Lopen_tail_128: + // We need two more blocks + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v15.16b, v30.16b + mov v16.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + eor v22.16b, v22.16b, v22.16b + ins v23.s[0], v25.s[0] + ins v22.d[0], x15 + add v22.4s, v22.4s, v23.4s + + add v15.4s, v15.4s, v22.4s + add v16.4s, v16.4s, v23.4s + + mov x6, #10 + sub x6, x6, x4 + +Lopen_tail_128_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v1.4s, v1.4s, v6.4s + eor v16.16b, v16.16b, v1.16b + rev32 v16.8h, v16.8h + + add v11.4s, v11.4s, v16.4s + eor v6.16b, v6.16b, v11.16b + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + add v1.4s, v1.4s, v20.4s + eor v16.16b, v16.16b, v1.16b + tbl v16.16b, {v16.16b}, v26.16b + + add v11.4s, v11.4s, v16.4s + eor v20.16b, v20.16b, v11.16b + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + ext v6.16b, v6.16b, v6.16b, #4 + ext v11.16b, v11.16b, v11.16b, #8 + ext v16.16b, v16.16b, v16.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + add v1.4s, v1.4s, v6.4s + eor v16.16b, v16.16b, v1.16b + rev32 v16.8h, v16.8h + + add v11.4s, v11.4s, v16.4s + eor v6.16b, v6.16b, v11.16b + ushr v20.4s, v6.4s, #20 + sli v20.4s, v6.4s, #12 + add v1.4s, v1.4s, v20.4s + eor v16.16b, v16.16b, v1.16b + tbl v16.16b, {v16.16b}, v26.16b + + add v11.4s, v11.4s, v16.4s + eor v20.16b, v20.16b, v11.16b + ushr v6.4s, v20.4s, #25 + sli v6.4s, v20.4s, #7 + ext v6.16b, v6.16b, v6.16b, #12 + ext v11.16b, v11.16b, v11.16b, #8 + ext v16.16b, v16.16b, v16.16b, #4 + subs x6, x6, #1 + b.gt Lopen_tail_128_rounds + cbz x4, Lopen_tail_128_rounds_done + subs x4, x4, #1 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + b Lopen_tail_128_rounds + +Lopen_tail_128_rounds_done: + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v16.4s, v16.4s, v30.4s + add v15.4s, v15.4s, v22.4s + add v16.4s, v16.4s, v23.4s + + ld1 {v20.16b - v23.16b}, [x1], #64 + + eor v20.16b, v20.16b, v1.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v11.16b + eor v23.16b, v23.16b, v16.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + sub x2, x2, #64 + + b Lopen_tail_64_store + +Lopen_tail_64: + // We just need a single block + mov v0.16b, v24.16b + mov v5.16b, v28.16b + mov v10.16b, v29.16b + mov v15.16b, v30.16b + eor v23.16b, v23.16b, v23.16b + ins v23.s[0], v25.s[0] + add v15.4s, v15.4s, v23.4s + + mov x6, #10 + sub x6, x6, x4 + +Lopen_tail_64_rounds: + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #4 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #12 + add v0.4s, v0.4s, v5.4s + eor v15.16b, v15.16b, v0.16b + rev32 v15.8h, v15.8h + + add v10.4s, v10.4s, v15.4s + eor v5.16b, v5.16b, v10.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + add v0.4s, v0.4s, v20.4s + eor v15.16b, v15.16b, v0.16b + tbl v15.16b, {v15.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + eor v20.16b, v20.16b, v10.16b + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + ext v5.16b, v5.16b, v5.16b, #12 + ext v10.16b, v10.16b, v10.16b, #8 + ext v15.16b, v15.16b, v15.16b, #4 + subs x6, x6, #1 + b.gt Lopen_tail_64_rounds + cbz x4, Lopen_tail_64_rounds_done + subs x4, x4, #1 + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + b Lopen_tail_64_rounds + +Lopen_tail_64_rounds_done: + add v0.4s, v0.4s, v24.4s + add v5.4s, v5.4s, v28.4s + add v10.4s, v10.4s, v29.4s + add v15.4s, v15.4s, v30.4s + add v15.4s, v15.4s, v23.4s + +Lopen_tail_64_store: + cmp x2, #16 + b.lt Lopen_tail_16 + + ld1 {v20.16b}, [x1], #16 + eor v20.16b, v20.16b, v0.16b + st1 {v20.16b}, [x0], #16 + mov v0.16b, v5.16b + mov v5.16b, v10.16b + mov v10.16b, v15.16b + sub x2, x2, #16 + b Lopen_tail_64_store + +Lopen_tail_16: + // Here we handle the last [0,16) bytes that require a padded block + cbz x2, Lopen_finalize + + eor v20.16b, v20.16b, v20.16b // Use T0 to load the ciphertext + eor v21.16b, v21.16b, v21.16b // Use T1 to generate an AND mask + not v22.16b, v20.16b + + add x7, x1, x2 + mov x6, x2 + +Lopen_tail_16_compose: + ext v20.16b, v20.16b, v20.16b, #15 + ldrb w11, [x7, #-1]! + mov v20.b[0], w11 + ext v21.16b, v22.16b, v21.16b, #15 + subs x2, x2, #1 + b.gt Lopen_tail_16_compose + + and v20.16b, v20.16b, v21.16b + // Hash in the final padded block + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + eor v20.16b, v20.16b, v0.16b + +Lopen_tail_16_store: + umov w11, v20.b[0] + strb w11, [x0], #1 + ext v20.16b, v20.16b, v20.16b, #1 + subs x6, x6, #1 + b.gt Lopen_tail_16_store + +Lopen_finalize: + mov x11, v31.d[0] + mov x12, v31.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + // Final reduction step + sub x12, xzr, x15 + orr x13, xzr, #3 + subs x11, x8, #-5 + sbcs x12, x9, x12 + sbcs x13, x10, x13 + csel x8, x11, x8, cs + csel x9, x12, x9, cs + csel x10, x13, x10, cs + mov x11, v27.d[0] + mov x12, v27.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + + stp x8, x9, [x5] + + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] +.cfi_restore b15 +.cfi_restore b14 +.cfi_restore b13 +.cfi_restore b12 +.cfi_restore b11 +.cfi_restore b10 +.cfi_restore b9 +.cfi_restore b8 + ldp x29, x30, [sp], 80 +.cfi_restore w29 +.cfi_restore w30 +.cfi_def_cfa_offset 0 + AARCH64_VALIDATE_LINK_REGISTER + ret + +Lopen_128: + // On some architectures preparing 5 blocks for small buffers is wasteful + eor v25.16b, v25.16b, v25.16b + mov x11, #1 + mov v25.s[0], w11 + mov v0.16b, v24.16b + mov v1.16b, v24.16b + mov v2.16b, v24.16b + mov v5.16b, v28.16b + mov v6.16b, v28.16b + mov v7.16b, v28.16b + mov v10.16b, v29.16b + mov v11.16b, v29.16b + mov v12.16b, v29.16b + mov v17.16b, v30.16b + add v15.4s, v17.4s, v25.4s + add v16.4s, v15.4s, v25.4s + + mov x6, #10 + +Lopen_128_rounds: + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #4 + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #4 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #12 + ext v16.16b, v16.16b, v16.16b, #12 + ext v17.16b, v17.16b, v17.16b, #12 + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + rev32 v15.8h, v15.8h + rev32 v16.8h, v16.8h + rev32 v17.8h, v17.8h + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v5.16b, v5.16b, v10.16b + eor v6.16b, v6.16b, v11.16b + eor v7.16b, v7.16b, v12.16b + ushr v20.4s, v5.4s, #20 + sli v20.4s, v5.4s, #12 + ushr v5.4s, v6.4s, #20 + sli v5.4s, v6.4s, #12 + ushr v6.4s, v7.4s, #20 + sli v6.4s, v7.4s, #12 + + add v0.4s, v0.4s, v20.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + eor v15.16b, v15.16b, v0.16b + eor v16.16b, v16.16b, v1.16b + eor v17.16b, v17.16b, v2.16b + tbl v15.16b, {v15.16b}, v26.16b + tbl v16.16b, {v16.16b}, v26.16b + tbl v17.16b, {v17.16b}, v26.16b + + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v16.4s + add v12.4s, v12.4s, v17.4s + eor v20.16b, v20.16b, v10.16b + eor v5.16b, v5.16b, v11.16b + eor v6.16b, v6.16b, v12.16b + ushr v7.4s, v6.4s, #25 + sli v7.4s, v6.4s, #7 + ushr v6.4s, v5.4s, #25 + sli v6.4s, v5.4s, #7 + ushr v5.4s, v20.4s, #25 + sli v5.4s, v20.4s, #7 + + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + + ext v10.16b, v10.16b, v10.16b, #8 + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 + + ext v15.16b, v15.16b, v15.16b, #4 + ext v16.16b, v16.16b, v16.16b, #4 + ext v17.16b, v17.16b, v17.16b, #4 + subs x6, x6, #1 + b.hi Lopen_128_rounds + + add v0.4s, v0.4s, v24.4s + add v1.4s, v1.4s, v24.4s + add v2.4s, v2.4s, v24.4s + + add v5.4s, v5.4s, v28.4s + add v6.4s, v6.4s, v28.4s + add v7.4s, v7.4s, v28.4s + + add v10.4s, v10.4s, v29.4s + add v11.4s, v11.4s, v29.4s + + add v30.4s, v30.4s, v25.4s + add v15.4s, v15.4s, v30.4s + add v30.4s, v30.4s, v25.4s + add v16.4s, v16.4s, v30.4s + + and v2.16b, v2.16b, v27.16b + mov x16, v2.d[0] // Move the R key to GPRs + mov x17, v2.d[1] + mov v27.16b, v7.16b // Store the S key + + bl Lpoly_hash_ad_internal + +Lopen_128_store: + cmp x2, #64 + b.lt Lopen_128_store_64 + + ld1 {v20.16b - v23.16b}, [x1], #64 + + mov x11, v20.d[0] + mov x12, v20.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v21.d[0] + mov x12, v21.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v22.d[0] + mov x12, v22.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + mov x11, v23.d[0] + mov x12, v23.d[1] + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v15.16b + + st1 {v20.16b - v23.16b}, [x0], #64 + + sub x2, x2, #64 + + mov v0.16b, v1.16b + mov v5.16b, v6.16b + mov v10.16b, v11.16b + mov v15.16b, v16.16b + +Lopen_128_store_64: + + lsr x4, x2, #4 + mov x3, x1 + +Lopen_128_hash_64: + cbz x4, Lopen_tail_64_store + ldp x11, x12, [x3], 16 + adds x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x15 + mul x11, x8, x16 // [t2:t1:t0] = [acc2:acc1:acc0] * r0 + umulh x12, x8, x16 + mul x13, x9, x16 + umulh x14, x9, x16 + adds x12, x12, x13 + mul x13, x10, x16 + adc x13, x13, x14 + mul x14, x8, x17 // [t3:t2:t1:t0] = [acc2:acc1:acc0] * [r1:r0] + umulh x8, x8, x17 + adds x12, x12, x14 + mul x14, x9, x17 + umulh x9, x9, x17 + adcs x14, x14, x8 + mul x10, x10, x17 + adc x10, x10, x9 + adds x13, x13, x14 + adc x14, x10, xzr + and x10, x13, #3 // At this point acc2 is 2 bits at most (value of 3) + and x8, x13, #-4 + extr x13, x14, x13, #2 + adds x8, x8, x11 + lsr x11, x14, #2 + adc x9, x14, x11 // No carry out since t0 is 61 bits and t3 is 63 bits + adds x8, x8, x13 + adcs x9, x9, x12 + adc x10, x10, xzr // At this point acc2 has the value of 4 at most + sub x4, x4, #1 + b Lopen_128_hash_64 +.cfi_endproc + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-apple.S b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-apple.S new file mode 100644 index 0000000000..e4a7202570 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-apple.S @@ -0,0 +1,8875 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + +chacha20_poly1305_constants: + +.section __DATA,__const +.p2align 6 +L$chacha20_consts: +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +L$rol8: +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +L$rol16: +.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 +.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 +L$avx2_init: +.long 0,0,0,0 +L$sse_inc: +.long 1,0,0,0 +L$avx2_inc: +.long 2,0,0,0,2,0,0,0 +L$clamp: +.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC +.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF +.p2align 4 +L$and_masks: +.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.text + + +.p2align 6 +poly_hash_ad_internal: + + + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + cmpq $13,%r8 + jne L$hash_ad_loop +L$poly_fast_tls_ad: + + movq (%rcx),%r10 + movq 5(%rcx),%r11 + shrq $24,%r11 + movq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + ret +L$hash_ad_loop: + + cmpq $16,%r8 + jb L$hash_ad_tail + addq 0+0(%rcx),%r10 + adcq 8+0(%rcx),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rcx),%rcx + subq $16,%r8 + jmp L$hash_ad_loop +L$hash_ad_tail: + cmpq $0,%r8 + je L$hash_ad_done + + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + addq %r8,%rcx +L$hash_ad_tail_loop: + shldq $8,%r13,%r14 + shlq $8,%r13 + movzbq -1(%rcx),%r15 + xorq %r15,%r13 + decq %rcx + decq %r8 + jne L$hash_ad_tail_loop + + addq %r13,%r10 + adcq %r14,%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + +L$hash_ad_done: + ret + + + +.globl _chacha20_poly1305_open +.private_extern _chacha20_poly1305_open + +.p2align 6 +_chacha20_poly1305_open: + +_CET_ENDBR + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + + + pushq %r9 + + subq $288 + 0 + 32,%rsp + + + leaq 32(%rsp),%rbp + andq $-32,%rbp + + movq %rdx,%rbx + movq %r8,0+0+32(%rbp) + movq %rbx,8+0+32(%rbp) + + movl _OPENSSL_ia32cap_P+8(%rip),%eax + andl $288,%eax + xorl $288,%eax + jz chacha20_poly1305_open_avx2 + + cmpq $128,%rbx + jbe L$open_sse_128 + + movdqa L$chacha20_consts(%rip),%xmm0 + movdqu 0(%r9),%xmm4 + movdqu 16(%r9),%xmm8 + movdqu 32(%r9),%xmm12 + + movdqa %xmm12,%xmm7 + + movdqa %xmm4,0+48(%rbp) + movdqa %xmm8,0+64(%rbp) + movdqa %xmm12,0+96(%rbp) + movq $10,%r10 +L$open_sse_init_rounds: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + decq %r10 + jne L$open_sse_init_rounds + + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + + pand L$clamp(%rip),%xmm0 + movdqa %xmm0,0+0(%rbp) + movdqa %xmm4,0+16(%rbp) + + movq %r8,%r8 + call poly_hash_ad_internal +L$open_sse_main_loop: + cmpq $256,%rbx + jb L$open_sse_tail + + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa %xmm0,%xmm3 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa 0+96(%rbp),%xmm15 + paddd L$sse_inc(%rip),%xmm15 + movdqa %xmm15,%xmm14 + paddd L$sse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + movdqa %xmm15,0+144(%rbp) + + + + movq $4,%rcx + movq %rsi,%r8 +L$open_sse_main_loop_rounds: + movdqa %xmm8,0+80(%rbp) + movdqa L$rol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + + leaq 16(%r8),%r8 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movdqa L$rol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 +.byte 102,15,58,15,255,4 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,12 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + movdqa %xmm8,0+80(%rbp) + movdqa L$rol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movdqa L$rol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 +.byte 102,15,58,15,255,12 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,4 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + decq %rcx + jge L$open_sse_main_loop_rounds + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 + cmpq $-6,%rcx + jg L$open_sse_main_loop_rounds + paddd L$chacha20_consts(%rip),%xmm3 + paddd 0+48(%rbp),%xmm7 + paddd 0+64(%rbp),%xmm11 + paddd 0+144(%rbp),%xmm15 + paddd L$chacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd L$chacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqa %xmm12,0+80(%rbp) + movdqu 0 + 0(%rsi),%xmm12 + pxor %xmm3,%xmm12 + movdqu %xmm12,0 + 0(%rdi) + movdqu 16 + 0(%rsi),%xmm12 + pxor %xmm7,%xmm12 + movdqu %xmm12,16 + 0(%rdi) + movdqu 32 + 0(%rsi),%xmm12 + pxor %xmm11,%xmm12 + movdqu %xmm12,32 + 0(%rdi) + movdqu 48 + 0(%rsi),%xmm12 + pxor %xmm15,%xmm12 + movdqu %xmm12,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 64(%rdi) + movdqu %xmm6,16 + 64(%rdi) + movdqu %xmm10,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + movdqu 0 + 128(%rsi),%xmm3 + movdqu 16 + 128(%rsi),%xmm7 + movdqu 32 + 128(%rsi),%xmm11 + movdqu 48 + 128(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 128(%rdi) + movdqu %xmm5,16 + 128(%rdi) + movdqu %xmm9,32 + 128(%rdi) + movdqu %xmm15,48 + 128(%rdi) + movdqu 0 + 192(%rsi),%xmm3 + movdqu 16 + 192(%rsi),%xmm7 + movdqu 32 + 192(%rsi),%xmm11 + movdqu 48 + 192(%rsi),%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm7,%xmm4 + pxor %xmm11,%xmm8 + pxor 0+80(%rbp),%xmm15 + movdqu %xmm0,0 + 192(%rdi) + movdqu %xmm4,16 + 192(%rdi) + movdqu %xmm8,32 + 192(%rdi) + movdqu %xmm15,48 + 192(%rdi) + + leaq 256(%rsi),%rsi + leaq 256(%rdi),%rdi + subq $256,%rbx + jmp L$open_sse_main_loop +L$open_sse_tail: + + testq %rbx,%rbx + jz L$open_sse_finalize + cmpq $192,%rbx + ja L$open_sse_tail_256 + cmpq $128,%rbx + ja L$open_sse_tail_192 + cmpq $64,%rbx + ja L$open_sse_tail_128 + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa 0+96(%rbp),%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + + xorq %r8,%r8 + movq %rbx,%rcx + cmpq $16,%rcx + jb L$open_sse_tail_64_rounds +L$open_sse_tail_64_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + subq $16,%rcx +L$open_sse_tail_64_rounds: + addq $16,%r8 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + cmpq $16,%rcx + jae L$open_sse_tail_64_rounds_and_x1hash + cmpq $160,%r8 + jne L$open_sse_tail_64_rounds + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + + jmp L$open_sse_tail_64_dec_loop + +L$open_sse_tail_128: + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa 0+96(%rbp),%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + + movq %rbx,%rcx + andq $-16,%rcx + xorq %r8,%r8 +L$open_sse_tail_128_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +L$open_sse_tail_128_rounds: + addq $16,%r8 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + + cmpq %rcx,%r8 + jb L$open_sse_tail_128_rounds_and_x1hash + cmpq $160,%r8 + jne L$open_sse_tail_128_rounds + paddd L$chacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 0(%rdi) + movdqu %xmm5,16 + 0(%rdi) + movdqu %xmm9,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + + subq $64,%rbx + leaq 64(%rsi),%rsi + leaq 64(%rdi),%rdi + jmp L$open_sse_tail_64_dec_loop + +L$open_sse_tail_192: + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa 0+96(%rbp),%xmm14 + paddd L$sse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + + movq %rbx,%rcx + movq $160,%r8 + cmpq $160,%rcx + cmovgq %r8,%rcx + andq $-16,%rcx + xorq %r8,%r8 +L$open_sse_tail_192_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +L$open_sse_tail_192_rounds: + addq $16,%r8 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + + cmpq %rcx,%r8 + jb L$open_sse_tail_192_rounds_and_x1hash + cmpq $160,%r8 + jne L$open_sse_tail_192_rounds + cmpq $176,%rbx + jb L$open_sse_tail_192_finish + addq 0+160(%rsi),%r10 + adcq 8+160(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + cmpq $192,%rbx + jb L$open_sse_tail_192_finish + addq 0+176(%rsi),%r10 + adcq 8+176(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +L$open_sse_tail_192_finish: + paddd L$chacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd L$chacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 0(%rdi) + movdqu %xmm6,16 + 0(%rdi) + movdqu %xmm10,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 64(%rdi) + movdqu %xmm5,16 + 64(%rdi) + movdqu %xmm9,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + + subq $128,%rbx + leaq 128(%rsi),%rsi + leaq 128(%rdi),%rdi + jmp L$open_sse_tail_64_dec_loop + +L$open_sse_tail_256: + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa %xmm0,%xmm3 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa 0+96(%rbp),%xmm15 + paddd L$sse_inc(%rip),%xmm15 + movdqa %xmm15,%xmm14 + paddd L$sse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + movdqa %xmm15,0+144(%rbp) + + xorq %r8,%r8 +L$open_sse_tail_256_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movdqa %xmm11,0+80(%rbp) + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm4 + pxor %xmm11,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm4 + pxor %xmm11,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm5 + pxor %xmm11,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm5 + pxor %xmm11,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm6 + pxor %xmm11,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm6 + pxor %xmm11,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + movdqa 0+80(%rbp),%xmm11 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movdqa %xmm9,0+80(%rbp) + paddd %xmm7,%xmm3 + pxor %xmm3,%xmm15 + pshufb L$rol16(%rip),%xmm15 + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm9 + pslld $12,%xmm9 + psrld $20,%xmm7 + pxor %xmm9,%xmm7 + paddd %xmm7,%xmm3 + pxor %xmm3,%xmm15 + pshufb L$rol8(%rip),%xmm15 + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm9 + pslld $7,%xmm9 + psrld $25,%xmm7 + pxor %xmm9,%xmm7 +.byte 102,15,58,15,255,4 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,12 + movdqa 0+80(%rbp),%xmm9 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + movdqa %xmm11,0+80(%rbp) + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm4 + pxor %xmm11,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm4 + pxor %xmm11,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm5 + pxor %xmm11,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm5 + pxor %xmm11,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm6 + pxor %xmm11,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm6 + pxor %xmm11,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + movdqa 0+80(%rbp),%xmm11 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + movdqa %xmm9,0+80(%rbp) + paddd %xmm7,%xmm3 + pxor %xmm3,%xmm15 + pshufb L$rol16(%rip),%xmm15 + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm9 + pslld $12,%xmm9 + psrld $20,%xmm7 + pxor %xmm9,%xmm7 + paddd %xmm7,%xmm3 + pxor %xmm3,%xmm15 + pshufb L$rol8(%rip),%xmm15 + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm9 + pslld $7,%xmm9 + psrld $25,%xmm7 + pxor %xmm9,%xmm7 +.byte 102,15,58,15,255,12 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,4 + movdqa 0+80(%rbp),%xmm9 + + addq $16,%r8 + cmpq $160,%r8 + jb L$open_sse_tail_256_rounds_and_x1hash + + movq %rbx,%rcx + andq $-16,%rcx +L$open_sse_tail_256_hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + addq $16,%r8 + cmpq %rcx,%r8 + jb L$open_sse_tail_256_hash + paddd L$chacha20_consts(%rip),%xmm3 + paddd 0+48(%rbp),%xmm7 + paddd 0+64(%rbp),%xmm11 + paddd 0+144(%rbp),%xmm15 + paddd L$chacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd L$chacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqa %xmm12,0+80(%rbp) + movdqu 0 + 0(%rsi),%xmm12 + pxor %xmm3,%xmm12 + movdqu %xmm12,0 + 0(%rdi) + movdqu 16 + 0(%rsi),%xmm12 + pxor %xmm7,%xmm12 + movdqu %xmm12,16 + 0(%rdi) + movdqu 32 + 0(%rsi),%xmm12 + pxor %xmm11,%xmm12 + movdqu %xmm12,32 + 0(%rdi) + movdqu 48 + 0(%rsi),%xmm12 + pxor %xmm15,%xmm12 + movdqu %xmm12,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 64(%rdi) + movdqu %xmm6,16 + 64(%rdi) + movdqu %xmm10,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + movdqu 0 + 128(%rsi),%xmm3 + movdqu 16 + 128(%rsi),%xmm7 + movdqu 32 + 128(%rsi),%xmm11 + movdqu 48 + 128(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 128(%rdi) + movdqu %xmm5,16 + 128(%rdi) + movdqu %xmm9,32 + 128(%rdi) + movdqu %xmm15,48 + 128(%rdi) + + movdqa 0+80(%rbp),%xmm12 + subq $192,%rbx + leaq 192(%rsi),%rsi + leaq 192(%rdi),%rdi + + +L$open_sse_tail_64_dec_loop: + cmpq $16,%rbx + jb L$open_sse_tail_16_init + subq $16,%rbx + movdqu (%rsi),%xmm3 + pxor %xmm3,%xmm0 + movdqu %xmm0,(%rdi) + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + movdqa %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + jmp L$open_sse_tail_64_dec_loop +L$open_sse_tail_16_init: + movdqa %xmm0,%xmm1 + + +L$open_sse_tail_16: + testq %rbx,%rbx + jz L$open_sse_finalize + + + + pxor %xmm3,%xmm3 + leaq -1(%rsi,%rbx,1),%rsi + movq %rbx,%r8 +L$open_sse_tail_16_compose: + pslldq $1,%xmm3 + pinsrb $0,(%rsi),%xmm3 + subq $1,%rsi + subq $1,%r8 + jnz L$open_sse_tail_16_compose + +.byte 102,73,15,126,221 + pextrq $1,%xmm3,%r14 + + pxor %xmm1,%xmm3 + + +L$open_sse_tail_16_extract: + pextrb $0,%xmm3,(%rdi) + psrldq $1,%xmm3 + addq $1,%rdi + subq $1,%rbx + jne L$open_sse_tail_16_extract + + addq %r13,%r10 + adcq %r14,%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + +L$open_sse_finalize: + addq 0+0+32(%rbp),%r10 + adcq 8+0+32(%rbp),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + movq %r10,%r13 + movq %r11,%r14 + movq %r12,%r15 + subq $-5,%r10 + sbbq $-1,%r11 + sbbq $3,%r12 + cmovcq %r13,%r10 + cmovcq %r14,%r11 + cmovcq %r15,%r12 + + addq 0+0+16(%rbp),%r10 + adcq 8+0+16(%rbp),%r11 + + + addq $288 + 0 + 32,%rsp + + + popq %r9 + + movq %r10,(%r9) + movq %r11,8(%r9) + popq %r15 + + popq %r14 + + popq %r13 + + popq %r12 + + popq %rbx + + popq %rbp + + ret + +L$open_sse_128: + + movdqu L$chacha20_consts(%rip),%xmm0 + movdqa %xmm0,%xmm1 + movdqa %xmm0,%xmm2 + movdqu 0(%r9),%xmm4 + movdqa %xmm4,%xmm5 + movdqa %xmm4,%xmm6 + movdqu 16(%r9),%xmm8 + movdqa %xmm8,%xmm9 + movdqa %xmm8,%xmm10 + movdqu 32(%r9),%xmm12 + movdqa %xmm12,%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm14 + paddd L$sse_inc(%rip),%xmm14 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa %xmm13,%xmm15 + movq $10,%r10 + +L$open_sse_128_rounds: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + + decq %r10 + jnz L$open_sse_128_rounds + paddd L$chacha20_consts(%rip),%xmm0 + paddd L$chacha20_consts(%rip),%xmm1 + paddd L$chacha20_consts(%rip),%xmm2 + paddd %xmm7,%xmm4 + paddd %xmm7,%xmm5 + paddd %xmm7,%xmm6 + paddd %xmm11,%xmm9 + paddd %xmm11,%xmm10 + paddd %xmm15,%xmm13 + paddd L$sse_inc(%rip),%xmm15 + paddd %xmm15,%xmm14 + + pand L$clamp(%rip),%xmm0 + movdqa %xmm0,0+0(%rbp) + movdqa %xmm4,0+16(%rbp) + + movq %r8,%r8 + call poly_hash_ad_internal +L$open_sse_128_xor_hash: + cmpq $16,%rbx + jb L$open_sse_tail_16 + subq $16,%rbx + addq 0+0(%rsi),%r10 + adcq 8+0(%rsi),%r11 + adcq $1,%r12 + + + movdqu 0(%rsi),%xmm3 + pxor %xmm3,%xmm1 + movdqu %xmm1,0(%rdi) + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + movdqa %xmm5,%xmm1 + movdqa %xmm9,%xmm5 + movdqa %xmm13,%xmm9 + movdqa %xmm2,%xmm13 + movdqa %xmm6,%xmm2 + movdqa %xmm10,%xmm6 + movdqa %xmm14,%xmm10 + jmp L$open_sse_128_xor_hash + + + + + + + + + +.globl _chacha20_poly1305_seal +.private_extern _chacha20_poly1305_seal + +.p2align 6 +_chacha20_poly1305_seal: + +_CET_ENDBR + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + + + pushq %r9 + + subq $288 + 0 + 32,%rsp + + leaq 32(%rsp),%rbp + andq $-32,%rbp + + movq 56(%r9),%rbx + addq %rdx,%rbx + movq %r8,0+0+32(%rbp) + movq %rbx,8+0+32(%rbp) + movq %rdx,%rbx + + movl _OPENSSL_ia32cap_P+8(%rip),%eax + andl $288,%eax + xorl $288,%eax + jz chacha20_poly1305_seal_avx2 + + cmpq $128,%rbx + jbe L$seal_sse_128 + + movdqa L$chacha20_consts(%rip),%xmm0 + movdqu 0(%r9),%xmm4 + movdqu 16(%r9),%xmm8 + movdqu 32(%r9),%xmm12 + + movdqa %xmm0,%xmm1 + movdqa %xmm0,%xmm2 + movdqa %xmm0,%xmm3 + movdqa %xmm4,%xmm5 + movdqa %xmm4,%xmm6 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm9 + movdqa %xmm8,%xmm10 + movdqa %xmm8,%xmm11 + movdqa %xmm12,%xmm15 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,%xmm14 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,%xmm13 + paddd L$sse_inc(%rip),%xmm12 + + movdqa %xmm4,0+48(%rbp) + movdqa %xmm8,0+64(%rbp) + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + movdqa %xmm15,0+144(%rbp) + movq $10,%r10 +L$seal_sse_init_rounds: + movdqa %xmm8,0+80(%rbp) + movdqa L$rol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movdqa L$rol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 +.byte 102,15,58,15,255,4 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,12 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + movdqa %xmm8,0+80(%rbp) + movdqa L$rol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movdqa L$rol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 +.byte 102,15,58,15,255,12 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,4 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + decq %r10 + jnz L$seal_sse_init_rounds + paddd L$chacha20_consts(%rip),%xmm3 + paddd 0+48(%rbp),%xmm7 + paddd 0+64(%rbp),%xmm11 + paddd 0+144(%rbp),%xmm15 + paddd L$chacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd L$chacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + + + pand L$clamp(%rip),%xmm3 + movdqa %xmm3,0+0(%rbp) + movdqa %xmm7,0+16(%rbp) + + movq %r8,%r8 + call poly_hash_ad_internal + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 0(%rdi) + movdqu %xmm6,16 + 0(%rdi) + movdqu %xmm10,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 64(%rdi) + movdqu %xmm5,16 + 64(%rdi) + movdqu %xmm9,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + + cmpq $192,%rbx + ja L$seal_sse_main_init + movq $128,%rcx + subq $128,%rbx + leaq 128(%rsi),%rsi + jmp L$seal_sse_128_tail_hash +L$seal_sse_main_init: + movdqu 0 + 128(%rsi),%xmm3 + movdqu 16 + 128(%rsi),%xmm7 + movdqu 32 + 128(%rsi),%xmm11 + movdqu 48 + 128(%rsi),%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm7,%xmm4 + pxor %xmm11,%xmm8 + pxor %xmm12,%xmm15 + movdqu %xmm0,0 + 128(%rdi) + movdqu %xmm4,16 + 128(%rdi) + movdqu %xmm8,32 + 128(%rdi) + movdqu %xmm15,48 + 128(%rdi) + + movq $192,%rcx + subq $192,%rbx + leaq 192(%rsi),%rsi + movq $2,%rcx + movq $8,%r8 + cmpq $64,%rbx + jbe L$seal_sse_tail_64 + cmpq $128,%rbx + jbe L$seal_sse_tail_128 + cmpq $192,%rbx + jbe L$seal_sse_tail_192 + +L$seal_sse_main_loop: + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa %xmm0,%xmm3 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa 0+96(%rbp),%xmm15 + paddd L$sse_inc(%rip),%xmm15 + movdqa %xmm15,%xmm14 + paddd L$sse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + movdqa %xmm15,0+144(%rbp) + +.p2align 5 +L$seal_sse_main_rounds: + movdqa %xmm8,0+80(%rbp) + movdqa L$rol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movdqa L$rol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 +.byte 102,15,58,15,255,4 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,12 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + movdqa %xmm8,0+80(%rbp) + movdqa L$rol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movdqa L$rol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 +.byte 102,15,58,15,255,12 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,4 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + leaq 16(%rdi),%rdi + decq %r8 + jge L$seal_sse_main_rounds + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi + decq %rcx + jg L$seal_sse_main_rounds + paddd L$chacha20_consts(%rip),%xmm3 + paddd 0+48(%rbp),%xmm7 + paddd 0+64(%rbp),%xmm11 + paddd 0+144(%rbp),%xmm15 + paddd L$chacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd L$chacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + + movdqa %xmm14,0+80(%rbp) + movdqa %xmm14,0+80(%rbp) + movdqu 0 + 0(%rsi),%xmm14 + pxor %xmm3,%xmm14 + movdqu %xmm14,0 + 0(%rdi) + movdqu 16 + 0(%rsi),%xmm14 + pxor %xmm7,%xmm14 + movdqu %xmm14,16 + 0(%rdi) + movdqu 32 + 0(%rsi),%xmm14 + pxor %xmm11,%xmm14 + movdqu %xmm14,32 + 0(%rdi) + movdqu 48 + 0(%rsi),%xmm14 + pxor %xmm15,%xmm14 + movdqu %xmm14,48 + 0(%rdi) + + movdqa 0+80(%rbp),%xmm14 + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 64(%rdi) + movdqu %xmm6,16 + 64(%rdi) + movdqu %xmm10,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + movdqu 0 + 128(%rsi),%xmm3 + movdqu 16 + 128(%rsi),%xmm7 + movdqu 32 + 128(%rsi),%xmm11 + movdqu 48 + 128(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 128(%rdi) + movdqu %xmm5,16 + 128(%rdi) + movdqu %xmm9,32 + 128(%rdi) + movdqu %xmm15,48 + 128(%rdi) + + cmpq $256,%rbx + ja L$seal_sse_main_loop_xor + + movq $192,%rcx + subq $192,%rbx + leaq 192(%rsi),%rsi + jmp L$seal_sse_128_tail_hash +L$seal_sse_main_loop_xor: + movdqu 0 + 192(%rsi),%xmm3 + movdqu 16 + 192(%rsi),%xmm7 + movdqu 32 + 192(%rsi),%xmm11 + movdqu 48 + 192(%rsi),%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm7,%xmm4 + pxor %xmm11,%xmm8 + pxor %xmm12,%xmm15 + movdqu %xmm0,0 + 192(%rdi) + movdqu %xmm4,16 + 192(%rdi) + movdqu %xmm8,32 + 192(%rdi) + movdqu %xmm15,48 + 192(%rdi) + + leaq 256(%rsi),%rsi + subq $256,%rbx + movq $6,%rcx + movq $4,%r8 + cmpq $192,%rbx + jg L$seal_sse_main_loop + movq %rbx,%rcx + testq %rbx,%rbx + je L$seal_sse_128_tail_hash + movq $6,%rcx + cmpq $128,%rbx + ja L$seal_sse_tail_192 + cmpq $64,%rbx + ja L$seal_sse_tail_128 + +L$seal_sse_tail_64: + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa 0+96(%rbp),%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + +L$seal_sse_tail_64_rounds_and_x2hash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +L$seal_sse_tail_64_rounds_and_x1hash: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi + decq %rcx + jg L$seal_sse_tail_64_rounds_and_x2hash + decq %r8 + jge L$seal_sse_tail_64_rounds_and_x1hash + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + + jmp L$seal_sse_128_tail_xor + +L$seal_sse_tail_128: + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa 0+96(%rbp),%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + +L$seal_sse_tail_128_rounds_and_x2hash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +L$seal_sse_tail_128_rounds_and_x1hash: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + + leaq 16(%rdi),%rdi + decq %rcx + jg L$seal_sse_tail_128_rounds_and_x2hash + decq %r8 + jge L$seal_sse_tail_128_rounds_and_x1hash + paddd L$chacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 0(%rdi) + movdqu %xmm5,16 + 0(%rdi) + movdqu %xmm9,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + + movq $64,%rcx + subq $64,%rbx + leaq 64(%rsi),%rsi + jmp L$seal_sse_128_tail_hash + +L$seal_sse_tail_192: + movdqa L$chacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa 0+96(%rbp),%xmm14 + paddd L$sse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + +L$seal_sse_tail_192_rounds_and_x2hash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +L$seal_sse_tail_192_rounds_and_x1hash: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + + leaq 16(%rdi),%rdi + decq %rcx + jg L$seal_sse_tail_192_rounds_and_x2hash + decq %r8 + jge L$seal_sse_tail_192_rounds_and_x1hash + paddd L$chacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd L$chacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd L$chacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 0(%rdi) + movdqu %xmm6,16 + 0(%rdi) + movdqu %xmm10,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 64(%rdi) + movdqu %xmm5,16 + 64(%rdi) + movdqu %xmm9,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + + movq $128,%rcx + subq $128,%rbx + leaq 128(%rsi),%rsi + +L$seal_sse_128_tail_hash: + cmpq $16,%rcx + jb L$seal_sse_128_tail_xor + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + subq $16,%rcx + leaq 16(%rdi),%rdi + jmp L$seal_sse_128_tail_hash + +L$seal_sse_128_tail_xor: + cmpq $16,%rbx + jb L$seal_sse_tail_16 + subq $16,%rbx + + movdqu 0(%rsi),%xmm3 + pxor %xmm3,%xmm0 + movdqu %xmm0,0(%rdi) + + addq 0(%rdi),%r10 + adcq 8(%rdi),%r11 + adcq $1,%r12 + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + movdqa %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm1 + movdqa %xmm9,%xmm5 + movdqa %xmm13,%xmm9 + jmp L$seal_sse_128_tail_xor + +L$seal_sse_tail_16: + testq %rbx,%rbx + jz L$process_blocks_of_extra_in + + movq %rbx,%r8 + movq %rbx,%rcx + leaq -1(%rsi,%rbx,1),%rsi + pxor %xmm15,%xmm15 +L$seal_sse_tail_16_compose: + pslldq $1,%xmm15 + pinsrb $0,(%rsi),%xmm15 + leaq -1(%rsi),%rsi + decq %rcx + jne L$seal_sse_tail_16_compose + + + pxor %xmm0,%xmm15 + + + movq %rbx,%rcx + movdqu %xmm15,%xmm0 +L$seal_sse_tail_16_extract: + pextrb $0,%xmm0,(%rdi) + psrldq $1,%xmm0 + addq $1,%rdi + subq $1,%rcx + jnz L$seal_sse_tail_16_extract + + + + + + + + + movq 288 + 0 + 32(%rsp),%r9 + movq 56(%r9),%r14 + movq 48(%r9),%r13 + testq %r14,%r14 + jz L$process_partial_block + + movq $16,%r15 + subq %rbx,%r15 + cmpq %r15,%r14 + + jge L$load_extra_in + movq %r14,%r15 + +L$load_extra_in: + + + leaq -1(%r13,%r15,1),%rsi + + + addq %r15,%r13 + subq %r15,%r14 + movq %r13,48(%r9) + movq %r14,56(%r9) + + + + addq %r15,%r8 + + + pxor %xmm11,%xmm11 +L$load_extra_load_loop: + pslldq $1,%xmm11 + pinsrb $0,(%rsi),%xmm11 + leaq -1(%rsi),%rsi + subq $1,%r15 + jnz L$load_extra_load_loop + + + + + movq %rbx,%r15 + +L$load_extra_shift_loop: + pslldq $1,%xmm11 + subq $1,%r15 + jnz L$load_extra_shift_loop + + + + + leaq L$and_masks(%rip),%r15 + shlq $4,%rbx + pand -16(%r15,%rbx,1),%xmm15 + + + por %xmm11,%xmm15 + + + +.byte 102,77,15,126,253 + pextrq $1,%xmm15,%r14 + addq %r13,%r10 + adcq %r14,%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + +L$process_blocks_of_extra_in: + + movq 288+32+0 (%rsp),%r9 + movq 48(%r9),%rsi + movq 56(%r9),%r8 + movq %r8,%rcx + shrq $4,%r8 + +L$process_extra_hash_loop: + jz process_extra_in_trailer + addq 0+0(%rsi),%r10 + adcq 8+0(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rsi),%rsi + subq $1,%r8 + jmp L$process_extra_hash_loop +process_extra_in_trailer: + andq $15,%rcx + movq %rcx,%rbx + jz L$do_length_block + leaq -1(%rsi,%rcx,1),%rsi + +L$process_extra_in_trailer_load: + pslldq $1,%xmm15 + pinsrb $0,(%rsi),%xmm15 + leaq -1(%rsi),%rsi + subq $1,%rcx + jnz L$process_extra_in_trailer_load + +L$process_partial_block: + + leaq L$and_masks(%rip),%r15 + shlq $4,%rbx + pand -16(%r15,%rbx,1),%xmm15 +.byte 102,77,15,126,253 + pextrq $1,%xmm15,%r14 + addq %r13,%r10 + adcq %r14,%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + +L$do_length_block: + addq 0+0+32(%rbp),%r10 + adcq 8+0+32(%rbp),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + movq %r10,%r13 + movq %r11,%r14 + movq %r12,%r15 + subq $-5,%r10 + sbbq $-1,%r11 + sbbq $3,%r12 + cmovcq %r13,%r10 + cmovcq %r14,%r11 + cmovcq %r15,%r12 + + addq 0+0+16(%rbp),%r10 + adcq 8+0+16(%rbp),%r11 + + + addq $288 + 0 + 32,%rsp + + + popq %r9 + + movq %r10,(%r9) + movq %r11,8(%r9) + popq %r15 + + popq %r14 + + popq %r13 + + popq %r12 + + popq %rbx + + popq %rbp + + ret + +L$seal_sse_128: + + movdqu L$chacha20_consts(%rip),%xmm0 + movdqa %xmm0,%xmm1 + movdqa %xmm0,%xmm2 + movdqu 0(%r9),%xmm4 + movdqa %xmm4,%xmm5 + movdqa %xmm4,%xmm6 + movdqu 16(%r9),%xmm8 + movdqa %xmm8,%xmm9 + movdqa %xmm8,%xmm10 + movdqu 32(%r9),%xmm14 + movdqa %xmm14,%xmm12 + paddd L$sse_inc(%rip),%xmm12 + movdqa %xmm12,%xmm13 + paddd L$sse_inc(%rip),%xmm13 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa %xmm12,%xmm15 + movq $10,%r10 + +L$seal_sse_128_rounds: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb L$rol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb L$rol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb L$rol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + + decq %r10 + jnz L$seal_sse_128_rounds + paddd L$chacha20_consts(%rip),%xmm0 + paddd L$chacha20_consts(%rip),%xmm1 + paddd L$chacha20_consts(%rip),%xmm2 + paddd %xmm7,%xmm4 + paddd %xmm7,%xmm5 + paddd %xmm7,%xmm6 + paddd %xmm11,%xmm8 + paddd %xmm11,%xmm9 + paddd %xmm15,%xmm12 + paddd L$sse_inc(%rip),%xmm15 + paddd %xmm15,%xmm13 + + pand L$clamp(%rip),%xmm2 + movdqa %xmm2,0+0(%rbp) + movdqa %xmm6,0+16(%rbp) + + movq %r8,%r8 + call poly_hash_ad_internal + jmp L$seal_sse_128_tail_xor + + + + + +.p2align 6 +chacha20_poly1305_open_avx2: + + + + + + + + + + + + + vzeroupper + vmovdqa L$chacha20_consts(%rip),%ymm0 + vbroadcasti128 0(%r9),%ymm4 + vbroadcasti128 16(%r9),%ymm8 + vbroadcasti128 32(%r9),%ymm12 + vpaddd L$avx2_init(%rip),%ymm12,%ymm12 + cmpq $192,%rbx + jbe L$open_avx2_192 + cmpq $320,%rbx + jbe L$open_avx2_320 + + vmovdqa %ymm4,0+64(%rbp) + vmovdqa %ymm8,0+96(%rbp) + vmovdqa %ymm12,0+160(%rbp) + movq $10,%r10 +L$open_avx2_init_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + + decq %r10 + jne L$open_avx2_init_rounds + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand L$clamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + + movq %r8,%r8 + call poly_hash_ad_internal + + xorq %rcx,%rcx +L$open_avx2_init_hash: + addq 0+0(%rsi,%rcx,1),%r10 + adcq 8+0(%rsi,%rcx,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + addq $16,%rcx + cmpq $64,%rcx + jne L$open_avx2_init_hash + + vpxor 0(%rsi),%ymm0,%ymm0 + vpxor 32(%rsi),%ymm4,%ymm4 + + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm4,32(%rdi) + leaq 64(%rsi),%rsi + leaq 64(%rdi),%rdi + subq $64,%rbx +L$open_avx2_main_loop: + + cmpq $512,%rbx + jb L$open_avx2_main_loop_done + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + + xorq %rcx,%rcx +L$open_avx2_main_loop_rounds: + addq 0+0(%rsi,%rcx,1),%r10 + adcq 8+0(%rsi,%rcx,1),%r11 + adcq $1,%r12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + addq %rax,%r15 + adcq %rdx,%r9 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + addq 0+16(%rsi,%rcx,1),%r10 + adcq 8+16(%rsi,%rcx,1),%r11 + adcq $1,%r12 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + addq %rax,%r15 + adcq %rdx,%r9 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + addq 0+32(%rsi,%rcx,1),%r10 + adcq 8+32(%rsi,%rcx,1),%r11 + adcq $1,%r12 + + leaq 48(%rcx),%rcx + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + addq %rax,%r15 + adcq %rdx,%r9 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + cmpq $60*8,%rcx + jne L$open_avx2_main_loop_rounds + vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vmovdqa %ymm0,0+128(%rbp) + addq 0+60*8(%rsi),%r10 + adcq 8+60*8(%rsi),%r11 + adcq $1,%r12 + vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 + vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vpxor 0+0(%rsi),%ymm0,%ymm0 + vpxor 32+0(%rsi),%ymm3,%ymm3 + vpxor 64+0(%rsi),%ymm7,%ymm7 + vpxor 96+0(%rsi),%ymm11,%ymm11 + vmovdqu %ymm0,0+0(%rdi) + vmovdqu %ymm3,32+0(%rdi) + vmovdqu %ymm7,64+0(%rdi) + vmovdqu %ymm11,96+0(%rdi) + + vmovdqa 0+128(%rbp),%ymm0 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm2,%ymm2 + vpxor 64+128(%rsi),%ymm6,%ymm6 + vpxor 96+128(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm2,32+128(%rdi) + vmovdqu %ymm6,64+128(%rdi) + vmovdqu %ymm10,96+128(%rdi) + addq 0+60*8+16(%rsi),%r10 + adcq 8+60*8+16(%rsi),%r11 + adcq $1,%r12 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+256(%rsi),%ymm3,%ymm3 + vpxor 32+256(%rsi),%ymm1,%ymm1 + vpxor 64+256(%rsi),%ymm5,%ymm5 + vpxor 96+256(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+256(%rdi) + vmovdqu %ymm1,32+256(%rdi) + vmovdqu %ymm5,64+256(%rdi) + vmovdqu %ymm9,96+256(%rdi) + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 + vpxor 0+384(%rsi),%ymm3,%ymm3 + vpxor 32+384(%rsi),%ymm0,%ymm0 + vpxor 64+384(%rsi),%ymm4,%ymm4 + vpxor 96+384(%rsi),%ymm8,%ymm8 + vmovdqu %ymm3,0+384(%rdi) + vmovdqu %ymm0,32+384(%rdi) + vmovdqu %ymm4,64+384(%rdi) + vmovdqu %ymm8,96+384(%rdi) + + leaq 512(%rsi),%rsi + leaq 512(%rdi),%rdi + subq $512,%rbx + jmp L$open_avx2_main_loop +L$open_avx2_main_loop_done: + testq %rbx,%rbx + vzeroupper + je L$open_sse_finalize + + cmpq $384,%rbx + ja L$open_avx2_tail_512 + cmpq $256,%rbx + ja L$open_avx2_tail_384 + cmpq $128,%rbx + ja L$open_avx2_tail_256 + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + + xorq %r8,%r8 + movq %rbx,%rcx + andq $-16,%rcx + testq %rcx,%rcx + je L$open_avx2_tail_128_rounds +L$open_avx2_tail_128_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +L$open_avx2_tail_128_rounds: + addq $16,%r8 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + + cmpq %rcx,%r8 + jb L$open_avx2_tail_128_rounds_and_x1hash + cmpq $160,%r8 + jne L$open_avx2_tail_128_rounds + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + jmp L$open_avx2_tail_128_xor + +L$open_avx2_tail_256: + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + + movq %rbx,0+128(%rbp) + movq %rbx,%rcx + subq $128,%rcx + shrq $4,%rcx + movq $10,%r8 + cmpq $10,%rcx + cmovgq %r8,%rcx + movq %rsi,%rbx + xorq %r8,%r8 +L$open_avx2_tail_256_rounds_and_x1hash: + addq 0+0(%rbx),%r10 + adcq 8+0(%rbx),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rbx),%rbx +L$open_avx2_tail_256_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + + incq %r8 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + + cmpq %rcx,%r8 + jb L$open_avx2_tail_256_rounds_and_x1hash + cmpq $10,%r8 + jne L$open_avx2_tail_256_rounds + movq %rbx,%r8 + subq %rsi,%rbx + movq %rbx,%rcx + movq 0+128(%rbp),%rbx +L$open_avx2_tail_256_hash: + addq $16,%rcx + cmpq %rbx,%rcx + jg L$open_avx2_tail_256_done + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 + jmp L$open_avx2_tail_256_hash +L$open_avx2_tail_256_done: + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+0(%rsi),%ymm3,%ymm3 + vpxor 32+0(%rsi),%ymm1,%ymm1 + vpxor 64+0(%rsi),%ymm5,%ymm5 + vpxor 96+0(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+0(%rdi) + vmovdqu %ymm1,32+0(%rdi) + vmovdqu %ymm5,64+0(%rdi) + vmovdqu %ymm9,96+0(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + leaq 128(%rsi),%rsi + leaq 128(%rdi),%rdi + subq $128,%rbx + jmp L$open_avx2_tail_128_xor + +L$open_avx2_tail_384: + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + + movq %rbx,0+128(%rbp) + movq %rbx,%rcx + subq $256,%rcx + shrq $4,%rcx + addq $6,%rcx + movq $10,%r8 + cmpq $10,%rcx + cmovgq %r8,%rcx + movq %rsi,%rbx + xorq %r8,%r8 +L$open_avx2_tail_384_rounds_and_x2hash: + addq 0+0(%rbx),%r10 + adcq 8+0(%rbx),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rbx),%rbx +L$open_avx2_tail_384_rounds_and_x1hash: + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + addq 0+0(%rbx),%r10 + adcq 8+0(%rbx),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rbx),%rbx + incq %r8 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + + cmpq %rcx,%r8 + jb L$open_avx2_tail_384_rounds_and_x2hash + cmpq $10,%r8 + jne L$open_avx2_tail_384_rounds_and_x1hash + movq %rbx,%r8 + subq %rsi,%rbx + movq %rbx,%rcx + movq 0+128(%rbp),%rbx +L$open_avx2_384_tail_hash: + addq $16,%rcx + cmpq %rbx,%rcx + jg L$open_avx2_384_tail_done + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 + jmp L$open_avx2_384_tail_hash +L$open_avx2_384_tail_done: + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+0(%rsi),%ymm3,%ymm3 + vpxor 32+0(%rsi),%ymm2,%ymm2 + vpxor 64+0(%rsi),%ymm6,%ymm6 + vpxor 96+0(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+0(%rdi) + vmovdqu %ymm2,32+0(%rdi) + vmovdqu %ymm6,64+0(%rdi) + vmovdqu %ymm10,96+0(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm1,%ymm1 + vpxor 64+128(%rsi),%ymm5,%ymm5 + vpxor 96+128(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm1,32+128(%rdi) + vmovdqu %ymm5,64+128(%rdi) + vmovdqu %ymm9,96+128(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + leaq 256(%rsi),%rsi + leaq 256(%rdi),%rdi + subq $256,%rbx + jmp L$open_avx2_tail_128_xor + +L$open_avx2_tail_512: + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + + xorq %rcx,%rcx + movq %rsi,%r8 +L$open_avx2_tail_512_rounds_and_x2hash: + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 +L$open_avx2_tail_512_rounds_and_x1hash: + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + addq 0+16(%r8),%r10 + adcq 8+16(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%r8),%r8 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + incq %rcx + cmpq $4,%rcx + jl L$open_avx2_tail_512_rounds_and_x2hash + cmpq $10,%rcx + jne L$open_avx2_tail_512_rounds_and_x1hash + movq %rbx,%rcx + subq $384,%rcx + andq $-16,%rcx +L$open_avx2_tail_512_hash: + testq %rcx,%rcx + je L$open_avx2_tail_512_done + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 + subq $16,%rcx + jmp L$open_avx2_tail_512_hash +L$open_avx2_tail_512_done: + vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vmovdqa %ymm0,0+128(%rbp) + vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 + vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vpxor 0+0(%rsi),%ymm0,%ymm0 + vpxor 32+0(%rsi),%ymm3,%ymm3 + vpxor 64+0(%rsi),%ymm7,%ymm7 + vpxor 96+0(%rsi),%ymm11,%ymm11 + vmovdqu %ymm0,0+0(%rdi) + vmovdqu %ymm3,32+0(%rdi) + vmovdqu %ymm7,64+0(%rdi) + vmovdqu %ymm11,96+0(%rdi) + + vmovdqa 0+128(%rbp),%ymm0 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm2,%ymm2 + vpxor 64+128(%rsi),%ymm6,%ymm6 + vpxor 96+128(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm2,32+128(%rdi) + vmovdqu %ymm6,64+128(%rdi) + vmovdqu %ymm10,96+128(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+256(%rsi),%ymm3,%ymm3 + vpxor 32+256(%rsi),%ymm1,%ymm1 + vpxor 64+256(%rsi),%ymm5,%ymm5 + vpxor 96+256(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+256(%rdi) + vmovdqu %ymm1,32+256(%rdi) + vmovdqu %ymm5,64+256(%rdi) + vmovdqu %ymm9,96+256(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + leaq 384(%rsi),%rsi + leaq 384(%rdi),%rdi + subq $384,%rbx +L$open_avx2_tail_128_xor: + cmpq $32,%rbx + jb L$open_avx2_tail_32_xor + subq $32,%rbx + vpxor (%rsi),%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + leaq 32(%rsi),%rsi + leaq 32(%rdi),%rdi + vmovdqa %ymm4,%ymm0 + vmovdqa %ymm8,%ymm4 + vmovdqa %ymm12,%ymm8 + jmp L$open_avx2_tail_128_xor +L$open_avx2_tail_32_xor: + cmpq $16,%rbx + vmovdqa %xmm0,%xmm1 + jb L$open_avx2_exit + subq $16,%rbx + + vpxor (%rsi),%xmm0,%xmm1 + vmovdqu %xmm1,(%rdi) + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 + vmovdqa %xmm0,%xmm1 +L$open_avx2_exit: + vzeroupper + jmp L$open_sse_tail_16 + +L$open_avx2_192: + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 + vmovdqa %ymm12,%ymm11 + vmovdqa %ymm13,%ymm15 + movq $10,%r10 +L$open_avx2_192_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + + decq %r10 + jne L$open_avx2_192_rounds + vpaddd %ymm2,%ymm0,%ymm0 + vpaddd %ymm2,%ymm1,%ymm1 + vpaddd %ymm6,%ymm4,%ymm4 + vpaddd %ymm6,%ymm5,%ymm5 + vpaddd %ymm10,%ymm8,%ymm8 + vpaddd %ymm10,%ymm9,%ymm9 + vpaddd %ymm11,%ymm12,%ymm12 + vpaddd %ymm15,%ymm13,%ymm13 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand L$clamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 +L$open_avx2_short: + movq %r8,%r8 + call poly_hash_ad_internal +L$open_avx2_short_hash_and_xor_loop: + cmpq $32,%rbx + jb L$open_avx2_short_tail_32 + subq $32,%rbx + addq 0+0(%rsi),%r10 + adcq 8+0(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + addq 0+16(%rsi),%r10 + adcq 8+16(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + vpxor (%rsi),%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + leaq 32(%rsi),%rsi + leaq 32(%rdi),%rdi + + vmovdqa %ymm4,%ymm0 + vmovdqa %ymm8,%ymm4 + vmovdqa %ymm12,%ymm8 + vmovdqa %ymm1,%ymm12 + vmovdqa %ymm5,%ymm1 + vmovdqa %ymm9,%ymm5 + vmovdqa %ymm13,%ymm9 + vmovdqa %ymm2,%ymm13 + vmovdqa %ymm6,%ymm2 + jmp L$open_avx2_short_hash_and_xor_loop +L$open_avx2_short_tail_32: + cmpq $16,%rbx + vmovdqa %xmm0,%xmm1 + jb L$open_avx2_short_tail_32_exit + subq $16,%rbx + addq 0+0(%rsi),%r10 + adcq 8+0(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + vpxor (%rsi),%xmm0,%xmm3 + vmovdqu %xmm3,(%rdi) + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + vextracti128 $1,%ymm0,%xmm1 +L$open_avx2_short_tail_32_exit: + vzeroupper + jmp L$open_sse_tail_16 + +L$open_avx2_320: + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 + vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + movq $10,%r10 +L$open_avx2_320_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + + decq %r10 + jne L$open_avx2_320_rounds + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd %ymm7,%ymm4,%ymm4 + vpaddd %ymm7,%ymm5,%ymm5 + vpaddd %ymm7,%ymm6,%ymm6 + vpaddd %ymm11,%ymm8,%ymm8 + vpaddd %ymm11,%ymm9,%ymm9 + vpaddd %ymm11,%ymm10,%ymm10 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand L$clamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 + jmp L$open_avx2_short + + + + + +.p2align 6 +chacha20_poly1305_seal_avx2: + + + + + + + + + + + + + vzeroupper + vmovdqa L$chacha20_consts(%rip),%ymm0 + vbroadcasti128 0(%r9),%ymm4 + vbroadcasti128 16(%r9),%ymm8 + vbroadcasti128 32(%r9),%ymm12 + vpaddd L$avx2_init(%rip),%ymm12,%ymm12 + cmpq $192,%rbx + jbe L$seal_avx2_192 + cmpq $320,%rbx + jbe L$seal_avx2_320 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm4,0+64(%rbp) + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm8,%ymm11 + vmovdqa %ymm8,0+96(%rbp) + vmovdqa %ymm12,%ymm15 + vpaddd L$avx2_inc(%rip),%ymm15,%ymm14 + vpaddd L$avx2_inc(%rip),%ymm14,%ymm13 + vpaddd L$avx2_inc(%rip),%ymm13,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm15,0+256(%rbp) + movq $10,%r10 +L$seal_avx2_init_rounds: + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + decq %r10 + jnz L$seal_avx2_init_rounds + vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 + vpand L$clamp(%rip),%ymm15,%ymm15 + vmovdqa %ymm15,0+0(%rbp) + movq %r8,%r8 + call poly_hash_ad_internal + + vpxor 0(%rsi),%ymm3,%ymm3 + vpxor 32(%rsi),%ymm11,%ymm11 + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm11,32(%rdi) + vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+64(%rsi),%ymm15,%ymm15 + vpxor 32+64(%rsi),%ymm2,%ymm2 + vpxor 64+64(%rsi),%ymm6,%ymm6 + vpxor 96+64(%rsi),%ymm10,%ymm10 + vmovdqu %ymm15,0+64(%rdi) + vmovdqu %ymm2,32+64(%rdi) + vmovdqu %ymm6,64+64(%rdi) + vmovdqu %ymm10,96+64(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+192(%rsi),%ymm15,%ymm15 + vpxor 32+192(%rsi),%ymm1,%ymm1 + vpxor 64+192(%rsi),%ymm5,%ymm5 + vpxor 96+192(%rsi),%ymm9,%ymm9 + vmovdqu %ymm15,0+192(%rdi) + vmovdqu %ymm1,32+192(%rdi) + vmovdqu %ymm5,64+192(%rdi) + vmovdqu %ymm9,96+192(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm15,%ymm8 + + leaq 320(%rsi),%rsi + subq $320,%rbx + movq $320,%rcx + cmpq $128,%rbx + jbe L$seal_avx2_short_hash_remainder + vpxor 0(%rsi),%ymm0,%ymm0 + vpxor 32(%rsi),%ymm4,%ymm4 + vpxor 64(%rsi),%ymm8,%ymm8 + vpxor 96(%rsi),%ymm12,%ymm12 + vmovdqu %ymm0,320(%rdi) + vmovdqu %ymm4,352(%rdi) + vmovdqu %ymm8,384(%rdi) + vmovdqu %ymm12,416(%rdi) + leaq 128(%rsi),%rsi + subq $128,%rbx + movq $8,%rcx + movq $2,%r8 + cmpq $128,%rbx + jbe L$seal_avx2_tail_128 + cmpq $256,%rbx + jbe L$seal_avx2_tail_256 + cmpq $384,%rbx + jbe L$seal_avx2_tail_384 + cmpq $512,%rbx + jbe L$seal_avx2_tail_512 + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + + subq $16,%rdi + movq $9,%rcx + jmp L$seal_avx2_main_loop_rounds_entry +.p2align 5 +L$seal_avx2_main_loop: + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + + movq $10,%rcx +.p2align 5 +L$seal_avx2_main_loop_rounds: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + addq %rax,%r15 + adcq %rdx,%r9 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +L$seal_avx2_main_loop_rounds_entry: + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + addq %rax,%r15 + adcq %rdx,%r9 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + addq 0+32(%rdi),%r10 + adcq 8+32(%rdi),%r11 + adcq $1,%r12 + + leaq 48(%rdi),%rdi + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + addq %rax,%r15 + adcq %rdx,%r9 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + decq %rcx + jne L$seal_avx2_main_loop_rounds + vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vmovdqa %ymm0,0+128(%rbp) + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 + vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vpxor 0+0(%rsi),%ymm0,%ymm0 + vpxor 32+0(%rsi),%ymm3,%ymm3 + vpxor 64+0(%rsi),%ymm7,%ymm7 + vpxor 96+0(%rsi),%ymm11,%ymm11 + vmovdqu %ymm0,0+0(%rdi) + vmovdqu %ymm3,32+0(%rdi) + vmovdqu %ymm7,64+0(%rdi) + vmovdqu %ymm11,96+0(%rdi) + + vmovdqa 0+128(%rbp),%ymm0 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm2,%ymm2 + vpxor 64+128(%rsi),%ymm6,%ymm6 + vpxor 96+128(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm2,32+128(%rdi) + vmovdqu %ymm6,64+128(%rdi) + vmovdqu %ymm10,96+128(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+256(%rsi),%ymm3,%ymm3 + vpxor 32+256(%rsi),%ymm1,%ymm1 + vpxor 64+256(%rsi),%ymm5,%ymm5 + vpxor 96+256(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+256(%rdi) + vmovdqu %ymm1,32+256(%rdi) + vmovdqu %ymm5,64+256(%rdi) + vmovdqu %ymm9,96+256(%rdi) + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 + vpxor 0+384(%rsi),%ymm3,%ymm3 + vpxor 32+384(%rsi),%ymm0,%ymm0 + vpxor 64+384(%rsi),%ymm4,%ymm4 + vpxor 96+384(%rsi),%ymm8,%ymm8 + vmovdqu %ymm3,0+384(%rdi) + vmovdqu %ymm0,32+384(%rdi) + vmovdqu %ymm4,64+384(%rdi) + vmovdqu %ymm8,96+384(%rdi) + + leaq 512(%rsi),%rsi + subq $512,%rbx + cmpq $512,%rbx + jg L$seal_avx2_main_loop + + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + movq $10,%rcx + xorq %r8,%r8 + + cmpq $384,%rbx + ja L$seal_avx2_tail_512 + cmpq $256,%rbx + ja L$seal_avx2_tail_384 + cmpq $128,%rbx + ja L$seal_avx2_tail_256 + +L$seal_avx2_tail_128: + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + +L$seal_avx2_tail_128_rounds_and_3xhash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +L$seal_avx2_tail_128_rounds_and_2xhash: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + decq %rcx + jg L$seal_avx2_tail_128_rounds_and_3xhash + decq %r8 + jge L$seal_avx2_tail_128_rounds_and_2xhash + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + jmp L$seal_avx2_short_loop + +L$seal_avx2_tail_256: + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + +L$seal_avx2_tail_256_rounds_and_3xhash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +L$seal_avx2_tail_256_rounds_and_2xhash: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + decq %rcx + jg L$seal_avx2_tail_256_rounds_and_3xhash + decq %r8 + jge L$seal_avx2_tail_256_rounds_and_2xhash + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+0(%rsi),%ymm3,%ymm3 + vpxor 32+0(%rsi),%ymm1,%ymm1 + vpxor 64+0(%rsi),%ymm5,%ymm5 + vpxor 96+0(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+0(%rdi) + vmovdqu %ymm1,32+0(%rdi) + vmovdqu %ymm5,64+0(%rdi) + vmovdqu %ymm9,96+0(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + movq $128,%rcx + leaq 128(%rsi),%rsi + subq $128,%rbx + jmp L$seal_avx2_short_hash_remainder + +L$seal_avx2_tail_384: + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + +L$seal_avx2_tail_384_rounds_and_3xhash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +L$seal_avx2_tail_384_rounds_and_2xhash: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + + leaq 32(%rdi),%rdi + decq %rcx + jg L$seal_avx2_tail_384_rounds_and_3xhash + decq %r8 + jge L$seal_avx2_tail_384_rounds_and_2xhash + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+0(%rsi),%ymm3,%ymm3 + vpxor 32+0(%rsi),%ymm2,%ymm2 + vpxor 64+0(%rsi),%ymm6,%ymm6 + vpxor 96+0(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+0(%rdi) + vmovdqu %ymm2,32+0(%rdi) + vmovdqu %ymm6,64+0(%rdi) + vmovdqu %ymm10,96+0(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm1,%ymm1 + vpxor 64+128(%rsi),%ymm5,%ymm5 + vpxor 96+128(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm1,32+128(%rdi) + vmovdqu %ymm5,64+128(%rdi) + vmovdqu %ymm9,96+128(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + movq $256,%rcx + leaq 256(%rsi),%rsi + subq $256,%rbx + jmp L$seal_avx2_short_hash_remainder + +L$seal_avx2_tail_512: + vmovdqa L$chacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa L$avx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + +L$seal_avx2_tail_512_rounds_and_3xhash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +L$seal_avx2_tail_512_rounds_and_2xhash: + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + addq %rax,%r15 + adcq %rdx,%r9 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa L$rol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa L$rol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + + + + + + + + + + + + + + + + addq %rax,%r15 + adcq %rdx,%r9 + + + + + + + + + + + + + + + + + + + + + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + decq %rcx + jg L$seal_avx2_tail_512_rounds_and_3xhash + decq %r8 + jge L$seal_avx2_tail_512_rounds_and_2xhash + vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vmovdqa %ymm0,0+128(%rbp) + vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 + vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vpxor 0+0(%rsi),%ymm0,%ymm0 + vpxor 32+0(%rsi),%ymm3,%ymm3 + vpxor 64+0(%rsi),%ymm7,%ymm7 + vpxor 96+0(%rsi),%ymm11,%ymm11 + vmovdqu %ymm0,0+0(%rdi) + vmovdqu %ymm3,32+0(%rdi) + vmovdqu %ymm7,64+0(%rdi) + vmovdqu %ymm11,96+0(%rdi) + + vmovdqa 0+128(%rbp),%ymm0 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm2,%ymm2 + vpxor 64+128(%rsi),%ymm6,%ymm6 + vpxor 96+128(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm2,32+128(%rdi) + vmovdqu %ymm6,64+128(%rdi) + vmovdqu %ymm10,96+128(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+256(%rsi),%ymm3,%ymm3 + vpxor 32+256(%rsi),%ymm1,%ymm1 + vpxor 64+256(%rsi),%ymm5,%ymm5 + vpxor 96+256(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+256(%rdi) + vmovdqu %ymm1,32+256(%rdi) + vmovdqu %ymm5,64+256(%rdi) + vmovdqu %ymm9,96+256(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + movq $384,%rcx + leaq 384(%rsi),%rsi + subq $384,%rbx + jmp L$seal_avx2_short_hash_remainder + +L$seal_avx2_320: + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 + vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + movq $10,%r10 +L$seal_avx2_320_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb L$rol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + + decq %r10 + jne L$seal_avx2_320_rounds + vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 + vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 + vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 + vpaddd %ymm7,%ymm4,%ymm4 + vpaddd %ymm7,%ymm5,%ymm5 + vpaddd %ymm7,%ymm6,%ymm6 + vpaddd %ymm11,%ymm8,%ymm8 + vpaddd %ymm11,%ymm9,%ymm9 + vpaddd %ymm11,%ymm10,%ymm10 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand L$clamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 + jmp L$seal_avx2_short + +L$seal_avx2_192: + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 + vmovdqa %ymm12,%ymm11 + vmovdqa %ymm13,%ymm15 + movq $10,%r10 +L$seal_avx2_192_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb L$rol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb L$rol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + + decq %r10 + jne L$seal_avx2_192_rounds + vpaddd %ymm2,%ymm0,%ymm0 + vpaddd %ymm2,%ymm1,%ymm1 + vpaddd %ymm6,%ymm4,%ymm4 + vpaddd %ymm6,%ymm5,%ymm5 + vpaddd %ymm10,%ymm8,%ymm8 + vpaddd %ymm10,%ymm9,%ymm9 + vpaddd %ymm11,%ymm12,%ymm12 + vpaddd %ymm15,%ymm13,%ymm13 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand L$clamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 +L$seal_avx2_short: + movq %r8,%r8 + call poly_hash_ad_internal + xorq %rcx,%rcx +L$seal_avx2_short_hash_remainder: + cmpq $16,%rcx + jb L$seal_avx2_short_loop + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + subq $16,%rcx + addq $16,%rdi + jmp L$seal_avx2_short_hash_remainder +L$seal_avx2_short_loop: + cmpq $32,%rbx + jb L$seal_avx2_short_tail + subq $32,%rbx + + vpxor (%rsi),%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + leaq 32(%rsi),%rsi + + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + + vmovdqa %ymm4,%ymm0 + vmovdqa %ymm8,%ymm4 + vmovdqa %ymm12,%ymm8 + vmovdqa %ymm1,%ymm12 + vmovdqa %ymm5,%ymm1 + vmovdqa %ymm9,%ymm5 + vmovdqa %ymm13,%ymm9 + vmovdqa %ymm2,%ymm13 + vmovdqa %ymm6,%ymm2 + jmp L$seal_avx2_short_loop +L$seal_avx2_short_tail: + cmpq $16,%rbx + jb L$seal_avx2_exit + subq $16,%rbx + vpxor (%rsi),%xmm0,%xmm3 + vmovdqu %xmm3,(%rdi) + leaq 16(%rsi),%rsi + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi + vextracti128 $1,%ymm0,%xmm0 +L$seal_avx2_exit: + vzeroupper + jmp L$seal_sse_tail_16 + + +#endif diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-linux.S b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-linux.S new file mode 100644 index 0000000000..ac38f8f7a3 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-linux.S @@ -0,0 +1,8918 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +chacha20_poly1305_constants: + +.section .rodata +.align 64 +.Lchacha20_consts: +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +.Lrol8: +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +.Lrol16: +.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 +.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 +.Lavx2_init: +.long 0,0,0,0 +.Lsse_inc: +.long 1,0,0,0 +.Lavx2_inc: +.long 2,0,0,0,2,0,0,0 +.Lclamp: +.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC +.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF +.align 16 +.Land_masks: +.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.text + +.type poly_hash_ad_internal,@function +.align 64 +poly_hash_ad_internal: +.cfi_startproc +.cfi_def_cfa rsp, 8 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + cmpq $13,%r8 + jne .Lhash_ad_loop +.Lpoly_fast_tls_ad: + + movq (%rcx),%r10 + movq 5(%rcx),%r11 + shrq $24,%r11 + movq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + ret +.Lhash_ad_loop: + + cmpq $16,%r8 + jb .Lhash_ad_tail + addq 0+0(%rcx),%r10 + adcq 8+0(%rcx),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rcx),%rcx + subq $16,%r8 + jmp .Lhash_ad_loop +.Lhash_ad_tail: + cmpq $0,%r8 + je .Lhash_ad_done + + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + addq %r8,%rcx +.Lhash_ad_tail_loop: + shldq $8,%r13,%r14 + shlq $8,%r13 + movzbq -1(%rcx),%r15 + xorq %r15,%r13 + decq %rcx + decq %r8 + jne .Lhash_ad_tail_loop + + addq %r13,%r10 + adcq %r14,%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + +.Lhash_ad_done: + ret +.cfi_endproc +.size poly_hash_ad_internal, .-poly_hash_ad_internal + +.globl chacha20_poly1305_open +.hidden chacha20_poly1305_open +.type chacha20_poly1305_open,@function +.align 64 +chacha20_poly1305_open: +.cfi_startproc +_CET_ENDBR + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + + pushq %r9 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r9,-64 + subq $288 + 0 + 32,%rsp +.cfi_adjust_cfa_offset 288 + 32 + + leaq 32(%rsp),%rbp + andq $-32,%rbp + + movq %rdx,%rbx + movq %r8,0+0+32(%rbp) + movq %rbx,8+0+32(%rbp) + + movl OPENSSL_ia32cap_P+8(%rip),%eax + andl $288,%eax + xorl $288,%eax + jz chacha20_poly1305_open_avx2 + + cmpq $128,%rbx + jbe .Lopen_sse_128 + + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqu 0(%r9),%xmm4 + movdqu 16(%r9),%xmm8 + movdqu 32(%r9),%xmm12 + + movdqa %xmm12,%xmm7 + + movdqa %xmm4,0+48(%rbp) + movdqa %xmm8,0+64(%rbp) + movdqa %xmm12,0+96(%rbp) + movq $10,%r10 +.Lopen_sse_init_rounds: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + decq %r10 + jne .Lopen_sse_init_rounds + + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + + pand .Lclamp(%rip),%xmm0 + movdqa %xmm0,0+0(%rbp) + movdqa %xmm4,0+16(%rbp) + + movq %r8,%r8 + call poly_hash_ad_internal +.Lopen_sse_main_loop: + cmpq $256,%rbx + jb .Lopen_sse_tail + + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa %xmm0,%xmm3 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa 0+96(%rbp),%xmm15 + paddd .Lsse_inc(%rip),%xmm15 + movdqa %xmm15,%xmm14 + paddd .Lsse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + movdqa %xmm15,0+144(%rbp) + + + + movq $4,%rcx + movq %rsi,%r8 +.Lopen_sse_main_loop_rounds: + movdqa %xmm8,0+80(%rbp) + movdqa .Lrol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + + leaq 16(%r8),%r8 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movdqa .Lrol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 +.byte 102,15,58,15,255,4 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,12 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + movdqa %xmm8,0+80(%rbp) + movdqa .Lrol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movdqa .Lrol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 +.byte 102,15,58,15,255,12 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,4 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + decq %rcx + jge .Lopen_sse_main_loop_rounds + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 + cmpq $-6,%rcx + jg .Lopen_sse_main_loop_rounds + paddd .Lchacha20_consts(%rip),%xmm3 + paddd 0+48(%rbp),%xmm7 + paddd 0+64(%rbp),%xmm11 + paddd 0+144(%rbp),%xmm15 + paddd .Lchacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd .Lchacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqa %xmm12,0+80(%rbp) + movdqu 0 + 0(%rsi),%xmm12 + pxor %xmm3,%xmm12 + movdqu %xmm12,0 + 0(%rdi) + movdqu 16 + 0(%rsi),%xmm12 + pxor %xmm7,%xmm12 + movdqu %xmm12,16 + 0(%rdi) + movdqu 32 + 0(%rsi),%xmm12 + pxor %xmm11,%xmm12 + movdqu %xmm12,32 + 0(%rdi) + movdqu 48 + 0(%rsi),%xmm12 + pxor %xmm15,%xmm12 + movdqu %xmm12,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 64(%rdi) + movdqu %xmm6,16 + 64(%rdi) + movdqu %xmm10,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + movdqu 0 + 128(%rsi),%xmm3 + movdqu 16 + 128(%rsi),%xmm7 + movdqu 32 + 128(%rsi),%xmm11 + movdqu 48 + 128(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 128(%rdi) + movdqu %xmm5,16 + 128(%rdi) + movdqu %xmm9,32 + 128(%rdi) + movdqu %xmm15,48 + 128(%rdi) + movdqu 0 + 192(%rsi),%xmm3 + movdqu 16 + 192(%rsi),%xmm7 + movdqu 32 + 192(%rsi),%xmm11 + movdqu 48 + 192(%rsi),%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm7,%xmm4 + pxor %xmm11,%xmm8 + pxor 0+80(%rbp),%xmm15 + movdqu %xmm0,0 + 192(%rdi) + movdqu %xmm4,16 + 192(%rdi) + movdqu %xmm8,32 + 192(%rdi) + movdqu %xmm15,48 + 192(%rdi) + + leaq 256(%rsi),%rsi + leaq 256(%rdi),%rdi + subq $256,%rbx + jmp .Lopen_sse_main_loop +.Lopen_sse_tail: + + testq %rbx,%rbx + jz .Lopen_sse_finalize + cmpq $192,%rbx + ja .Lopen_sse_tail_256 + cmpq $128,%rbx + ja .Lopen_sse_tail_192 + cmpq $64,%rbx + ja .Lopen_sse_tail_128 + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa 0+96(%rbp),%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + + xorq %r8,%r8 + movq %rbx,%rcx + cmpq $16,%rcx + jb .Lopen_sse_tail_64_rounds +.Lopen_sse_tail_64_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + subq $16,%rcx +.Lopen_sse_tail_64_rounds: + addq $16,%r8 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + cmpq $16,%rcx + jae .Lopen_sse_tail_64_rounds_and_x1hash + cmpq $160,%r8 + jne .Lopen_sse_tail_64_rounds + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + + jmp .Lopen_sse_tail_64_dec_loop + +.Lopen_sse_tail_128: + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa 0+96(%rbp),%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + + movq %rbx,%rcx + andq $-16,%rcx + xorq %r8,%r8 +.Lopen_sse_tail_128_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +.Lopen_sse_tail_128_rounds: + addq $16,%r8 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + + cmpq %rcx,%r8 + jb .Lopen_sse_tail_128_rounds_and_x1hash + cmpq $160,%r8 + jne .Lopen_sse_tail_128_rounds + paddd .Lchacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 0(%rdi) + movdqu %xmm5,16 + 0(%rdi) + movdqu %xmm9,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + + subq $64,%rbx + leaq 64(%rsi),%rsi + leaq 64(%rdi),%rdi + jmp .Lopen_sse_tail_64_dec_loop + +.Lopen_sse_tail_192: + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa 0+96(%rbp),%xmm14 + paddd .Lsse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + + movq %rbx,%rcx + movq $160,%r8 + cmpq $160,%rcx + cmovgq %r8,%rcx + andq $-16,%rcx + xorq %r8,%r8 +.Lopen_sse_tail_192_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +.Lopen_sse_tail_192_rounds: + addq $16,%r8 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + + cmpq %rcx,%r8 + jb .Lopen_sse_tail_192_rounds_and_x1hash + cmpq $160,%r8 + jne .Lopen_sse_tail_192_rounds + cmpq $176,%rbx + jb .Lopen_sse_tail_192_finish + addq 0+160(%rsi),%r10 + adcq 8+160(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + cmpq $192,%rbx + jb .Lopen_sse_tail_192_finish + addq 0+176(%rsi),%r10 + adcq 8+176(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +.Lopen_sse_tail_192_finish: + paddd .Lchacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd .Lchacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 0(%rdi) + movdqu %xmm6,16 + 0(%rdi) + movdqu %xmm10,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 64(%rdi) + movdqu %xmm5,16 + 64(%rdi) + movdqu %xmm9,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + + subq $128,%rbx + leaq 128(%rsi),%rsi + leaq 128(%rdi),%rdi + jmp .Lopen_sse_tail_64_dec_loop + +.Lopen_sse_tail_256: + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa %xmm0,%xmm3 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa 0+96(%rbp),%xmm15 + paddd .Lsse_inc(%rip),%xmm15 + movdqa %xmm15,%xmm14 + paddd .Lsse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + movdqa %xmm15,0+144(%rbp) + + xorq %r8,%r8 +.Lopen_sse_tail_256_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movdqa %xmm11,0+80(%rbp) + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm4 + pxor %xmm11,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm4 + pxor %xmm11,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm5 + pxor %xmm11,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm5 + pxor %xmm11,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm6 + pxor %xmm11,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm6 + pxor %xmm11,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + movdqa 0+80(%rbp),%xmm11 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movdqa %xmm9,0+80(%rbp) + paddd %xmm7,%xmm3 + pxor %xmm3,%xmm15 + pshufb .Lrol16(%rip),%xmm15 + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm9 + pslld $12,%xmm9 + psrld $20,%xmm7 + pxor %xmm9,%xmm7 + paddd %xmm7,%xmm3 + pxor %xmm3,%xmm15 + pshufb .Lrol8(%rip),%xmm15 + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm9 + pslld $7,%xmm9 + psrld $25,%xmm7 + pxor %xmm9,%xmm7 +.byte 102,15,58,15,255,4 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,12 + movdqa 0+80(%rbp),%xmm9 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + movdqa %xmm11,0+80(%rbp) + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm4 + pxor %xmm11,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm4 + pxor %xmm11,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm5 + pxor %xmm11,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm5 + pxor %xmm11,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm11 + pslld $12,%xmm11 + psrld $20,%xmm6 + pxor %xmm11,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm11 + pslld $7,%xmm11 + psrld $25,%xmm6 + pxor %xmm11,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + movdqa 0+80(%rbp),%xmm11 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + movdqa %xmm9,0+80(%rbp) + paddd %xmm7,%xmm3 + pxor %xmm3,%xmm15 + pshufb .Lrol16(%rip),%xmm15 + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm9 + pslld $12,%xmm9 + psrld $20,%xmm7 + pxor %xmm9,%xmm7 + paddd %xmm7,%xmm3 + pxor %xmm3,%xmm15 + pshufb .Lrol8(%rip),%xmm15 + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm9 + pslld $7,%xmm9 + psrld $25,%xmm7 + pxor %xmm9,%xmm7 +.byte 102,15,58,15,255,12 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,4 + movdqa 0+80(%rbp),%xmm9 + + addq $16,%r8 + cmpq $160,%r8 + jb .Lopen_sse_tail_256_rounds_and_x1hash + + movq %rbx,%rcx + andq $-16,%rcx +.Lopen_sse_tail_256_hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + addq $16,%r8 + cmpq %rcx,%r8 + jb .Lopen_sse_tail_256_hash + paddd .Lchacha20_consts(%rip),%xmm3 + paddd 0+48(%rbp),%xmm7 + paddd 0+64(%rbp),%xmm11 + paddd 0+144(%rbp),%xmm15 + paddd .Lchacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd .Lchacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqa %xmm12,0+80(%rbp) + movdqu 0 + 0(%rsi),%xmm12 + pxor %xmm3,%xmm12 + movdqu %xmm12,0 + 0(%rdi) + movdqu 16 + 0(%rsi),%xmm12 + pxor %xmm7,%xmm12 + movdqu %xmm12,16 + 0(%rdi) + movdqu 32 + 0(%rsi),%xmm12 + pxor %xmm11,%xmm12 + movdqu %xmm12,32 + 0(%rdi) + movdqu 48 + 0(%rsi),%xmm12 + pxor %xmm15,%xmm12 + movdqu %xmm12,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 64(%rdi) + movdqu %xmm6,16 + 64(%rdi) + movdqu %xmm10,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + movdqu 0 + 128(%rsi),%xmm3 + movdqu 16 + 128(%rsi),%xmm7 + movdqu 32 + 128(%rsi),%xmm11 + movdqu 48 + 128(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 128(%rdi) + movdqu %xmm5,16 + 128(%rdi) + movdqu %xmm9,32 + 128(%rdi) + movdqu %xmm15,48 + 128(%rdi) + + movdqa 0+80(%rbp),%xmm12 + subq $192,%rbx + leaq 192(%rsi),%rsi + leaq 192(%rdi),%rdi + + +.Lopen_sse_tail_64_dec_loop: + cmpq $16,%rbx + jb .Lopen_sse_tail_16_init + subq $16,%rbx + movdqu (%rsi),%xmm3 + pxor %xmm3,%xmm0 + movdqu %xmm0,(%rdi) + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + movdqa %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + jmp .Lopen_sse_tail_64_dec_loop +.Lopen_sse_tail_16_init: + movdqa %xmm0,%xmm1 + + +.Lopen_sse_tail_16: + testq %rbx,%rbx + jz .Lopen_sse_finalize + + + + pxor %xmm3,%xmm3 + leaq -1(%rsi,%rbx,1),%rsi + movq %rbx,%r8 +.Lopen_sse_tail_16_compose: + pslldq $1,%xmm3 + pinsrb $0,(%rsi),%xmm3 + subq $1,%rsi + subq $1,%r8 + jnz .Lopen_sse_tail_16_compose + +.byte 102,73,15,126,221 + pextrq $1,%xmm3,%r14 + + pxor %xmm1,%xmm3 + + +.Lopen_sse_tail_16_extract: + pextrb $0,%xmm3,(%rdi) + psrldq $1,%xmm3 + addq $1,%rdi + subq $1,%rbx + jne .Lopen_sse_tail_16_extract + + addq %r13,%r10 + adcq %r14,%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + +.Lopen_sse_finalize: + addq 0+0+32(%rbp),%r10 + adcq 8+0+32(%rbp),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + movq %r10,%r13 + movq %r11,%r14 + movq %r12,%r15 + subq $-5,%r10 + sbbq $-1,%r11 + sbbq $3,%r12 + cmovcq %r13,%r10 + cmovcq %r14,%r11 + cmovcq %r15,%r12 + + addq 0+0+16(%rbp),%r10 + adcq 8+0+16(%rbp),%r11 + +.cfi_remember_state + addq $288 + 0 + 32,%rsp +.cfi_adjust_cfa_offset -(288 + 32) + + popq %r9 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r9 + movq %r10,(%r9) + movq %r11,8(%r9) + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + ret + +.Lopen_sse_128: +.cfi_restore_state + movdqu .Lchacha20_consts(%rip),%xmm0 + movdqa %xmm0,%xmm1 + movdqa %xmm0,%xmm2 + movdqu 0(%r9),%xmm4 + movdqa %xmm4,%xmm5 + movdqa %xmm4,%xmm6 + movdqu 16(%r9),%xmm8 + movdqa %xmm8,%xmm9 + movdqa %xmm8,%xmm10 + movdqu 32(%r9),%xmm12 + movdqa %xmm12,%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm14 + paddd .Lsse_inc(%rip),%xmm14 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa %xmm13,%xmm15 + movq $10,%r10 + +.Lopen_sse_128_rounds: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + + decq %r10 + jnz .Lopen_sse_128_rounds + paddd .Lchacha20_consts(%rip),%xmm0 + paddd .Lchacha20_consts(%rip),%xmm1 + paddd .Lchacha20_consts(%rip),%xmm2 + paddd %xmm7,%xmm4 + paddd %xmm7,%xmm5 + paddd %xmm7,%xmm6 + paddd %xmm11,%xmm9 + paddd %xmm11,%xmm10 + paddd %xmm15,%xmm13 + paddd .Lsse_inc(%rip),%xmm15 + paddd %xmm15,%xmm14 + + pand .Lclamp(%rip),%xmm0 + movdqa %xmm0,0+0(%rbp) + movdqa %xmm4,0+16(%rbp) + + movq %r8,%r8 + call poly_hash_ad_internal +.Lopen_sse_128_xor_hash: + cmpq $16,%rbx + jb .Lopen_sse_tail_16 + subq $16,%rbx + addq 0+0(%rsi),%r10 + adcq 8+0(%rsi),%r11 + adcq $1,%r12 + + + movdqu 0(%rsi),%xmm3 + pxor %xmm3,%xmm1 + movdqu %xmm1,0(%rdi) + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + movdqa %xmm5,%xmm1 + movdqa %xmm9,%xmm5 + movdqa %xmm13,%xmm9 + movdqa %xmm2,%xmm13 + movdqa %xmm6,%xmm2 + movdqa %xmm10,%xmm6 + movdqa %xmm14,%xmm10 + jmp .Lopen_sse_128_xor_hash +.size chacha20_poly1305_open, .-chacha20_poly1305_open +.cfi_endproc + + + + + + + +.globl chacha20_poly1305_seal +.hidden chacha20_poly1305_seal +.type chacha20_poly1305_seal,@function +.align 64 +chacha20_poly1305_seal: +.cfi_startproc +_CET_ENDBR + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + + pushq %r9 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r9,-64 + subq $288 + 0 + 32,%rsp +.cfi_adjust_cfa_offset 288 + 32 + leaq 32(%rsp),%rbp + andq $-32,%rbp + + movq 56(%r9),%rbx + addq %rdx,%rbx + movq %r8,0+0+32(%rbp) + movq %rbx,8+0+32(%rbp) + movq %rdx,%rbx + + movl OPENSSL_ia32cap_P+8(%rip),%eax + andl $288,%eax + xorl $288,%eax + jz chacha20_poly1305_seal_avx2 + + cmpq $128,%rbx + jbe .Lseal_sse_128 + + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqu 0(%r9),%xmm4 + movdqu 16(%r9),%xmm8 + movdqu 32(%r9),%xmm12 + + movdqa %xmm0,%xmm1 + movdqa %xmm0,%xmm2 + movdqa %xmm0,%xmm3 + movdqa %xmm4,%xmm5 + movdqa %xmm4,%xmm6 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm9 + movdqa %xmm8,%xmm10 + movdqa %xmm8,%xmm11 + movdqa %xmm12,%xmm15 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,%xmm14 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,%xmm13 + paddd .Lsse_inc(%rip),%xmm12 + + movdqa %xmm4,0+48(%rbp) + movdqa %xmm8,0+64(%rbp) + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + movdqa %xmm15,0+144(%rbp) + movq $10,%r10 +.Lseal_sse_init_rounds: + movdqa %xmm8,0+80(%rbp) + movdqa .Lrol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movdqa .Lrol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 +.byte 102,15,58,15,255,4 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,12 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + movdqa %xmm8,0+80(%rbp) + movdqa .Lrol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movdqa .Lrol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 +.byte 102,15,58,15,255,12 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,4 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + decq %r10 + jnz .Lseal_sse_init_rounds + paddd .Lchacha20_consts(%rip),%xmm3 + paddd 0+48(%rbp),%xmm7 + paddd 0+64(%rbp),%xmm11 + paddd 0+144(%rbp),%xmm15 + paddd .Lchacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd .Lchacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + + + pand .Lclamp(%rip),%xmm3 + movdqa %xmm3,0+0(%rbp) + movdqa %xmm7,0+16(%rbp) + + movq %r8,%r8 + call poly_hash_ad_internal + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 0(%rdi) + movdqu %xmm6,16 + 0(%rdi) + movdqu %xmm10,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 64(%rdi) + movdqu %xmm5,16 + 64(%rdi) + movdqu %xmm9,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + + cmpq $192,%rbx + ja .Lseal_sse_main_init + movq $128,%rcx + subq $128,%rbx + leaq 128(%rsi),%rsi + jmp .Lseal_sse_128_tail_hash +.Lseal_sse_main_init: + movdqu 0 + 128(%rsi),%xmm3 + movdqu 16 + 128(%rsi),%xmm7 + movdqu 32 + 128(%rsi),%xmm11 + movdqu 48 + 128(%rsi),%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm7,%xmm4 + pxor %xmm11,%xmm8 + pxor %xmm12,%xmm15 + movdqu %xmm0,0 + 128(%rdi) + movdqu %xmm4,16 + 128(%rdi) + movdqu %xmm8,32 + 128(%rdi) + movdqu %xmm15,48 + 128(%rdi) + + movq $192,%rcx + subq $192,%rbx + leaq 192(%rsi),%rsi + movq $2,%rcx + movq $8,%r8 + cmpq $64,%rbx + jbe .Lseal_sse_tail_64 + cmpq $128,%rbx + jbe .Lseal_sse_tail_128 + cmpq $192,%rbx + jbe .Lseal_sse_tail_192 + +.Lseal_sse_main_loop: + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa %xmm0,%xmm3 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa 0+96(%rbp),%xmm15 + paddd .Lsse_inc(%rip),%xmm15 + movdqa %xmm15,%xmm14 + paddd .Lsse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + movdqa %xmm15,0+144(%rbp) + +.align 32 +.Lseal_sse_main_rounds: + movdqa %xmm8,0+80(%rbp) + movdqa .Lrol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movdqa .Lrol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 +.byte 102,15,58,15,255,4 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,12 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + movdqa %xmm8,0+80(%rbp) + movdqa .Lrol16(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $20,%xmm8 + pslld $32-20,%xmm4 + pxor %xmm8,%xmm4 + movdqa .Lrol8(%rip),%xmm8 + paddd %xmm7,%xmm3 + paddd %xmm6,%xmm2 + paddd %xmm5,%xmm1 + paddd %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pxor %xmm2,%xmm14 + pxor %xmm1,%xmm13 + pxor %xmm0,%xmm12 +.byte 102,69,15,56,0,248 +.byte 102,69,15,56,0,240 +.byte 102,69,15,56,0,232 +.byte 102,69,15,56,0,224 + movdqa 0+80(%rbp),%xmm8 + paddd %xmm15,%xmm11 + paddd %xmm14,%xmm10 + paddd %xmm13,%xmm9 + paddd %xmm12,%xmm8 + pxor %xmm11,%xmm7 + pxor %xmm10,%xmm6 + pxor %xmm9,%xmm5 + pxor %xmm8,%xmm4 + movdqa %xmm8,0+80(%rbp) + movdqa %xmm7,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm7 + pxor %xmm8,%xmm7 + movdqa %xmm6,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm6 + pxor %xmm8,%xmm6 + movdqa %xmm5,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm5 + pxor %xmm8,%xmm5 + movdqa %xmm4,%xmm8 + psrld $25,%xmm8 + pslld $32-25,%xmm4 + pxor %xmm8,%xmm4 + movdqa 0+80(%rbp),%xmm8 +.byte 102,15,58,15,255,12 +.byte 102,69,15,58,15,219,8 +.byte 102,69,15,58,15,255,4 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + + leaq 16(%rdi),%rdi + decq %r8 + jge .Lseal_sse_main_rounds + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi + decq %rcx + jg .Lseal_sse_main_rounds + paddd .Lchacha20_consts(%rip),%xmm3 + paddd 0+48(%rbp),%xmm7 + paddd 0+64(%rbp),%xmm11 + paddd 0+144(%rbp),%xmm15 + paddd .Lchacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd .Lchacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + + movdqa %xmm14,0+80(%rbp) + movdqa %xmm14,0+80(%rbp) + movdqu 0 + 0(%rsi),%xmm14 + pxor %xmm3,%xmm14 + movdqu %xmm14,0 + 0(%rdi) + movdqu 16 + 0(%rsi),%xmm14 + pxor %xmm7,%xmm14 + movdqu %xmm14,16 + 0(%rdi) + movdqu 32 + 0(%rsi),%xmm14 + pxor %xmm11,%xmm14 + movdqu %xmm14,32 + 0(%rdi) + movdqu 48 + 0(%rsi),%xmm14 + pxor %xmm15,%xmm14 + movdqu %xmm14,48 + 0(%rdi) + + movdqa 0+80(%rbp),%xmm14 + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 64(%rdi) + movdqu %xmm6,16 + 64(%rdi) + movdqu %xmm10,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + movdqu 0 + 128(%rsi),%xmm3 + movdqu 16 + 128(%rsi),%xmm7 + movdqu 32 + 128(%rsi),%xmm11 + movdqu 48 + 128(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 128(%rdi) + movdqu %xmm5,16 + 128(%rdi) + movdqu %xmm9,32 + 128(%rdi) + movdqu %xmm15,48 + 128(%rdi) + + cmpq $256,%rbx + ja .Lseal_sse_main_loop_xor + + movq $192,%rcx + subq $192,%rbx + leaq 192(%rsi),%rsi + jmp .Lseal_sse_128_tail_hash +.Lseal_sse_main_loop_xor: + movdqu 0 + 192(%rsi),%xmm3 + movdqu 16 + 192(%rsi),%xmm7 + movdqu 32 + 192(%rsi),%xmm11 + movdqu 48 + 192(%rsi),%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm7,%xmm4 + pxor %xmm11,%xmm8 + pxor %xmm12,%xmm15 + movdqu %xmm0,0 + 192(%rdi) + movdqu %xmm4,16 + 192(%rdi) + movdqu %xmm8,32 + 192(%rdi) + movdqu %xmm15,48 + 192(%rdi) + + leaq 256(%rsi),%rsi + subq $256,%rbx + movq $6,%rcx + movq $4,%r8 + cmpq $192,%rbx + jg .Lseal_sse_main_loop + movq %rbx,%rcx + testq %rbx,%rbx + je .Lseal_sse_128_tail_hash + movq $6,%rcx + cmpq $128,%rbx + ja .Lseal_sse_tail_192 + cmpq $64,%rbx + ja .Lseal_sse_tail_128 + +.Lseal_sse_tail_64: + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa 0+96(%rbp),%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + +.Lseal_sse_tail_64_rounds_and_x2hash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +.Lseal_sse_tail_64_rounds_and_x1hash: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi + decq %rcx + jg .Lseal_sse_tail_64_rounds_and_x2hash + decq %r8 + jge .Lseal_sse_tail_64_rounds_and_x1hash + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + + jmp .Lseal_sse_128_tail_xor + +.Lseal_sse_tail_128: + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa 0+96(%rbp),%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + +.Lseal_sse_tail_128_rounds_and_x2hash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +.Lseal_sse_tail_128_rounds_and_x1hash: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + + leaq 16(%rdi),%rdi + decq %rcx + jg .Lseal_sse_tail_128_rounds_and_x2hash + decq %r8 + jge .Lseal_sse_tail_128_rounds_and_x1hash + paddd .Lchacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 0(%rdi) + movdqu %xmm5,16 + 0(%rdi) + movdqu %xmm9,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + + movq $64,%rcx + subq $64,%rbx + leaq 64(%rsi),%rsi + jmp .Lseal_sse_128_tail_hash + +.Lseal_sse_tail_192: + movdqa .Lchacha20_consts(%rip),%xmm0 + movdqa 0+48(%rbp),%xmm4 + movdqa 0+64(%rbp),%xmm8 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm5 + movdqa %xmm8,%xmm9 + movdqa %xmm0,%xmm2 + movdqa %xmm4,%xmm6 + movdqa %xmm8,%xmm10 + movdqa 0+96(%rbp),%xmm14 + paddd .Lsse_inc(%rip),%xmm14 + movdqa %xmm14,%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm13,%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,0+96(%rbp) + movdqa %xmm13,0+112(%rbp) + movdqa %xmm14,0+128(%rbp) + +.Lseal_sse_tail_192_rounds_and_x2hash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +.Lseal_sse_tail_192_rounds_and_x1hash: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + + leaq 16(%rdi),%rdi + decq %rcx + jg .Lseal_sse_tail_192_rounds_and_x2hash + decq %r8 + jge .Lseal_sse_tail_192_rounds_and_x1hash + paddd .Lchacha20_consts(%rip),%xmm2 + paddd 0+48(%rbp),%xmm6 + paddd 0+64(%rbp),%xmm10 + paddd 0+128(%rbp),%xmm14 + paddd .Lchacha20_consts(%rip),%xmm1 + paddd 0+48(%rbp),%xmm5 + paddd 0+64(%rbp),%xmm9 + paddd 0+112(%rbp),%xmm13 + paddd .Lchacha20_consts(%rip),%xmm0 + paddd 0+48(%rbp),%xmm4 + paddd 0+64(%rbp),%xmm8 + paddd 0+96(%rbp),%xmm12 + movdqu 0 + 0(%rsi),%xmm3 + movdqu 16 + 0(%rsi),%xmm7 + movdqu 32 + 0(%rsi),%xmm11 + movdqu 48 + 0(%rsi),%xmm15 + pxor %xmm3,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm11,%xmm10 + pxor %xmm14,%xmm15 + movdqu %xmm2,0 + 0(%rdi) + movdqu %xmm6,16 + 0(%rdi) + movdqu %xmm10,32 + 0(%rdi) + movdqu %xmm15,48 + 0(%rdi) + movdqu 0 + 64(%rsi),%xmm3 + movdqu 16 + 64(%rsi),%xmm7 + movdqu 32 + 64(%rsi),%xmm11 + movdqu 48 + 64(%rsi),%xmm15 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm5 + pxor %xmm11,%xmm9 + pxor %xmm13,%xmm15 + movdqu %xmm1,0 + 64(%rdi) + movdqu %xmm5,16 + 64(%rdi) + movdqu %xmm9,32 + 64(%rdi) + movdqu %xmm15,48 + 64(%rdi) + + movq $128,%rcx + subq $128,%rbx + leaq 128(%rsi),%rsi + +.Lseal_sse_128_tail_hash: + cmpq $16,%rcx + jb .Lseal_sse_128_tail_xor + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + subq $16,%rcx + leaq 16(%rdi),%rdi + jmp .Lseal_sse_128_tail_hash + +.Lseal_sse_128_tail_xor: + cmpq $16,%rbx + jb .Lseal_sse_tail_16 + subq $16,%rbx + + movdqu 0(%rsi),%xmm3 + pxor %xmm3,%xmm0 + movdqu %xmm0,0(%rdi) + + addq 0(%rdi),%r10 + adcq 8(%rdi),%r11 + adcq $1,%r12 + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + movdqa %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm1 + movdqa %xmm9,%xmm5 + movdqa %xmm13,%xmm9 + jmp .Lseal_sse_128_tail_xor + +.Lseal_sse_tail_16: + testq %rbx,%rbx + jz .Lprocess_blocks_of_extra_in + + movq %rbx,%r8 + movq %rbx,%rcx + leaq -1(%rsi,%rbx,1),%rsi + pxor %xmm15,%xmm15 +.Lseal_sse_tail_16_compose: + pslldq $1,%xmm15 + pinsrb $0,(%rsi),%xmm15 + leaq -1(%rsi),%rsi + decq %rcx + jne .Lseal_sse_tail_16_compose + + + pxor %xmm0,%xmm15 + + + movq %rbx,%rcx + movdqu %xmm15,%xmm0 +.Lseal_sse_tail_16_extract: + pextrb $0,%xmm0,(%rdi) + psrldq $1,%xmm0 + addq $1,%rdi + subq $1,%rcx + jnz .Lseal_sse_tail_16_extract + + + + + + + + + movq 288 + 0 + 32(%rsp),%r9 + movq 56(%r9),%r14 + movq 48(%r9),%r13 + testq %r14,%r14 + jz .Lprocess_partial_block + + movq $16,%r15 + subq %rbx,%r15 + cmpq %r15,%r14 + + jge .Lload_extra_in + movq %r14,%r15 + +.Lload_extra_in: + + + leaq -1(%r13,%r15,1),%rsi + + + addq %r15,%r13 + subq %r15,%r14 + movq %r13,48(%r9) + movq %r14,56(%r9) + + + + addq %r15,%r8 + + + pxor %xmm11,%xmm11 +.Lload_extra_load_loop: + pslldq $1,%xmm11 + pinsrb $0,(%rsi),%xmm11 + leaq -1(%rsi),%rsi + subq $1,%r15 + jnz .Lload_extra_load_loop + + + + + movq %rbx,%r15 + +.Lload_extra_shift_loop: + pslldq $1,%xmm11 + subq $1,%r15 + jnz .Lload_extra_shift_loop + + + + + leaq .Land_masks(%rip),%r15 + shlq $4,%rbx + pand -16(%r15,%rbx,1),%xmm15 + + + por %xmm11,%xmm15 + + + +.byte 102,77,15,126,253 + pextrq $1,%xmm15,%r14 + addq %r13,%r10 + adcq %r14,%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + +.Lprocess_blocks_of_extra_in: + + movq 288+32+0 (%rsp),%r9 + movq 48(%r9),%rsi + movq 56(%r9),%r8 + movq %r8,%rcx + shrq $4,%r8 + +.Lprocess_extra_hash_loop: + jz process_extra_in_trailer + addq 0+0(%rsi),%r10 + adcq 8+0(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rsi),%rsi + subq $1,%r8 + jmp .Lprocess_extra_hash_loop +process_extra_in_trailer: + andq $15,%rcx + movq %rcx,%rbx + jz .Ldo_length_block + leaq -1(%rsi,%rcx,1),%rsi + +.Lprocess_extra_in_trailer_load: + pslldq $1,%xmm15 + pinsrb $0,(%rsi),%xmm15 + leaq -1(%rsi),%rsi + subq $1,%rcx + jnz .Lprocess_extra_in_trailer_load + +.Lprocess_partial_block: + + leaq .Land_masks(%rip),%r15 + shlq $4,%rbx + pand -16(%r15,%rbx,1),%xmm15 +.byte 102,77,15,126,253 + pextrq $1,%xmm15,%r14 + addq %r13,%r10 + adcq %r14,%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + +.Ldo_length_block: + addq 0+0+32(%rbp),%r10 + adcq 8+0+32(%rbp),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + movq %r10,%r13 + movq %r11,%r14 + movq %r12,%r15 + subq $-5,%r10 + sbbq $-1,%r11 + sbbq $3,%r12 + cmovcq %r13,%r10 + cmovcq %r14,%r11 + cmovcq %r15,%r12 + + addq 0+0+16(%rbp),%r10 + adcq 8+0+16(%rbp),%r11 + +.cfi_remember_state + addq $288 + 0 + 32,%rsp +.cfi_adjust_cfa_offset -(288 + 32) + + popq %r9 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r9 + movq %r10,(%r9) + movq %r11,8(%r9) + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + ret + +.Lseal_sse_128: +.cfi_restore_state + movdqu .Lchacha20_consts(%rip),%xmm0 + movdqa %xmm0,%xmm1 + movdqa %xmm0,%xmm2 + movdqu 0(%r9),%xmm4 + movdqa %xmm4,%xmm5 + movdqa %xmm4,%xmm6 + movdqu 16(%r9),%xmm8 + movdqa %xmm8,%xmm9 + movdqa %xmm8,%xmm10 + movdqu 32(%r9),%xmm14 + movdqa %xmm14,%xmm12 + paddd .Lsse_inc(%rip),%xmm12 + movdqa %xmm12,%xmm13 + paddd .Lsse_inc(%rip),%xmm13 + movdqa %xmm4,%xmm7 + movdqa %xmm8,%xmm11 + movdqa %xmm12,%xmm15 + movq $10,%r10 + +.Lseal_sse_128_rounds: + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,4 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,12 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,4 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,12 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,4 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,12 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol16(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm4 + pxor %xmm3,%xmm4 + paddd %xmm4,%xmm0 + pxor %xmm0,%xmm12 + pshufb .Lrol8(%rip),%xmm12 + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,15,228,12 +.byte 102,69,15,58,15,192,8 +.byte 102,69,15,58,15,228,4 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol16(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm5 + pxor %xmm3,%xmm5 + paddd %xmm5,%xmm1 + pxor %xmm1,%xmm13 + pshufb .Lrol8(%rip),%xmm13 + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm5 + pxor %xmm3,%xmm5 +.byte 102,15,58,15,237,12 +.byte 102,69,15,58,15,201,8 +.byte 102,69,15,58,15,237,4 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol16(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $12,%xmm3 + psrld $20,%xmm6 + pxor %xmm3,%xmm6 + paddd %xmm6,%xmm2 + pxor %xmm2,%xmm14 + pshufb .Lrol8(%rip),%xmm14 + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm3 + pslld $7,%xmm3 + psrld $25,%xmm6 + pxor %xmm3,%xmm6 +.byte 102,15,58,15,246,12 +.byte 102,69,15,58,15,210,8 +.byte 102,69,15,58,15,246,4 + + decq %r10 + jnz .Lseal_sse_128_rounds + paddd .Lchacha20_consts(%rip),%xmm0 + paddd .Lchacha20_consts(%rip),%xmm1 + paddd .Lchacha20_consts(%rip),%xmm2 + paddd %xmm7,%xmm4 + paddd %xmm7,%xmm5 + paddd %xmm7,%xmm6 + paddd %xmm11,%xmm8 + paddd %xmm11,%xmm9 + paddd %xmm15,%xmm12 + paddd .Lsse_inc(%rip),%xmm15 + paddd %xmm15,%xmm13 + + pand .Lclamp(%rip),%xmm2 + movdqa %xmm2,0+0(%rbp) + movdqa %xmm6,0+16(%rbp) + + movq %r8,%r8 + call poly_hash_ad_internal + jmp .Lseal_sse_128_tail_xor +.size chacha20_poly1305_seal, .-chacha20_poly1305_seal +.cfi_endproc + + +.type chacha20_poly1305_open_avx2,@function +.align 64 +chacha20_poly1305_open_avx2: +.cfi_startproc + + +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r9,-64 +.cfi_adjust_cfa_offset 288 + 32 + + vzeroupper + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vbroadcasti128 0(%r9),%ymm4 + vbroadcasti128 16(%r9),%ymm8 + vbroadcasti128 32(%r9),%ymm12 + vpaddd .Lavx2_init(%rip),%ymm12,%ymm12 + cmpq $192,%rbx + jbe .Lopen_avx2_192 + cmpq $320,%rbx + jbe .Lopen_avx2_320 + + vmovdqa %ymm4,0+64(%rbp) + vmovdqa %ymm8,0+96(%rbp) + vmovdqa %ymm12,0+160(%rbp) + movq $10,%r10 +.Lopen_avx2_init_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + + decq %r10 + jne .Lopen_avx2_init_rounds + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand .Lclamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + + movq %r8,%r8 + call poly_hash_ad_internal + + xorq %rcx,%rcx +.Lopen_avx2_init_hash: + addq 0+0(%rsi,%rcx,1),%r10 + adcq 8+0(%rsi,%rcx,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + addq $16,%rcx + cmpq $64,%rcx + jne .Lopen_avx2_init_hash + + vpxor 0(%rsi),%ymm0,%ymm0 + vpxor 32(%rsi),%ymm4,%ymm4 + + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm4,32(%rdi) + leaq 64(%rsi),%rsi + leaq 64(%rdi),%rdi + subq $64,%rbx +.Lopen_avx2_main_loop: + + cmpq $512,%rbx + jb .Lopen_avx2_main_loop_done + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + + xorq %rcx,%rcx +.Lopen_avx2_main_loop_rounds: + addq 0+0(%rsi,%rcx,1),%r10 + adcq 8+0(%rsi,%rcx,1),%r11 + adcq $1,%r12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + addq %rax,%r15 + adcq %rdx,%r9 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + addq 0+16(%rsi,%rcx,1),%r10 + adcq 8+16(%rsi,%rcx,1),%r11 + adcq $1,%r12 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + addq %rax,%r15 + adcq %rdx,%r9 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + addq 0+32(%rsi,%rcx,1),%r10 + adcq 8+32(%rsi,%rcx,1),%r11 + adcq $1,%r12 + + leaq 48(%rcx),%rcx + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + addq %rax,%r15 + adcq %rdx,%r9 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + cmpq $60*8,%rcx + jne .Lopen_avx2_main_loop_rounds + vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vmovdqa %ymm0,0+128(%rbp) + addq 0+60*8(%rsi),%r10 + adcq 8+60*8(%rsi),%r11 + adcq $1,%r12 + vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 + vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vpxor 0+0(%rsi),%ymm0,%ymm0 + vpxor 32+0(%rsi),%ymm3,%ymm3 + vpxor 64+0(%rsi),%ymm7,%ymm7 + vpxor 96+0(%rsi),%ymm11,%ymm11 + vmovdqu %ymm0,0+0(%rdi) + vmovdqu %ymm3,32+0(%rdi) + vmovdqu %ymm7,64+0(%rdi) + vmovdqu %ymm11,96+0(%rdi) + + vmovdqa 0+128(%rbp),%ymm0 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm2,%ymm2 + vpxor 64+128(%rsi),%ymm6,%ymm6 + vpxor 96+128(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm2,32+128(%rdi) + vmovdqu %ymm6,64+128(%rdi) + vmovdqu %ymm10,96+128(%rdi) + addq 0+60*8+16(%rsi),%r10 + adcq 8+60*8+16(%rsi),%r11 + adcq $1,%r12 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+256(%rsi),%ymm3,%ymm3 + vpxor 32+256(%rsi),%ymm1,%ymm1 + vpxor 64+256(%rsi),%ymm5,%ymm5 + vpxor 96+256(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+256(%rdi) + vmovdqu %ymm1,32+256(%rdi) + vmovdqu %ymm5,64+256(%rdi) + vmovdqu %ymm9,96+256(%rdi) + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 + vpxor 0+384(%rsi),%ymm3,%ymm3 + vpxor 32+384(%rsi),%ymm0,%ymm0 + vpxor 64+384(%rsi),%ymm4,%ymm4 + vpxor 96+384(%rsi),%ymm8,%ymm8 + vmovdqu %ymm3,0+384(%rdi) + vmovdqu %ymm0,32+384(%rdi) + vmovdqu %ymm4,64+384(%rdi) + vmovdqu %ymm8,96+384(%rdi) + + leaq 512(%rsi),%rsi + leaq 512(%rdi),%rdi + subq $512,%rbx + jmp .Lopen_avx2_main_loop +.Lopen_avx2_main_loop_done: + testq %rbx,%rbx + vzeroupper + je .Lopen_sse_finalize + + cmpq $384,%rbx + ja .Lopen_avx2_tail_512 + cmpq $256,%rbx + ja .Lopen_avx2_tail_384 + cmpq $128,%rbx + ja .Lopen_avx2_tail_256 + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + + xorq %r8,%r8 + movq %rbx,%rcx + andq $-16,%rcx + testq %rcx,%rcx + je .Lopen_avx2_tail_128_rounds +.Lopen_avx2_tail_128_rounds_and_x1hash: + addq 0+0(%rsi,%r8,1),%r10 + adcq 8+0(%rsi,%r8,1),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +.Lopen_avx2_tail_128_rounds: + addq $16,%r8 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + + cmpq %rcx,%r8 + jb .Lopen_avx2_tail_128_rounds_and_x1hash + cmpq $160,%r8 + jne .Lopen_avx2_tail_128_rounds + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + jmp .Lopen_avx2_tail_128_xor + +.Lopen_avx2_tail_256: + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + + movq %rbx,0+128(%rbp) + movq %rbx,%rcx + subq $128,%rcx + shrq $4,%rcx + movq $10,%r8 + cmpq $10,%rcx + cmovgq %r8,%rcx + movq %rsi,%rbx + xorq %r8,%r8 +.Lopen_avx2_tail_256_rounds_and_x1hash: + addq 0+0(%rbx),%r10 + adcq 8+0(%rbx),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rbx),%rbx +.Lopen_avx2_tail_256_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + + incq %r8 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + + cmpq %rcx,%r8 + jb .Lopen_avx2_tail_256_rounds_and_x1hash + cmpq $10,%r8 + jne .Lopen_avx2_tail_256_rounds + movq %rbx,%r8 + subq %rsi,%rbx + movq %rbx,%rcx + movq 0+128(%rbp),%rbx +.Lopen_avx2_tail_256_hash: + addq $16,%rcx + cmpq %rbx,%rcx + jg .Lopen_avx2_tail_256_done + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 + jmp .Lopen_avx2_tail_256_hash +.Lopen_avx2_tail_256_done: + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+0(%rsi),%ymm3,%ymm3 + vpxor 32+0(%rsi),%ymm1,%ymm1 + vpxor 64+0(%rsi),%ymm5,%ymm5 + vpxor 96+0(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+0(%rdi) + vmovdqu %ymm1,32+0(%rdi) + vmovdqu %ymm5,64+0(%rdi) + vmovdqu %ymm9,96+0(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + leaq 128(%rsi),%rsi + leaq 128(%rdi),%rdi + subq $128,%rbx + jmp .Lopen_avx2_tail_128_xor + +.Lopen_avx2_tail_384: + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + + movq %rbx,0+128(%rbp) + movq %rbx,%rcx + subq $256,%rcx + shrq $4,%rcx + addq $6,%rcx + movq $10,%r8 + cmpq $10,%rcx + cmovgq %r8,%rcx + movq %rsi,%rbx + xorq %r8,%r8 +.Lopen_avx2_tail_384_rounds_and_x2hash: + addq 0+0(%rbx),%r10 + adcq 8+0(%rbx),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rbx),%rbx +.Lopen_avx2_tail_384_rounds_and_x1hash: + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + addq 0+0(%rbx),%r10 + adcq 8+0(%rbx),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rbx),%rbx + incq %r8 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + + cmpq %rcx,%r8 + jb .Lopen_avx2_tail_384_rounds_and_x2hash + cmpq $10,%r8 + jne .Lopen_avx2_tail_384_rounds_and_x1hash + movq %rbx,%r8 + subq %rsi,%rbx + movq %rbx,%rcx + movq 0+128(%rbp),%rbx +.Lopen_avx2_384_tail_hash: + addq $16,%rcx + cmpq %rbx,%rcx + jg .Lopen_avx2_384_tail_done + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 + jmp .Lopen_avx2_384_tail_hash +.Lopen_avx2_384_tail_done: + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+0(%rsi),%ymm3,%ymm3 + vpxor 32+0(%rsi),%ymm2,%ymm2 + vpxor 64+0(%rsi),%ymm6,%ymm6 + vpxor 96+0(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+0(%rdi) + vmovdqu %ymm2,32+0(%rdi) + vmovdqu %ymm6,64+0(%rdi) + vmovdqu %ymm10,96+0(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm1,%ymm1 + vpxor 64+128(%rsi),%ymm5,%ymm5 + vpxor 96+128(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm1,32+128(%rdi) + vmovdqu %ymm5,64+128(%rdi) + vmovdqu %ymm9,96+128(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + leaq 256(%rsi),%rsi + leaq 256(%rdi),%rdi + subq $256,%rbx + jmp .Lopen_avx2_tail_128_xor + +.Lopen_avx2_tail_512: + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + + xorq %rcx,%rcx + movq %rsi,%r8 +.Lopen_avx2_tail_512_rounds_and_x2hash: + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 +.Lopen_avx2_tail_512_rounds_and_x1hash: + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + addq 0+16(%r8),%r10 + adcq 8+16(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%r8),%r8 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + incq %rcx + cmpq $4,%rcx + jl .Lopen_avx2_tail_512_rounds_and_x2hash + cmpq $10,%rcx + jne .Lopen_avx2_tail_512_rounds_and_x1hash + movq %rbx,%rcx + subq $384,%rcx + andq $-16,%rcx +.Lopen_avx2_tail_512_hash: + testq %rcx,%rcx + je .Lopen_avx2_tail_512_done + addq 0+0(%r8),%r10 + adcq 8+0(%r8),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%r8),%r8 + subq $16,%rcx + jmp .Lopen_avx2_tail_512_hash +.Lopen_avx2_tail_512_done: + vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vmovdqa %ymm0,0+128(%rbp) + vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 + vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vpxor 0+0(%rsi),%ymm0,%ymm0 + vpxor 32+0(%rsi),%ymm3,%ymm3 + vpxor 64+0(%rsi),%ymm7,%ymm7 + vpxor 96+0(%rsi),%ymm11,%ymm11 + vmovdqu %ymm0,0+0(%rdi) + vmovdqu %ymm3,32+0(%rdi) + vmovdqu %ymm7,64+0(%rdi) + vmovdqu %ymm11,96+0(%rdi) + + vmovdqa 0+128(%rbp),%ymm0 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm2,%ymm2 + vpxor 64+128(%rsi),%ymm6,%ymm6 + vpxor 96+128(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm2,32+128(%rdi) + vmovdqu %ymm6,64+128(%rdi) + vmovdqu %ymm10,96+128(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+256(%rsi),%ymm3,%ymm3 + vpxor 32+256(%rsi),%ymm1,%ymm1 + vpxor 64+256(%rsi),%ymm5,%ymm5 + vpxor 96+256(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+256(%rdi) + vmovdqu %ymm1,32+256(%rdi) + vmovdqu %ymm5,64+256(%rdi) + vmovdqu %ymm9,96+256(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + leaq 384(%rsi),%rsi + leaq 384(%rdi),%rdi + subq $384,%rbx +.Lopen_avx2_tail_128_xor: + cmpq $32,%rbx + jb .Lopen_avx2_tail_32_xor + subq $32,%rbx + vpxor (%rsi),%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + leaq 32(%rsi),%rsi + leaq 32(%rdi),%rdi + vmovdqa %ymm4,%ymm0 + vmovdqa %ymm8,%ymm4 + vmovdqa %ymm12,%ymm8 + jmp .Lopen_avx2_tail_128_xor +.Lopen_avx2_tail_32_xor: + cmpq $16,%rbx + vmovdqa %xmm0,%xmm1 + jb .Lopen_avx2_exit + subq $16,%rbx + + vpxor (%rsi),%xmm0,%xmm1 + vmovdqu %xmm1,(%rdi) + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 + vmovdqa %xmm0,%xmm1 +.Lopen_avx2_exit: + vzeroupper + jmp .Lopen_sse_tail_16 + +.Lopen_avx2_192: + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 + vmovdqa %ymm12,%ymm11 + vmovdqa %ymm13,%ymm15 + movq $10,%r10 +.Lopen_avx2_192_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + + decq %r10 + jne .Lopen_avx2_192_rounds + vpaddd %ymm2,%ymm0,%ymm0 + vpaddd %ymm2,%ymm1,%ymm1 + vpaddd %ymm6,%ymm4,%ymm4 + vpaddd %ymm6,%ymm5,%ymm5 + vpaddd %ymm10,%ymm8,%ymm8 + vpaddd %ymm10,%ymm9,%ymm9 + vpaddd %ymm11,%ymm12,%ymm12 + vpaddd %ymm15,%ymm13,%ymm13 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand .Lclamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 +.Lopen_avx2_short: + movq %r8,%r8 + call poly_hash_ad_internal +.Lopen_avx2_short_hash_and_xor_loop: + cmpq $32,%rbx + jb .Lopen_avx2_short_tail_32 + subq $32,%rbx + addq 0+0(%rsi),%r10 + adcq 8+0(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + addq 0+16(%rsi),%r10 + adcq 8+16(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + + vpxor (%rsi),%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + leaq 32(%rsi),%rsi + leaq 32(%rdi),%rdi + + vmovdqa %ymm4,%ymm0 + vmovdqa %ymm8,%ymm4 + vmovdqa %ymm12,%ymm8 + vmovdqa %ymm1,%ymm12 + vmovdqa %ymm5,%ymm1 + vmovdqa %ymm9,%ymm5 + vmovdqa %ymm13,%ymm9 + vmovdqa %ymm2,%ymm13 + vmovdqa %ymm6,%ymm2 + jmp .Lopen_avx2_short_hash_and_xor_loop +.Lopen_avx2_short_tail_32: + cmpq $16,%rbx + vmovdqa %xmm0,%xmm1 + jb .Lopen_avx2_short_tail_32_exit + subq $16,%rbx + addq 0+0(%rsi),%r10 + adcq 8+0(%rsi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + vpxor (%rsi),%xmm0,%xmm3 + vmovdqu %xmm3,(%rdi) + leaq 16(%rsi),%rsi + leaq 16(%rdi),%rdi + vextracti128 $1,%ymm0,%xmm1 +.Lopen_avx2_short_tail_32_exit: + vzeroupper + jmp .Lopen_sse_tail_16 + +.Lopen_avx2_320: + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 + vpaddd .Lavx2_inc(%rip),%ymm13,%ymm14 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + movq $10,%r10 +.Lopen_avx2_320_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + + decq %r10 + jne .Lopen_avx2_320_rounds + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd %ymm7,%ymm4,%ymm4 + vpaddd %ymm7,%ymm5,%ymm5 + vpaddd %ymm7,%ymm6,%ymm6 + vpaddd %ymm11,%ymm8,%ymm8 + vpaddd %ymm11,%ymm9,%ymm9 + vpaddd %ymm11,%ymm10,%ymm10 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand .Lclamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 + jmp .Lopen_avx2_short +.size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2 +.cfi_endproc + + +.type chacha20_poly1305_seal_avx2,@function +.align 64 +chacha20_poly1305_seal_avx2: +.cfi_startproc + + +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r9,-64 +.cfi_adjust_cfa_offset 288 + 32 + + vzeroupper + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vbroadcasti128 0(%r9),%ymm4 + vbroadcasti128 16(%r9),%ymm8 + vbroadcasti128 32(%r9),%ymm12 + vpaddd .Lavx2_init(%rip),%ymm12,%ymm12 + cmpq $192,%rbx + jbe .Lseal_avx2_192 + cmpq $320,%rbx + jbe .Lseal_avx2_320 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm4,0+64(%rbp) + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm8,%ymm11 + vmovdqa %ymm8,0+96(%rbp) + vmovdqa %ymm12,%ymm15 + vpaddd .Lavx2_inc(%rip),%ymm15,%ymm14 + vpaddd .Lavx2_inc(%rip),%ymm14,%ymm13 + vpaddd .Lavx2_inc(%rip),%ymm13,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm15,0+256(%rbp) + movq $10,%r10 +.Lseal_avx2_init_rounds: + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + decq %r10 + jnz .Lseal_avx2_init_rounds + vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 + vpand .Lclamp(%rip),%ymm15,%ymm15 + vmovdqa %ymm15,0+0(%rbp) + movq %r8,%r8 + call poly_hash_ad_internal + + vpxor 0(%rsi),%ymm3,%ymm3 + vpxor 32(%rsi),%ymm11,%ymm11 + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm11,32(%rdi) + vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+64(%rsi),%ymm15,%ymm15 + vpxor 32+64(%rsi),%ymm2,%ymm2 + vpxor 64+64(%rsi),%ymm6,%ymm6 + vpxor 96+64(%rsi),%ymm10,%ymm10 + vmovdqu %ymm15,0+64(%rdi) + vmovdqu %ymm2,32+64(%rdi) + vmovdqu %ymm6,64+64(%rdi) + vmovdqu %ymm10,96+64(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+192(%rsi),%ymm15,%ymm15 + vpxor 32+192(%rsi),%ymm1,%ymm1 + vpxor 64+192(%rsi),%ymm5,%ymm5 + vpxor 96+192(%rsi),%ymm9,%ymm9 + vmovdqu %ymm15,0+192(%rdi) + vmovdqu %ymm1,32+192(%rdi) + vmovdqu %ymm5,64+192(%rdi) + vmovdqu %ymm9,96+192(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm15,%ymm8 + + leaq 320(%rsi),%rsi + subq $320,%rbx + movq $320,%rcx + cmpq $128,%rbx + jbe .Lseal_avx2_short_hash_remainder + vpxor 0(%rsi),%ymm0,%ymm0 + vpxor 32(%rsi),%ymm4,%ymm4 + vpxor 64(%rsi),%ymm8,%ymm8 + vpxor 96(%rsi),%ymm12,%ymm12 + vmovdqu %ymm0,320(%rdi) + vmovdqu %ymm4,352(%rdi) + vmovdqu %ymm8,384(%rdi) + vmovdqu %ymm12,416(%rdi) + leaq 128(%rsi),%rsi + subq $128,%rbx + movq $8,%rcx + movq $2,%r8 + cmpq $128,%rbx + jbe .Lseal_avx2_tail_128 + cmpq $256,%rbx + jbe .Lseal_avx2_tail_256 + cmpq $384,%rbx + jbe .Lseal_avx2_tail_384 + cmpq $512,%rbx + jbe .Lseal_avx2_tail_512 + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + + subq $16,%rdi + movq $9,%rcx + jmp .Lseal_avx2_main_loop_rounds_entry +.align 32 +.Lseal_avx2_main_loop: + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + + movq $10,%rcx +.align 32 +.Lseal_avx2_main_loop_rounds: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + addq %rax,%r15 + adcq %rdx,%r9 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + +.Lseal_avx2_main_loop_rounds_entry: + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + addq %rax,%r15 + adcq %rdx,%r9 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + addq 0+32(%rdi),%r10 + adcq 8+32(%rdi),%r11 + adcq $1,%r12 + + leaq 48(%rdi),%rdi + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + addq %rax,%r15 + adcq %rdx,%r9 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + decq %rcx + jne .Lseal_avx2_main_loop_rounds + vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vmovdqa %ymm0,0+128(%rbp) + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 + vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vpxor 0+0(%rsi),%ymm0,%ymm0 + vpxor 32+0(%rsi),%ymm3,%ymm3 + vpxor 64+0(%rsi),%ymm7,%ymm7 + vpxor 96+0(%rsi),%ymm11,%ymm11 + vmovdqu %ymm0,0+0(%rdi) + vmovdqu %ymm3,32+0(%rdi) + vmovdqu %ymm7,64+0(%rdi) + vmovdqu %ymm11,96+0(%rdi) + + vmovdqa 0+128(%rbp),%ymm0 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm2,%ymm2 + vpxor 64+128(%rsi),%ymm6,%ymm6 + vpxor 96+128(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm2,32+128(%rdi) + vmovdqu %ymm6,64+128(%rdi) + vmovdqu %ymm10,96+128(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+256(%rsi),%ymm3,%ymm3 + vpxor 32+256(%rsi),%ymm1,%ymm1 + vpxor 64+256(%rsi),%ymm5,%ymm5 + vpxor 96+256(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+256(%rdi) + vmovdqu %ymm1,32+256(%rdi) + vmovdqu %ymm5,64+256(%rdi) + vmovdqu %ymm9,96+256(%rdi) + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 + vpxor 0+384(%rsi),%ymm3,%ymm3 + vpxor 32+384(%rsi),%ymm0,%ymm0 + vpxor 64+384(%rsi),%ymm4,%ymm4 + vpxor 96+384(%rsi),%ymm8,%ymm8 + vmovdqu %ymm3,0+384(%rdi) + vmovdqu %ymm0,32+384(%rdi) + vmovdqu %ymm4,64+384(%rdi) + vmovdqu %ymm8,96+384(%rdi) + + leaq 512(%rsi),%rsi + subq $512,%rbx + cmpq $512,%rbx + jg .Lseal_avx2_main_loop + + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + movq $10,%rcx + xorq %r8,%r8 + + cmpq $384,%rbx + ja .Lseal_avx2_tail_512 + cmpq $256,%rbx + ja .Lseal_avx2_tail_384 + cmpq $128,%rbx + ja .Lseal_avx2_tail_256 + +.Lseal_avx2_tail_128: + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + +.Lseal_avx2_tail_128_rounds_and_3xhash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +.Lseal_avx2_tail_128_rounds_and_2xhash: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + decq %rcx + jg .Lseal_avx2_tail_128_rounds_and_3xhash + decq %r8 + jge .Lseal_avx2_tail_128_rounds_and_2xhash + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + jmp .Lseal_avx2_short_loop + +.Lseal_avx2_tail_256: + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + +.Lseal_avx2_tail_256_rounds_and_3xhash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +.Lseal_avx2_tail_256_rounds_and_2xhash: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + decq %rcx + jg .Lseal_avx2_tail_256_rounds_and_3xhash + decq %r8 + jge .Lseal_avx2_tail_256_rounds_and_2xhash + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+0(%rsi),%ymm3,%ymm3 + vpxor 32+0(%rsi),%ymm1,%ymm1 + vpxor 64+0(%rsi),%ymm5,%ymm5 + vpxor 96+0(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+0(%rdi) + vmovdqu %ymm1,32+0(%rdi) + vmovdqu %ymm5,64+0(%rdi) + vmovdqu %ymm9,96+0(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + movq $128,%rcx + leaq 128(%rsi),%rsi + subq $128,%rbx + jmp .Lseal_avx2_short_hash_remainder + +.Lseal_avx2_tail_384: + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + +.Lseal_avx2_tail_384_rounds_and_3xhash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +.Lseal_avx2_tail_384_rounds_and_2xhash: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + + leaq 32(%rdi),%rdi + decq %rcx + jg .Lseal_avx2_tail_384_rounds_and_3xhash + decq %r8 + jge .Lseal_avx2_tail_384_rounds_and_2xhash + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+0(%rsi),%ymm3,%ymm3 + vpxor 32+0(%rsi),%ymm2,%ymm2 + vpxor 64+0(%rsi),%ymm6,%ymm6 + vpxor 96+0(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+0(%rdi) + vmovdqu %ymm2,32+0(%rdi) + vmovdqu %ymm6,64+0(%rdi) + vmovdqu %ymm10,96+0(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm1,%ymm1 + vpxor 64+128(%rsi),%ymm5,%ymm5 + vpxor 96+128(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm1,32+128(%rdi) + vmovdqu %ymm5,64+128(%rdi) + vmovdqu %ymm9,96+128(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + movq $256,%rcx + leaq 256(%rsi),%rsi + subq $256,%rbx + jmp .Lseal_avx2_short_hash_remainder + +.Lseal_avx2_tail_512: + vmovdqa .Lchacha20_consts(%rip),%ymm0 + vmovdqa 0+64(%rbp),%ymm4 + vmovdqa 0+96(%rbp),%ymm8 + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm10 + vmovdqa %ymm0,%ymm3 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa .Lavx2_inc(%rip),%ymm12 + vpaddd 0+160(%rbp),%ymm12,%ymm15 + vpaddd %ymm15,%ymm12,%ymm14 + vpaddd %ymm14,%ymm12,%ymm13 + vpaddd %ymm13,%ymm12,%ymm12 + vmovdqa %ymm15,0+256(%rbp) + vmovdqa %ymm14,0+224(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm12,0+160(%rbp) + +.Lseal_avx2_tail_512_rounds_and_3xhash: + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + addq %rax,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi +.Lseal_avx2_tail_512_rounds_and_2xhash: + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $4,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $12,%ymm15,%ymm15,%ymm15 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $4,%ymm4,%ymm4,%ymm4 + addq %rax,%r15 + adcq %rdx,%r9 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vmovdqa %ymm8,0+128(%rbp) + vmovdqa .Lrol16(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $20,%ymm7,%ymm8 + vpslld $32-20,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $20,%ymm6,%ymm8 + vpslld $32-20,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $20,%ymm5,%ymm8 + vpslld $32-20,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $20,%ymm4,%ymm8 + vpslld $32-20,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa .Lrol8(%rip),%ymm8 + vpaddd %ymm7,%ymm3,%ymm3 + vpaddd %ymm6,%ymm2,%ymm2 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm3,%ymm15,%ymm15 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm8,%ymm15,%ymm15 + vpshufb %ymm8,%ymm14,%ymm14 + vpshufb %ymm8,%ymm13,%ymm13 + vpshufb %ymm8,%ymm12,%ymm12 + vpaddd %ymm15,%ymm11,%ymm11 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd 0+128(%rbp),%ymm12,%ymm8 + vpxor %ymm11,%ymm7,%ymm7 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa %ymm8,0+128(%rbp) + vpsrld $25,%ymm7,%ymm8 + movq 0+0+0(%rbp),%rdx + movq %rdx,%r15 + mulxq %r10,%r13,%r14 + mulxq %r11,%rax,%rdx + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + vpslld $32-25,%ymm7,%ymm7 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $25,%ymm6,%ymm8 + vpslld $32-25,%ymm6,%ymm6 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $25,%ymm5,%ymm8 + vpslld $32-25,%ymm5,%ymm5 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $25,%ymm4,%ymm8 + vpslld $32-25,%ymm4,%ymm4 + vpxor %ymm8,%ymm4,%ymm4 + vmovdqa 0+128(%rbp),%ymm8 + vpalignr $12,%ymm7,%ymm7,%ymm7 + vpalignr $8,%ymm11,%ymm11,%ymm11 + vpalignr $4,%ymm15,%ymm15,%ymm15 + vpalignr $12,%ymm6,%ymm6,%ymm6 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpalignr $8,%ymm9,%ymm9,%ymm9 + movq 8+0+0(%rbp),%rdx + mulxq %r10,%r10,%rax + addq %r10,%r14 + mulxq %r11,%r11,%r9 + adcq %r11,%r15 + adcq $0,%r9 + imulq %r12,%rdx + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm12,%ymm12,%ymm12 + + + + + + + + + + + + + + + + + addq %rax,%r15 + adcq %rdx,%r9 + + + + + + + + + + + + + + + + + + + + + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + decq %rcx + jg .Lseal_avx2_tail_512_rounds_and_3xhash + decq %r8 + jge .Lseal_avx2_tail_512_rounds_and_2xhash + vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 + vpaddd 0+64(%rbp),%ymm7,%ymm7 + vpaddd 0+96(%rbp),%ymm11,%ymm11 + vpaddd 0+256(%rbp),%ymm15,%ymm15 + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd 0+64(%rbp),%ymm6,%ymm6 + vpaddd 0+96(%rbp),%ymm10,%ymm10 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd 0+64(%rbp),%ymm5,%ymm5 + vpaddd 0+96(%rbp),%ymm9,%ymm9 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd 0+64(%rbp),%ymm4,%ymm4 + vpaddd 0+96(%rbp),%ymm8,%ymm8 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + + vmovdqa %ymm0,0+128(%rbp) + vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 + vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 + vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 + vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 + vpxor 0+0(%rsi),%ymm0,%ymm0 + vpxor 32+0(%rsi),%ymm3,%ymm3 + vpxor 64+0(%rsi),%ymm7,%ymm7 + vpxor 96+0(%rsi),%ymm11,%ymm11 + vmovdqu %ymm0,0+0(%rdi) + vmovdqu %ymm3,32+0(%rdi) + vmovdqu %ymm7,64+0(%rdi) + vmovdqu %ymm11,96+0(%rdi) + + vmovdqa 0+128(%rbp),%ymm0 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 + vpxor 0+128(%rsi),%ymm3,%ymm3 + vpxor 32+128(%rsi),%ymm2,%ymm2 + vpxor 64+128(%rsi),%ymm6,%ymm6 + vpxor 96+128(%rsi),%ymm10,%ymm10 + vmovdqu %ymm3,0+128(%rdi) + vmovdqu %ymm2,32+128(%rdi) + vmovdqu %ymm6,64+128(%rdi) + vmovdqu %ymm10,96+128(%rdi) + vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 + vpxor 0+256(%rsi),%ymm3,%ymm3 + vpxor 32+256(%rsi),%ymm1,%ymm1 + vpxor 64+256(%rsi),%ymm5,%ymm5 + vpxor 96+256(%rsi),%ymm9,%ymm9 + vmovdqu %ymm3,0+256(%rdi) + vmovdqu %ymm1,32+256(%rdi) + vmovdqu %ymm5,64+256(%rdi) + vmovdqu %ymm9,96+256(%rdi) + vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 + vmovdqa %ymm3,%ymm8 + + movq $384,%rcx + leaq 384(%rsi),%rsi + subq $384,%rbx + jmp .Lseal_avx2_short_hash_remainder + +.Lseal_avx2_320: + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 + vpaddd .Lavx2_inc(%rip),%ymm13,%ymm14 + vmovdqa %ymm4,%ymm7 + vmovdqa %ymm8,%ymm11 + vmovdqa %ymm12,0+160(%rbp) + vmovdqa %ymm13,0+192(%rbp) + vmovdqa %ymm14,0+224(%rbp) + movq $10,%r10 +.Lseal_avx2_320_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $12,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $4,%ymm6,%ymm6,%ymm6 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol16(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpsrld $20,%ymm6,%ymm3 + vpslld $12,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpaddd %ymm6,%ymm2,%ymm2 + vpxor %ymm2,%ymm14,%ymm14 + vpshufb .Lrol8(%rip),%ymm14,%ymm14 + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm3 + vpsrld $25,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpalignr $4,%ymm14,%ymm14,%ymm14 + vpalignr $8,%ymm10,%ymm10,%ymm10 + vpalignr $12,%ymm6,%ymm6,%ymm6 + + decq %r10 + jne .Lseal_avx2_320_rounds + vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 + vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 + vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 + vpaddd %ymm7,%ymm4,%ymm4 + vpaddd %ymm7,%ymm5,%ymm5 + vpaddd %ymm7,%ymm6,%ymm6 + vpaddd %ymm11,%ymm8,%ymm8 + vpaddd %ymm11,%ymm9,%ymm9 + vpaddd %ymm11,%ymm10,%ymm10 + vpaddd 0+160(%rbp),%ymm12,%ymm12 + vpaddd 0+192(%rbp),%ymm13,%ymm13 + vpaddd 0+224(%rbp),%ymm14,%ymm14 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand .Lclamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 + vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 + vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 + vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 + vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 + jmp .Lseal_avx2_short + +.Lseal_avx2_192: + vmovdqa %ymm0,%ymm1 + vmovdqa %ymm0,%ymm2 + vmovdqa %ymm4,%ymm5 + vmovdqa %ymm4,%ymm6 + vmovdqa %ymm8,%ymm9 + vmovdqa %ymm8,%ymm10 + vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 + vmovdqa %ymm12,%ymm11 + vmovdqa %ymm13,%ymm15 + movq $10,%r10 +.Lseal_avx2_192_rounds: + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $12,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $4,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $12,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $4,%ymm5,%ymm5,%ymm5 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol16(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $20,%ymm4,%ymm3 + vpslld $12,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpaddd %ymm4,%ymm0,%ymm0 + vpxor %ymm0,%ymm12,%ymm12 + vpshufb .Lrol8(%rip),%ymm12,%ymm12 + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm3 + vpsrld $25,%ymm4,%ymm4 + vpxor %ymm3,%ymm4,%ymm4 + vpalignr $4,%ymm12,%ymm12,%ymm12 + vpalignr $8,%ymm8,%ymm8,%ymm8 + vpalignr $12,%ymm4,%ymm4,%ymm4 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol16(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpsrld $20,%ymm5,%ymm3 + vpslld $12,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpaddd %ymm5,%ymm1,%ymm1 + vpxor %ymm1,%ymm13,%ymm13 + vpshufb .Lrol8(%rip),%ymm13,%ymm13 + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm3 + vpsrld $25,%ymm5,%ymm5 + vpxor %ymm3,%ymm5,%ymm5 + vpalignr $4,%ymm13,%ymm13,%ymm13 + vpalignr $8,%ymm9,%ymm9,%ymm9 + vpalignr $12,%ymm5,%ymm5,%ymm5 + + decq %r10 + jne .Lseal_avx2_192_rounds + vpaddd %ymm2,%ymm0,%ymm0 + vpaddd %ymm2,%ymm1,%ymm1 + vpaddd %ymm6,%ymm4,%ymm4 + vpaddd %ymm6,%ymm5,%ymm5 + vpaddd %ymm10,%ymm8,%ymm8 + vpaddd %ymm10,%ymm9,%ymm9 + vpaddd %ymm11,%ymm12,%ymm12 + vpaddd %ymm15,%ymm13,%ymm13 + vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 + + vpand .Lclamp(%rip),%ymm3,%ymm3 + vmovdqa %ymm3,0+0(%rbp) + + vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 + vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 + vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 + vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 + vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 + vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 +.Lseal_avx2_short: + movq %r8,%r8 + call poly_hash_ad_internal + xorq %rcx,%rcx +.Lseal_avx2_short_hash_remainder: + cmpq $16,%rcx + jb .Lseal_avx2_short_loop + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + subq $16,%rcx + addq $16,%rdi + jmp .Lseal_avx2_short_hash_remainder +.Lseal_avx2_short_loop: + cmpq $32,%rbx + jb .Lseal_avx2_short_tail + subq $32,%rbx + + vpxor (%rsi),%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + leaq 32(%rsi),%rsi + + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + addq 0+16(%rdi),%r10 + adcq 8+16(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 32(%rdi),%rdi + + vmovdqa %ymm4,%ymm0 + vmovdqa %ymm8,%ymm4 + vmovdqa %ymm12,%ymm8 + vmovdqa %ymm1,%ymm12 + vmovdqa %ymm5,%ymm1 + vmovdqa %ymm9,%ymm5 + vmovdqa %ymm13,%ymm9 + vmovdqa %ymm2,%ymm13 + vmovdqa %ymm6,%ymm2 + jmp .Lseal_avx2_short_loop +.Lseal_avx2_short_tail: + cmpq $16,%rbx + jb .Lseal_avx2_exit + subq $16,%rbx + vpxor (%rsi),%xmm0,%xmm3 + vmovdqu %xmm3,(%rdi) + leaq 16(%rsi),%rsi + addq 0+0(%rdi),%r10 + adcq 8+0(%rdi),%r11 + adcq $1,%r12 + movq 0+0+0(%rbp),%rax + movq %rax,%r15 + mulq %r10 + movq %rax,%r13 + movq %rdx,%r14 + movq 0+0+0(%rbp),%rax + mulq %r11 + imulq %r12,%r15 + addq %rax,%r14 + adcq %rdx,%r15 + movq 8+0+0(%rbp),%rax + movq %rax,%r9 + mulq %r10 + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r10 + movq 8+0+0(%rbp),%rax + mulq %r11 + addq %rax,%r15 + adcq $0,%rdx + imulq %r12,%r9 + addq %r10,%r15 + adcq %rdx,%r9 + movq %r13,%r10 + movq %r14,%r11 + movq %r15,%r12 + andq $3,%r12 + movq %r15,%r13 + andq $-4,%r13 + movq %r9,%r14 + shrdq $2,%r9,%r15 + shrq $2,%r9 + addq %r13,%r15 + adcq %r14,%r9 + addq %r15,%r10 + adcq %r9,%r11 + adcq $0,%r12 + + leaq 16(%rdi),%rdi + vextracti128 $1,%ymm0,%xmm0 +.Lseal_avx2_exit: + vzeroupper + jmp .Lseal_sse_tail_16 +.cfi_endproc +.size chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2 +#endif diff --git a/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-win.asm b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-win.asm new file mode 100644 index 0000000000..095689cf33 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-win.asm @@ -0,0 +1,8957 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P + +chacha20_poly1305_constants: + +section .rdata rdata align=8 +ALIGN 64 +$L$chacha20_consts: + DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' + DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +$L$rol8: + DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 + DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 +$L$rol16: + DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 + DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 +$L$avx2_init: + DD 0,0,0,0 +$L$sse_inc: + DD 1,0,0,0 +$L$avx2_inc: + DD 2,0,0,0,2,0,0,0 +$L$clamp: + DQ 0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC + DQ 0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF +ALIGN 16 +$L$and_masks: + DB 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 + DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +section .text + + + +ALIGN 64 +poly_hash_ad_internal: + + + xor r10,r10 + xor r11,r11 + xor r12,r12 + cmp r8,13 + jne NEAR $L$hash_ad_loop +$L$poly_fast_tls_ad: + + mov r10,QWORD[rcx] + mov r11,QWORD[5+rcx] + shr r11,24 + mov r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + ret +$L$hash_ad_loop: + + cmp r8,16 + jb NEAR $L$hash_ad_tail + add r10,QWORD[((0+0))+rcx] + adc r11,QWORD[((8+0))+rcx] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rcx,[16+rcx] + sub r8,16 + jmp NEAR $L$hash_ad_loop +$L$hash_ad_tail: + cmp r8,0 + je NEAR $L$hash_ad_done + + xor r13,r13 + xor r14,r14 + xor r15,r15 + add rcx,r8 +$L$hash_ad_tail_loop: + shld r14,r13,8 + shl r13,8 + movzx r15,BYTE[((-1))+rcx] + xor r13,r15 + dec rcx + dec r8 + jne NEAR $L$hash_ad_tail_loop + + add r10,r13 + adc r11,r14 + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + +$L$hash_ad_done: + ret + + + +global chacha20_poly1305_open + +ALIGN 64 +chacha20_poly1305_open: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_chacha20_poly1305_open: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + + + push r9 + + sub rsp,288 + 160 + 32 + + + lea rbp,[32+rsp] + and rbp,-32 + + movaps XMMWORD[(0+0)+rbp],xmm6 + movaps XMMWORD[(16+0)+rbp],xmm7 + movaps XMMWORD[(32+0)+rbp],xmm8 + movaps XMMWORD[(48+0)+rbp],xmm9 + movaps XMMWORD[(64+0)+rbp],xmm10 + movaps XMMWORD[(80+0)+rbp],xmm11 + movaps XMMWORD[(96+0)+rbp],xmm12 + movaps XMMWORD[(112+0)+rbp],xmm13 + movaps XMMWORD[(128+0)+rbp],xmm14 + movaps XMMWORD[(144+0)+rbp],xmm15 + + mov rbx,rdx + mov QWORD[((0+160+32))+rbp],r8 + mov QWORD[((8+160+32))+rbp],rbx + + mov eax,DWORD[((OPENSSL_ia32cap_P+8))] + and eax,288 + xor eax,288 + jz NEAR chacha20_poly1305_open_avx2 + + cmp rbx,128 + jbe NEAR $L$open_sse_128 + + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqu xmm4,XMMWORD[r9] + movdqu xmm8,XMMWORD[16+r9] + movdqu xmm12,XMMWORD[32+r9] + + movdqa xmm7,xmm12 + + movdqa XMMWORD[(160+48)+rbp],xmm4 + movdqa XMMWORD[(160+64)+rbp],xmm8 + movdqa XMMWORD[(160+96)+rbp],xmm12 + mov r10,10 +$L$open_sse_init_rounds: + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + + dec r10 + jne NEAR $L$open_sse_init_rounds + + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + + pand xmm0,XMMWORD[$L$clamp] + movdqa XMMWORD[(160+0)+rbp],xmm0 + movdqa XMMWORD[(160+16)+rbp],xmm4 + + mov r8,r8 + call poly_hash_ad_internal +$L$open_sse_main_loop: + cmp rbx,16*16 + jb NEAR $L$open_sse_tail + + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm1,xmm0 + movdqa xmm5,xmm4 + movdqa xmm9,xmm8 + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + movdqa xmm10,xmm8 + movdqa xmm3,xmm0 + movdqa xmm7,xmm4 + movdqa xmm11,xmm8 + movdqa xmm15,XMMWORD[((160+96))+rbp] + paddd xmm15,XMMWORD[$L$sse_inc] + movdqa xmm14,xmm15 + paddd xmm14,XMMWORD[$L$sse_inc] + movdqa xmm13,xmm14 + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm12,xmm13 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + movdqa XMMWORD[(160+112)+rbp],xmm13 + movdqa XMMWORD[(160+128)+rbp],xmm14 + movdqa XMMWORD[(160+144)+rbp],xmm15 + + + + mov rcx,4 + mov r8,rsi +$L$open_sse_main_loop_rounds: + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,XMMWORD[$L$rol16] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + add r10,QWORD[((0+0))+r8] + adc r11,QWORD[((8+0))+r8] + adc r12,1 + + lea r8,[16+r8] + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,20 + pslld xmm7,32-20 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,20 + pslld xmm6,32-20 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,20 + pslld xmm5,32-20 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,20 + pslld xmm4,32-20 + pxor xmm4,xmm8 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + movdqa xmm8,XMMWORD[$L$rol8] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,25 + pslld xmm7,32-25 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,25 + pslld xmm6,32-25 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,25 + pslld xmm5,32-25 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,25 + pslld xmm4,32-25 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[((160+80))+rbp] + imul r9,r12 + add r15,r10 + adc r9,rdx +DB 102,15,58,15,255,4 +DB 102,69,15,58,15,219,8 +DB 102,69,15,58,15,255,12 +DB 102,15,58,15,246,4 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,12 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,XMMWORD[$L$rol16] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,20 + pslld xmm7,32-20 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,20 + pslld xmm6,32-20 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,20 + pslld xmm5,32-20 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,20 + pslld xmm4,32-20 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[$L$rol8] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,25 + pslld xmm7,32-25 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,25 + pslld xmm6,32-25 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,25 + pslld xmm5,32-25 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,25 + pslld xmm4,32-25 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[((160+80))+rbp] +DB 102,15,58,15,255,12 +DB 102,69,15,58,15,219,8 +DB 102,69,15,58,15,255,4 +DB 102,15,58,15,246,12 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,4 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + + dec rcx + jge NEAR $L$open_sse_main_loop_rounds + add r10,QWORD[((0+0))+r8] + adc r11,QWORD[((8+0))+r8] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea r8,[16+r8] + cmp rcx,-6 + jg NEAR $L$open_sse_main_loop_rounds + paddd xmm3,XMMWORD[$L$chacha20_consts] + paddd xmm7,XMMWORD[((160+48))+rbp] + paddd xmm11,XMMWORD[((160+64))+rbp] + paddd xmm15,XMMWORD[((160+144))+rbp] + paddd xmm2,XMMWORD[$L$chacha20_consts] + paddd xmm6,XMMWORD[((160+48))+rbp] + paddd xmm10,XMMWORD[((160+64))+rbp] + paddd xmm14,XMMWORD[((160+128))+rbp] + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm5,XMMWORD[((160+48))+rbp] + paddd xmm9,XMMWORD[((160+64))+rbp] + paddd xmm13,XMMWORD[((160+112))+rbp] + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + movdqa XMMWORD[(160+80)+rbp],xmm12 + movdqu xmm12,XMMWORD[((0 + 0))+rsi] + pxor xmm12,xmm3 + movdqu XMMWORD[(0 + 0)+rdi],xmm12 + movdqu xmm12,XMMWORD[((16 + 0))+rsi] + pxor xmm12,xmm7 + movdqu XMMWORD[(16 + 0)+rdi],xmm12 + movdqu xmm12,XMMWORD[((32 + 0))+rsi] + pxor xmm12,xmm11 + movdqu XMMWORD[(32 + 0)+rdi],xmm12 + movdqu xmm12,XMMWORD[((48 + 0))+rsi] + pxor xmm12,xmm15 + movdqu XMMWORD[(48 + 0)+rdi],xmm12 + movdqu xmm3,XMMWORD[((0 + 64))+rsi] + movdqu xmm7,XMMWORD[((16 + 64))+rsi] + movdqu xmm11,XMMWORD[((32 + 64))+rsi] + movdqu xmm15,XMMWORD[((48 + 64))+rsi] + pxor xmm2,xmm3 + pxor xmm6,xmm7 + pxor xmm10,xmm11 + pxor xmm15,xmm14 + movdqu XMMWORD[(0 + 64)+rdi],xmm2 + movdqu XMMWORD[(16 + 64)+rdi],xmm6 + movdqu XMMWORD[(32 + 64)+rdi],xmm10 + movdqu XMMWORD[(48 + 64)+rdi],xmm15 + movdqu xmm3,XMMWORD[((0 + 128))+rsi] + movdqu xmm7,XMMWORD[((16 + 128))+rsi] + movdqu xmm11,XMMWORD[((32 + 128))+rsi] + movdqu xmm15,XMMWORD[((48 + 128))+rsi] + pxor xmm1,xmm3 + pxor xmm5,xmm7 + pxor xmm9,xmm11 + pxor xmm15,xmm13 + movdqu XMMWORD[(0 + 128)+rdi],xmm1 + movdqu XMMWORD[(16 + 128)+rdi],xmm5 + movdqu XMMWORD[(32 + 128)+rdi],xmm9 + movdqu XMMWORD[(48 + 128)+rdi],xmm15 + movdqu xmm3,XMMWORD[((0 + 192))+rsi] + movdqu xmm7,XMMWORD[((16 + 192))+rsi] + movdqu xmm11,XMMWORD[((32 + 192))+rsi] + movdqu xmm15,XMMWORD[((48 + 192))+rsi] + pxor xmm0,xmm3 + pxor xmm4,xmm7 + pxor xmm8,xmm11 + pxor xmm15,XMMWORD[((160+80))+rbp] + movdqu XMMWORD[(0 + 192)+rdi],xmm0 + movdqu XMMWORD[(16 + 192)+rdi],xmm4 + movdqu XMMWORD[(32 + 192)+rdi],xmm8 + movdqu XMMWORD[(48 + 192)+rdi],xmm15 + + lea rsi,[256+rsi] + lea rdi,[256+rdi] + sub rbx,16*16 + jmp NEAR $L$open_sse_main_loop +$L$open_sse_tail: + + test rbx,rbx + jz NEAR $L$open_sse_finalize + cmp rbx,12*16 + ja NEAR $L$open_sse_tail_256 + cmp rbx,8*16 + ja NEAR $L$open_sse_tail_192 + cmp rbx,4*16 + ja NEAR $L$open_sse_tail_128 + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm12,XMMWORD[((160+96))+rbp] + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + + xor r8,r8 + mov rcx,rbx + cmp rcx,16 + jb NEAR $L$open_sse_tail_64_rounds +$L$open_sse_tail_64_rounds_and_x1hash: + add r10,QWORD[((0+0))+r8*1+rsi] + adc r11,QWORD[((8+0))+r8*1+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + sub rcx,16 +$L$open_sse_tail_64_rounds: + add r8,16 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + + cmp rcx,16 + jae NEAR $L$open_sse_tail_64_rounds_and_x1hash + cmp r8,10*16 + jne NEAR $L$open_sse_tail_64_rounds + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + + jmp NEAR $L$open_sse_tail_64_dec_loop + +$L$open_sse_tail_128: + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm1,xmm0 + movdqa xmm5,xmm4 + movdqa xmm9,xmm8 + movdqa xmm13,XMMWORD[((160+96))+rbp] + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm12,xmm13 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + movdqa XMMWORD[(160+112)+rbp],xmm13 + + mov rcx,rbx + and rcx,-16 + xor r8,r8 +$L$open_sse_tail_128_rounds_and_x1hash: + add r10,QWORD[((0+0))+r8*1+rsi] + adc r11,QWORD[((8+0))+r8*1+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + +$L$open_sse_tail_128_rounds: + add r8,16 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 + + cmp r8,rcx + jb NEAR $L$open_sse_tail_128_rounds_and_x1hash + cmp r8,10*16 + jne NEAR $L$open_sse_tail_128_rounds + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm5,XMMWORD[((160+48))+rbp] + paddd xmm9,XMMWORD[((160+64))+rbp] + paddd xmm13,XMMWORD[((160+112))+rbp] + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + movdqu xmm3,XMMWORD[((0 + 0))+rsi] + movdqu xmm7,XMMWORD[((16 + 0))+rsi] + movdqu xmm11,XMMWORD[((32 + 0))+rsi] + movdqu xmm15,XMMWORD[((48 + 0))+rsi] + pxor xmm1,xmm3 + pxor xmm5,xmm7 + pxor xmm9,xmm11 + pxor xmm15,xmm13 + movdqu XMMWORD[(0 + 0)+rdi],xmm1 + movdqu XMMWORD[(16 + 0)+rdi],xmm5 + movdqu XMMWORD[(32 + 0)+rdi],xmm9 + movdqu XMMWORD[(48 + 0)+rdi],xmm15 + + sub rbx,4*16 + lea rsi,[64+rsi] + lea rdi,[64+rdi] + jmp NEAR $L$open_sse_tail_64_dec_loop + +$L$open_sse_tail_192: + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm1,xmm0 + movdqa xmm5,xmm4 + movdqa xmm9,xmm8 + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + movdqa xmm10,xmm8 + movdqa xmm14,XMMWORD[((160+96))+rbp] + paddd xmm14,XMMWORD[$L$sse_inc] + movdqa xmm13,xmm14 + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm12,xmm13 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + movdqa XMMWORD[(160+112)+rbp],xmm13 + movdqa XMMWORD[(160+128)+rbp],xmm14 + + mov rcx,rbx + mov r8,10*16 + cmp rcx,10*16 + cmovg rcx,r8 + and rcx,-16 + xor r8,r8 +$L$open_sse_tail_192_rounds_and_x1hash: + add r10,QWORD[((0+0))+r8*1+rsi] + adc r11,QWORD[((8+0))+r8*1+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + +$L$open_sse_tail_192_rounds: + add r8,16 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,12 + psrld xmm6,20 + pxor xmm6,xmm3 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,7 + psrld xmm6,25 + pxor xmm6,xmm3 +DB 102,15,58,15,246,4 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,12 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,12 + psrld xmm6,20 + pxor xmm6,xmm3 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,7 + psrld xmm6,25 + pxor xmm6,xmm3 +DB 102,15,58,15,246,12 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,4 + + cmp r8,rcx + jb NEAR $L$open_sse_tail_192_rounds_and_x1hash + cmp r8,10*16 + jne NEAR $L$open_sse_tail_192_rounds + cmp rbx,11*16 + jb NEAR $L$open_sse_tail_192_finish + add r10,QWORD[((0+160))+rsi] + adc r11,QWORD[((8+160))+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + cmp rbx,12*16 + jb NEAR $L$open_sse_tail_192_finish + add r10,QWORD[((0+176))+rsi] + adc r11,QWORD[((8+176))+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + +$L$open_sse_tail_192_finish: + paddd xmm2,XMMWORD[$L$chacha20_consts] + paddd xmm6,XMMWORD[((160+48))+rbp] + paddd xmm10,XMMWORD[((160+64))+rbp] + paddd xmm14,XMMWORD[((160+128))+rbp] + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm5,XMMWORD[((160+48))+rbp] + paddd xmm9,XMMWORD[((160+64))+rbp] + paddd xmm13,XMMWORD[((160+112))+rbp] + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + movdqu xmm3,XMMWORD[((0 + 0))+rsi] + movdqu xmm7,XMMWORD[((16 + 0))+rsi] + movdqu xmm11,XMMWORD[((32 + 0))+rsi] + movdqu xmm15,XMMWORD[((48 + 0))+rsi] + pxor xmm2,xmm3 + pxor xmm6,xmm7 + pxor xmm10,xmm11 + pxor xmm15,xmm14 + movdqu XMMWORD[(0 + 0)+rdi],xmm2 + movdqu XMMWORD[(16 + 0)+rdi],xmm6 + movdqu XMMWORD[(32 + 0)+rdi],xmm10 + movdqu XMMWORD[(48 + 0)+rdi],xmm15 + movdqu xmm3,XMMWORD[((0 + 64))+rsi] + movdqu xmm7,XMMWORD[((16 + 64))+rsi] + movdqu xmm11,XMMWORD[((32 + 64))+rsi] + movdqu xmm15,XMMWORD[((48 + 64))+rsi] + pxor xmm1,xmm3 + pxor xmm5,xmm7 + pxor xmm9,xmm11 + pxor xmm15,xmm13 + movdqu XMMWORD[(0 + 64)+rdi],xmm1 + movdqu XMMWORD[(16 + 64)+rdi],xmm5 + movdqu XMMWORD[(32 + 64)+rdi],xmm9 + movdqu XMMWORD[(48 + 64)+rdi],xmm15 + + sub rbx,8*16 + lea rsi,[128+rsi] + lea rdi,[128+rdi] + jmp NEAR $L$open_sse_tail_64_dec_loop + +$L$open_sse_tail_256: + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm1,xmm0 + movdqa xmm5,xmm4 + movdqa xmm9,xmm8 + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + movdqa xmm10,xmm8 + movdqa xmm3,xmm0 + movdqa xmm7,xmm4 + movdqa xmm11,xmm8 + movdqa xmm15,XMMWORD[((160+96))+rbp] + paddd xmm15,XMMWORD[$L$sse_inc] + movdqa xmm14,xmm15 + paddd xmm14,XMMWORD[$L$sse_inc] + movdqa xmm13,xmm14 + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm12,xmm13 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + movdqa XMMWORD[(160+112)+rbp],xmm13 + movdqa XMMWORD[(160+128)+rbp],xmm14 + movdqa XMMWORD[(160+144)+rbp],xmm15 + + xor r8,r8 +$L$open_sse_tail_256_rounds_and_x1hash: + add r10,QWORD[((0+0))+r8*1+rsi] + adc r11,QWORD[((8+0))+r8*1+rsi] + adc r12,1 + movdqa XMMWORD[(160+80)+rbp],xmm11 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm11,xmm4 + pslld xmm11,12 + psrld xmm4,20 + pxor xmm4,xmm11 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm11,xmm4 + pslld xmm11,7 + psrld xmm4,25 + pxor xmm4,xmm11 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm11,xmm5 + pslld xmm11,12 + psrld xmm5,20 + pxor xmm5,xmm11 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm11,xmm5 + pslld xmm11,7 + psrld xmm5,25 + pxor xmm5,xmm11 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm11,xmm6 + pslld xmm11,12 + psrld xmm6,20 + pxor xmm6,xmm11 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm11,xmm6 + pslld xmm11,7 + psrld xmm6,25 + pxor xmm6,xmm11 +DB 102,15,58,15,246,4 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,12 + movdqa xmm11,XMMWORD[((160+80))+rbp] + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + movdqa XMMWORD[(160+80)+rbp],xmm9 + paddd xmm3,xmm7 + pxor xmm15,xmm3 + pshufb xmm15,XMMWORD[$L$rol16] + paddd xmm11,xmm15 + pxor xmm7,xmm11 + movdqa xmm9,xmm7 + pslld xmm9,12 + psrld xmm7,20 + pxor xmm7,xmm9 + paddd xmm3,xmm7 + pxor xmm15,xmm3 + pshufb xmm15,XMMWORD[$L$rol8] + paddd xmm11,xmm15 + pxor xmm7,xmm11 + movdqa xmm9,xmm7 + pslld xmm9,7 + psrld xmm7,25 + pxor xmm7,xmm9 +DB 102,15,58,15,255,4 +DB 102,69,15,58,15,219,8 +DB 102,69,15,58,15,255,12 + movdqa xmm9,XMMWORD[((160+80))+rbp] + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + movdqa XMMWORD[(160+80)+rbp],xmm11 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm11,xmm4 + pslld xmm11,12 + psrld xmm4,20 + pxor xmm4,xmm11 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm11,xmm4 + pslld xmm11,7 + psrld xmm4,25 + pxor xmm4,xmm11 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm11,xmm5 + pslld xmm11,12 + psrld xmm5,20 + pxor xmm5,xmm11 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm11,xmm5 + pslld xmm11,7 + psrld xmm5,25 + pxor xmm5,xmm11 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 + imul r9,r12 + add r15,r10 + adc r9,rdx + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm11,xmm6 + pslld xmm11,12 + psrld xmm6,20 + pxor xmm6,xmm11 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm11,xmm6 + pslld xmm11,7 + psrld xmm6,25 + pxor xmm6,xmm11 +DB 102,15,58,15,246,12 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,4 + movdqa xmm11,XMMWORD[((160+80))+rbp] + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + movdqa XMMWORD[(160+80)+rbp],xmm9 + paddd xmm3,xmm7 + pxor xmm15,xmm3 + pshufb xmm15,XMMWORD[$L$rol16] + paddd xmm11,xmm15 + pxor xmm7,xmm11 + movdqa xmm9,xmm7 + pslld xmm9,12 + psrld xmm7,20 + pxor xmm7,xmm9 + paddd xmm3,xmm7 + pxor xmm15,xmm3 + pshufb xmm15,XMMWORD[$L$rol8] + paddd xmm11,xmm15 + pxor xmm7,xmm11 + movdqa xmm9,xmm7 + pslld xmm9,7 + psrld xmm7,25 + pxor xmm7,xmm9 +DB 102,15,58,15,255,12 +DB 102,69,15,58,15,219,8 +DB 102,69,15,58,15,255,4 + movdqa xmm9,XMMWORD[((160+80))+rbp] + + add r8,16 + cmp r8,10*16 + jb NEAR $L$open_sse_tail_256_rounds_and_x1hash + + mov rcx,rbx + and rcx,-16 +$L$open_sse_tail_256_hash: + add r10,QWORD[((0+0))+r8*1+rsi] + adc r11,QWORD[((8+0))+r8*1+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + add r8,16 + cmp r8,rcx + jb NEAR $L$open_sse_tail_256_hash + paddd xmm3,XMMWORD[$L$chacha20_consts] + paddd xmm7,XMMWORD[((160+48))+rbp] + paddd xmm11,XMMWORD[((160+64))+rbp] + paddd xmm15,XMMWORD[((160+144))+rbp] + paddd xmm2,XMMWORD[$L$chacha20_consts] + paddd xmm6,XMMWORD[((160+48))+rbp] + paddd xmm10,XMMWORD[((160+64))+rbp] + paddd xmm14,XMMWORD[((160+128))+rbp] + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm5,XMMWORD[((160+48))+rbp] + paddd xmm9,XMMWORD[((160+64))+rbp] + paddd xmm13,XMMWORD[((160+112))+rbp] + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + movdqa XMMWORD[(160+80)+rbp],xmm12 + movdqu xmm12,XMMWORD[((0 + 0))+rsi] + pxor xmm12,xmm3 + movdqu XMMWORD[(0 + 0)+rdi],xmm12 + movdqu xmm12,XMMWORD[((16 + 0))+rsi] + pxor xmm12,xmm7 + movdqu XMMWORD[(16 + 0)+rdi],xmm12 + movdqu xmm12,XMMWORD[((32 + 0))+rsi] + pxor xmm12,xmm11 + movdqu XMMWORD[(32 + 0)+rdi],xmm12 + movdqu xmm12,XMMWORD[((48 + 0))+rsi] + pxor xmm12,xmm15 + movdqu XMMWORD[(48 + 0)+rdi],xmm12 + movdqu xmm3,XMMWORD[((0 + 64))+rsi] + movdqu xmm7,XMMWORD[((16 + 64))+rsi] + movdqu xmm11,XMMWORD[((32 + 64))+rsi] + movdqu xmm15,XMMWORD[((48 + 64))+rsi] + pxor xmm2,xmm3 + pxor xmm6,xmm7 + pxor xmm10,xmm11 + pxor xmm15,xmm14 + movdqu XMMWORD[(0 + 64)+rdi],xmm2 + movdqu XMMWORD[(16 + 64)+rdi],xmm6 + movdqu XMMWORD[(32 + 64)+rdi],xmm10 + movdqu XMMWORD[(48 + 64)+rdi],xmm15 + movdqu xmm3,XMMWORD[((0 + 128))+rsi] + movdqu xmm7,XMMWORD[((16 + 128))+rsi] + movdqu xmm11,XMMWORD[((32 + 128))+rsi] + movdqu xmm15,XMMWORD[((48 + 128))+rsi] + pxor xmm1,xmm3 + pxor xmm5,xmm7 + pxor xmm9,xmm11 + pxor xmm15,xmm13 + movdqu XMMWORD[(0 + 128)+rdi],xmm1 + movdqu XMMWORD[(16 + 128)+rdi],xmm5 + movdqu XMMWORD[(32 + 128)+rdi],xmm9 + movdqu XMMWORD[(48 + 128)+rdi],xmm15 + + movdqa xmm12,XMMWORD[((160+80))+rbp] + sub rbx,12*16 + lea rsi,[192+rsi] + lea rdi,[192+rdi] + + +$L$open_sse_tail_64_dec_loop: + cmp rbx,16 + jb NEAR $L$open_sse_tail_16_init + sub rbx,16 + movdqu xmm3,XMMWORD[rsi] + pxor xmm0,xmm3 + movdqu XMMWORD[rdi],xmm0 + lea rsi,[16+rsi] + lea rdi,[16+rdi] + movdqa xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm8,xmm12 + jmp NEAR $L$open_sse_tail_64_dec_loop +$L$open_sse_tail_16_init: + movdqa xmm1,xmm0 + + +$L$open_sse_tail_16: + test rbx,rbx + jz NEAR $L$open_sse_finalize + + + + pxor xmm3,xmm3 + lea rsi,[((-1))+rbx*1+rsi] + mov r8,rbx +$L$open_sse_tail_16_compose: + pslldq xmm3,1 + pinsrb xmm3,BYTE[rsi],0 + sub rsi,1 + sub r8,1 + jnz NEAR $L$open_sse_tail_16_compose + +DB 102,73,15,126,221 + pextrq r14,xmm3,1 + + pxor xmm3,xmm1 + + +$L$open_sse_tail_16_extract: + pextrb XMMWORD[rdi],xmm3,0 + psrldq xmm3,1 + add rdi,1 + sub rbx,1 + jne NEAR $L$open_sse_tail_16_extract + + add r10,r13 + adc r11,r14 + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + +$L$open_sse_finalize: + add r10,QWORD[((0+160+32))+rbp] + adc r11,QWORD[((8+160+32))+rbp] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + + mov r13,r10 + mov r14,r11 + mov r15,r12 + sub r10,-5 + sbb r11,-1 + sbb r12,3 + cmovc r10,r13 + cmovc r11,r14 + cmovc r12,r15 + + add r10,QWORD[((0+160+16))+rbp] + adc r11,QWORD[((8+160+16))+rbp] + + movaps xmm6,XMMWORD[((0+0))+rbp] + movaps xmm7,XMMWORD[((16+0))+rbp] + movaps xmm8,XMMWORD[((32+0))+rbp] + movaps xmm9,XMMWORD[((48+0))+rbp] + movaps xmm10,XMMWORD[((64+0))+rbp] + movaps xmm11,XMMWORD[((80+0))+rbp] + movaps xmm12,XMMWORD[((96+0))+rbp] + movaps xmm13,XMMWORD[((112+0))+rbp] + movaps xmm14,XMMWORD[((128+0))+rbp] + movaps xmm15,XMMWORD[((144+0))+rbp] + + + add rsp,288 + 160 + 32 + + + pop r9 + + mov QWORD[r9],r10 + mov QWORD[8+r9],r11 + pop r15 + + pop r14 + + pop r13 + + pop r12 + + pop rbx + + pop rbp + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$open_sse_128: + + movdqu xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm1,xmm0 + movdqa xmm2,xmm0 + movdqu xmm4,XMMWORD[r9] + movdqa xmm5,xmm4 + movdqa xmm6,xmm4 + movdqu xmm8,XMMWORD[16+r9] + movdqa xmm9,xmm8 + movdqa xmm10,xmm8 + movdqu xmm12,XMMWORD[32+r9] + movdqa xmm13,xmm12 + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm14,xmm13 + paddd xmm14,XMMWORD[$L$sse_inc] + movdqa xmm7,xmm4 + movdqa xmm11,xmm8 + movdqa xmm15,xmm13 + mov r10,10 + +$L$open_sse_128_rounds: + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,12 + psrld xmm6,20 + pxor xmm6,xmm3 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,7 + psrld xmm6,25 + pxor xmm6,xmm3 +DB 102,15,58,15,246,4 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,12 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,12 + psrld xmm6,20 + pxor xmm6,xmm3 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,7 + psrld xmm6,25 + pxor xmm6,xmm3 +DB 102,15,58,15,246,12 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,4 + + dec r10 + jnz NEAR $L$open_sse_128_rounds + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm2,XMMWORD[$L$chacha20_consts] + paddd xmm4,xmm7 + paddd xmm5,xmm7 + paddd xmm6,xmm7 + paddd xmm9,xmm11 + paddd xmm10,xmm11 + paddd xmm13,xmm15 + paddd xmm15,XMMWORD[$L$sse_inc] + paddd xmm14,xmm15 + + pand xmm0,XMMWORD[$L$clamp] + movdqa XMMWORD[(160+0)+rbp],xmm0 + movdqa XMMWORD[(160+16)+rbp],xmm4 + + mov r8,r8 + call poly_hash_ad_internal +$L$open_sse_128_xor_hash: + cmp rbx,16 + jb NEAR $L$open_sse_tail_16 + sub rbx,16 + add r10,QWORD[((0+0))+rsi] + adc r11,QWORD[((8+0))+rsi] + adc r12,1 + + + movdqu xmm3,XMMWORD[rsi] + pxor xmm1,xmm3 + movdqu XMMWORD[rdi],xmm1 + lea rsi,[16+rsi] + lea rdi,[16+rdi] + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + + movdqa xmm1,xmm5 + movdqa xmm5,xmm9 + movdqa xmm9,xmm13 + movdqa xmm13,xmm2 + movdqa xmm2,xmm6 + movdqa xmm6,xmm10 + movdqa xmm10,xmm14 + jmp NEAR $L$open_sse_128_xor_hash +$L$SEH_end_chacha20_poly1305_open: + + + + + + + + +global chacha20_poly1305_seal + +ALIGN 64 +chacha20_poly1305_seal: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_chacha20_poly1305_seal: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +_CET_ENDBR + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + + + push r9 + + sub rsp,288 + 160 + 32 + + lea rbp,[32+rsp] + and rbp,-32 + + movaps XMMWORD[(0+0)+rbp],xmm6 + movaps XMMWORD[(16+0)+rbp],xmm7 + movaps XMMWORD[(32+0)+rbp],xmm8 + movaps XMMWORD[(48+0)+rbp],xmm9 + movaps XMMWORD[(64+0)+rbp],xmm10 + movaps XMMWORD[(80+0)+rbp],xmm11 + movaps XMMWORD[(96+0)+rbp],xmm12 + movaps XMMWORD[(112+0)+rbp],xmm13 + movaps XMMWORD[(128+0)+rbp],xmm14 + movaps XMMWORD[(144+0)+rbp],xmm15 + + mov rbx,QWORD[56+r9] + add rbx,rdx + mov QWORD[((0+160+32))+rbp],r8 + mov QWORD[((8+160+32))+rbp],rbx + mov rbx,rdx + + mov eax,DWORD[((OPENSSL_ia32cap_P+8))] + and eax,288 + xor eax,288 + jz NEAR chacha20_poly1305_seal_avx2 + + cmp rbx,128 + jbe NEAR $L$seal_sse_128 + + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqu xmm4,XMMWORD[r9] + movdqu xmm8,XMMWORD[16+r9] + movdqu xmm12,XMMWORD[32+r9] + + movdqa xmm1,xmm0 + movdqa xmm2,xmm0 + movdqa xmm3,xmm0 + movdqa xmm5,xmm4 + movdqa xmm6,xmm4 + movdqa xmm7,xmm4 + movdqa xmm9,xmm8 + movdqa xmm10,xmm8 + movdqa xmm11,xmm8 + movdqa xmm15,xmm12 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa xmm14,xmm12 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa xmm13,xmm12 + paddd xmm12,XMMWORD[$L$sse_inc] + + movdqa XMMWORD[(160+48)+rbp],xmm4 + movdqa XMMWORD[(160+64)+rbp],xmm8 + movdqa XMMWORD[(160+96)+rbp],xmm12 + movdqa XMMWORD[(160+112)+rbp],xmm13 + movdqa XMMWORD[(160+128)+rbp],xmm14 + movdqa XMMWORD[(160+144)+rbp],xmm15 + mov r10,10 +$L$seal_sse_init_rounds: + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,XMMWORD[$L$rol16] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,20 + pslld xmm7,32-20 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,20 + pslld xmm6,32-20 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,20 + pslld xmm5,32-20 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,20 + pslld xmm4,32-20 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[$L$rol8] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,25 + pslld xmm7,32-25 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,25 + pslld xmm6,32-25 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,25 + pslld xmm5,32-25 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,25 + pslld xmm4,32-25 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[((160+80))+rbp] +DB 102,15,58,15,255,4 +DB 102,69,15,58,15,219,8 +DB 102,69,15,58,15,255,12 +DB 102,15,58,15,246,4 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,12 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,XMMWORD[$L$rol16] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,20 + pslld xmm7,32-20 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,20 + pslld xmm6,32-20 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,20 + pslld xmm5,32-20 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,20 + pslld xmm4,32-20 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[$L$rol8] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,25 + pslld xmm7,32-25 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,25 + pslld xmm6,32-25 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,25 + pslld xmm5,32-25 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,25 + pslld xmm4,32-25 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[((160+80))+rbp] +DB 102,15,58,15,255,12 +DB 102,69,15,58,15,219,8 +DB 102,69,15,58,15,255,4 +DB 102,15,58,15,246,12 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,4 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + + dec r10 + jnz NEAR $L$seal_sse_init_rounds + paddd xmm3,XMMWORD[$L$chacha20_consts] + paddd xmm7,XMMWORD[((160+48))+rbp] + paddd xmm11,XMMWORD[((160+64))+rbp] + paddd xmm15,XMMWORD[((160+144))+rbp] + paddd xmm2,XMMWORD[$L$chacha20_consts] + paddd xmm6,XMMWORD[((160+48))+rbp] + paddd xmm10,XMMWORD[((160+64))+rbp] + paddd xmm14,XMMWORD[((160+128))+rbp] + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm5,XMMWORD[((160+48))+rbp] + paddd xmm9,XMMWORD[((160+64))+rbp] + paddd xmm13,XMMWORD[((160+112))+rbp] + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + + + pand xmm3,XMMWORD[$L$clamp] + movdqa XMMWORD[(160+0)+rbp],xmm3 + movdqa XMMWORD[(160+16)+rbp],xmm7 + + mov r8,r8 + call poly_hash_ad_internal + movdqu xmm3,XMMWORD[((0 + 0))+rsi] + movdqu xmm7,XMMWORD[((16 + 0))+rsi] + movdqu xmm11,XMMWORD[((32 + 0))+rsi] + movdqu xmm15,XMMWORD[((48 + 0))+rsi] + pxor xmm2,xmm3 + pxor xmm6,xmm7 + pxor xmm10,xmm11 + pxor xmm15,xmm14 + movdqu XMMWORD[(0 + 0)+rdi],xmm2 + movdqu XMMWORD[(16 + 0)+rdi],xmm6 + movdqu XMMWORD[(32 + 0)+rdi],xmm10 + movdqu XMMWORD[(48 + 0)+rdi],xmm15 + movdqu xmm3,XMMWORD[((0 + 64))+rsi] + movdqu xmm7,XMMWORD[((16 + 64))+rsi] + movdqu xmm11,XMMWORD[((32 + 64))+rsi] + movdqu xmm15,XMMWORD[((48 + 64))+rsi] + pxor xmm1,xmm3 + pxor xmm5,xmm7 + pxor xmm9,xmm11 + pxor xmm15,xmm13 + movdqu XMMWORD[(0 + 64)+rdi],xmm1 + movdqu XMMWORD[(16 + 64)+rdi],xmm5 + movdqu XMMWORD[(32 + 64)+rdi],xmm9 + movdqu XMMWORD[(48 + 64)+rdi],xmm15 + + cmp rbx,12*16 + ja NEAR $L$seal_sse_main_init + mov rcx,8*16 + sub rbx,8*16 + lea rsi,[128+rsi] + jmp NEAR $L$seal_sse_128_tail_hash +$L$seal_sse_main_init: + movdqu xmm3,XMMWORD[((0 + 128))+rsi] + movdqu xmm7,XMMWORD[((16 + 128))+rsi] + movdqu xmm11,XMMWORD[((32 + 128))+rsi] + movdqu xmm15,XMMWORD[((48 + 128))+rsi] + pxor xmm0,xmm3 + pxor xmm4,xmm7 + pxor xmm8,xmm11 + pxor xmm15,xmm12 + movdqu XMMWORD[(0 + 128)+rdi],xmm0 + movdqu XMMWORD[(16 + 128)+rdi],xmm4 + movdqu XMMWORD[(32 + 128)+rdi],xmm8 + movdqu XMMWORD[(48 + 128)+rdi],xmm15 + + mov rcx,12*16 + sub rbx,12*16 + lea rsi,[192+rsi] + mov rcx,2 + mov r8,8 + cmp rbx,4*16 + jbe NEAR $L$seal_sse_tail_64 + cmp rbx,8*16 + jbe NEAR $L$seal_sse_tail_128 + cmp rbx,12*16 + jbe NEAR $L$seal_sse_tail_192 + +$L$seal_sse_main_loop: + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm1,xmm0 + movdqa xmm5,xmm4 + movdqa xmm9,xmm8 + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + movdqa xmm10,xmm8 + movdqa xmm3,xmm0 + movdqa xmm7,xmm4 + movdqa xmm11,xmm8 + movdqa xmm15,XMMWORD[((160+96))+rbp] + paddd xmm15,XMMWORD[$L$sse_inc] + movdqa xmm14,xmm15 + paddd xmm14,XMMWORD[$L$sse_inc] + movdqa xmm13,xmm14 + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm12,xmm13 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + movdqa XMMWORD[(160+112)+rbp],xmm13 + movdqa XMMWORD[(160+128)+rbp],xmm14 + movdqa XMMWORD[(160+144)+rbp],xmm15 + +ALIGN 32 +$L$seal_sse_main_rounds: + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,XMMWORD[$L$rol16] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,20 + pslld xmm7,32-20 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,20 + pslld xmm6,32-20 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,20 + pslld xmm5,32-20 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,20 + pslld xmm4,32-20 + pxor xmm4,xmm8 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + movdqa xmm8,XMMWORD[$L$rol8] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,25 + pslld xmm7,32-25 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,25 + pslld xmm6,32-25 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,25 + pslld xmm5,32-25 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,25 + pslld xmm4,32-25 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[((160+80))+rbp] + imul r9,r12 + add r15,r10 + adc r9,rdx +DB 102,15,58,15,255,4 +DB 102,69,15,58,15,219,8 +DB 102,69,15,58,15,255,12 +DB 102,15,58,15,246,4 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,12 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,XMMWORD[$L$rol16] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,20 + pslld xmm7,32-20 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,20 + pslld xmm6,32-20 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,20 + pslld xmm5,32-20 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,20 + pslld xmm4,32-20 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[$L$rol8] + paddd xmm3,xmm7 + paddd xmm2,xmm6 + paddd xmm1,xmm5 + paddd xmm0,xmm4 + pxor xmm15,xmm3 + pxor xmm14,xmm2 + pxor xmm13,xmm1 + pxor xmm12,xmm0 +DB 102,69,15,56,0,248 +DB 102,69,15,56,0,240 +DB 102,69,15,56,0,232 +DB 102,69,15,56,0,224 + movdqa xmm8,XMMWORD[((160+80))+rbp] + paddd xmm11,xmm15 + paddd xmm10,xmm14 + paddd xmm9,xmm13 + paddd xmm8,xmm12 + pxor xmm7,xmm11 + pxor xmm6,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm8 + movdqa XMMWORD[(160+80)+rbp],xmm8 + movdqa xmm8,xmm7 + psrld xmm8,25 + pslld xmm7,32-25 + pxor xmm7,xmm8 + movdqa xmm8,xmm6 + psrld xmm8,25 + pslld xmm6,32-25 + pxor xmm6,xmm8 + movdqa xmm8,xmm5 + psrld xmm8,25 + pslld xmm5,32-25 + pxor xmm5,xmm8 + movdqa xmm8,xmm4 + psrld xmm8,25 + pslld xmm4,32-25 + pxor xmm4,xmm8 + movdqa xmm8,XMMWORD[((160+80))+rbp] +DB 102,15,58,15,255,12 +DB 102,69,15,58,15,219,8 +DB 102,69,15,58,15,255,4 +DB 102,15,58,15,246,12 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,4 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + + lea rdi,[16+rdi] + dec r8 + jge NEAR $L$seal_sse_main_rounds + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] + dec rcx + jg NEAR $L$seal_sse_main_rounds + paddd xmm3,XMMWORD[$L$chacha20_consts] + paddd xmm7,XMMWORD[((160+48))+rbp] + paddd xmm11,XMMWORD[((160+64))+rbp] + paddd xmm15,XMMWORD[((160+144))+rbp] + paddd xmm2,XMMWORD[$L$chacha20_consts] + paddd xmm6,XMMWORD[((160+48))+rbp] + paddd xmm10,XMMWORD[((160+64))+rbp] + paddd xmm14,XMMWORD[((160+128))+rbp] + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm5,XMMWORD[((160+48))+rbp] + paddd xmm9,XMMWORD[((160+64))+rbp] + paddd xmm13,XMMWORD[((160+112))+rbp] + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + + movdqa XMMWORD[(160+80)+rbp],xmm14 + movdqa XMMWORD[(160+80)+rbp],xmm14 + movdqu xmm14,XMMWORD[((0 + 0))+rsi] + pxor xmm14,xmm3 + movdqu XMMWORD[(0 + 0)+rdi],xmm14 + movdqu xmm14,XMMWORD[((16 + 0))+rsi] + pxor xmm14,xmm7 + movdqu XMMWORD[(16 + 0)+rdi],xmm14 + movdqu xmm14,XMMWORD[((32 + 0))+rsi] + pxor xmm14,xmm11 + movdqu XMMWORD[(32 + 0)+rdi],xmm14 + movdqu xmm14,XMMWORD[((48 + 0))+rsi] + pxor xmm14,xmm15 + movdqu XMMWORD[(48 + 0)+rdi],xmm14 + + movdqa xmm14,XMMWORD[((160+80))+rbp] + movdqu xmm3,XMMWORD[((0 + 64))+rsi] + movdqu xmm7,XMMWORD[((16 + 64))+rsi] + movdqu xmm11,XMMWORD[((32 + 64))+rsi] + movdqu xmm15,XMMWORD[((48 + 64))+rsi] + pxor xmm2,xmm3 + pxor xmm6,xmm7 + pxor xmm10,xmm11 + pxor xmm15,xmm14 + movdqu XMMWORD[(0 + 64)+rdi],xmm2 + movdqu XMMWORD[(16 + 64)+rdi],xmm6 + movdqu XMMWORD[(32 + 64)+rdi],xmm10 + movdqu XMMWORD[(48 + 64)+rdi],xmm15 + movdqu xmm3,XMMWORD[((0 + 128))+rsi] + movdqu xmm7,XMMWORD[((16 + 128))+rsi] + movdqu xmm11,XMMWORD[((32 + 128))+rsi] + movdqu xmm15,XMMWORD[((48 + 128))+rsi] + pxor xmm1,xmm3 + pxor xmm5,xmm7 + pxor xmm9,xmm11 + pxor xmm15,xmm13 + movdqu XMMWORD[(0 + 128)+rdi],xmm1 + movdqu XMMWORD[(16 + 128)+rdi],xmm5 + movdqu XMMWORD[(32 + 128)+rdi],xmm9 + movdqu XMMWORD[(48 + 128)+rdi],xmm15 + + cmp rbx,16*16 + ja NEAR $L$seal_sse_main_loop_xor + + mov rcx,12*16 + sub rbx,12*16 + lea rsi,[192+rsi] + jmp NEAR $L$seal_sse_128_tail_hash +$L$seal_sse_main_loop_xor: + movdqu xmm3,XMMWORD[((0 + 192))+rsi] + movdqu xmm7,XMMWORD[((16 + 192))+rsi] + movdqu xmm11,XMMWORD[((32 + 192))+rsi] + movdqu xmm15,XMMWORD[((48 + 192))+rsi] + pxor xmm0,xmm3 + pxor xmm4,xmm7 + pxor xmm8,xmm11 + pxor xmm15,xmm12 + movdqu XMMWORD[(0 + 192)+rdi],xmm0 + movdqu XMMWORD[(16 + 192)+rdi],xmm4 + movdqu XMMWORD[(32 + 192)+rdi],xmm8 + movdqu XMMWORD[(48 + 192)+rdi],xmm15 + + lea rsi,[256+rsi] + sub rbx,16*16 + mov rcx,6 + mov r8,4 + cmp rbx,12*16 + jg NEAR $L$seal_sse_main_loop + mov rcx,rbx + test rbx,rbx + je NEAR $L$seal_sse_128_tail_hash + mov rcx,6 + cmp rbx,8*16 + ja NEAR $L$seal_sse_tail_192 + cmp rbx,4*16 + ja NEAR $L$seal_sse_tail_128 + +$L$seal_sse_tail_64: + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm12,XMMWORD[((160+96))+rbp] + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + +$L$seal_sse_tail_64_rounds_and_x2hash: + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] +$L$seal_sse_tail_64_rounds_and_x1hash: + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] + dec rcx + jg NEAR $L$seal_sse_tail_64_rounds_and_x2hash + dec r8 + jge NEAR $L$seal_sse_tail_64_rounds_and_x1hash + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + + jmp NEAR $L$seal_sse_128_tail_xor + +$L$seal_sse_tail_128: + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm1,xmm0 + movdqa xmm5,xmm4 + movdqa xmm9,xmm8 + movdqa xmm13,XMMWORD[((160+96))+rbp] + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm12,xmm13 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + movdqa XMMWORD[(160+112)+rbp],xmm13 + +$L$seal_sse_tail_128_rounds_and_x2hash: + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] +$L$seal_sse_tail_128_rounds_and_x1hash: + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 + + lea rdi,[16+rdi] + dec rcx + jg NEAR $L$seal_sse_tail_128_rounds_and_x2hash + dec r8 + jge NEAR $L$seal_sse_tail_128_rounds_and_x1hash + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm5,XMMWORD[((160+48))+rbp] + paddd xmm9,XMMWORD[((160+64))+rbp] + paddd xmm13,XMMWORD[((160+112))+rbp] + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + movdqu xmm3,XMMWORD[((0 + 0))+rsi] + movdqu xmm7,XMMWORD[((16 + 0))+rsi] + movdqu xmm11,XMMWORD[((32 + 0))+rsi] + movdqu xmm15,XMMWORD[((48 + 0))+rsi] + pxor xmm1,xmm3 + pxor xmm5,xmm7 + pxor xmm9,xmm11 + pxor xmm15,xmm13 + movdqu XMMWORD[(0 + 0)+rdi],xmm1 + movdqu XMMWORD[(16 + 0)+rdi],xmm5 + movdqu XMMWORD[(32 + 0)+rdi],xmm9 + movdqu XMMWORD[(48 + 0)+rdi],xmm15 + + mov rcx,4*16 + sub rbx,4*16 + lea rsi,[64+rsi] + jmp NEAR $L$seal_sse_128_tail_hash + +$L$seal_sse_tail_192: + movdqa xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm4,XMMWORD[((160+48))+rbp] + movdqa xmm8,XMMWORD[((160+64))+rbp] + movdqa xmm1,xmm0 + movdqa xmm5,xmm4 + movdqa xmm9,xmm8 + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + movdqa xmm10,xmm8 + movdqa xmm14,XMMWORD[((160+96))+rbp] + paddd xmm14,XMMWORD[$L$sse_inc] + movdqa xmm13,xmm14 + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm12,xmm13 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa XMMWORD[(160+96)+rbp],xmm12 + movdqa XMMWORD[(160+112)+rbp],xmm13 + movdqa XMMWORD[(160+128)+rbp],xmm14 + +$L$seal_sse_tail_192_rounds_and_x2hash: + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] +$L$seal_sse_tail_192_rounds_and_x1hash: + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,12 + psrld xmm6,20 + pxor xmm6,xmm3 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,7 + psrld xmm6,25 + pxor xmm6,xmm3 +DB 102,15,58,15,246,4 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,12 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,12 + psrld xmm6,20 + pxor xmm6,xmm3 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,7 + psrld xmm6,25 + pxor xmm6,xmm3 +DB 102,15,58,15,246,12 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,4 + + lea rdi,[16+rdi] + dec rcx + jg NEAR $L$seal_sse_tail_192_rounds_and_x2hash + dec r8 + jge NEAR $L$seal_sse_tail_192_rounds_and_x1hash + paddd xmm2,XMMWORD[$L$chacha20_consts] + paddd xmm6,XMMWORD[((160+48))+rbp] + paddd xmm10,XMMWORD[((160+64))+rbp] + paddd xmm14,XMMWORD[((160+128))+rbp] + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm5,XMMWORD[((160+48))+rbp] + paddd xmm9,XMMWORD[((160+64))+rbp] + paddd xmm13,XMMWORD[((160+112))+rbp] + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm4,XMMWORD[((160+48))+rbp] + paddd xmm8,XMMWORD[((160+64))+rbp] + paddd xmm12,XMMWORD[((160+96))+rbp] + movdqu xmm3,XMMWORD[((0 + 0))+rsi] + movdqu xmm7,XMMWORD[((16 + 0))+rsi] + movdqu xmm11,XMMWORD[((32 + 0))+rsi] + movdqu xmm15,XMMWORD[((48 + 0))+rsi] + pxor xmm2,xmm3 + pxor xmm6,xmm7 + pxor xmm10,xmm11 + pxor xmm15,xmm14 + movdqu XMMWORD[(0 + 0)+rdi],xmm2 + movdqu XMMWORD[(16 + 0)+rdi],xmm6 + movdqu XMMWORD[(32 + 0)+rdi],xmm10 + movdqu XMMWORD[(48 + 0)+rdi],xmm15 + movdqu xmm3,XMMWORD[((0 + 64))+rsi] + movdqu xmm7,XMMWORD[((16 + 64))+rsi] + movdqu xmm11,XMMWORD[((32 + 64))+rsi] + movdqu xmm15,XMMWORD[((48 + 64))+rsi] + pxor xmm1,xmm3 + pxor xmm5,xmm7 + pxor xmm9,xmm11 + pxor xmm15,xmm13 + movdqu XMMWORD[(0 + 64)+rdi],xmm1 + movdqu XMMWORD[(16 + 64)+rdi],xmm5 + movdqu XMMWORD[(32 + 64)+rdi],xmm9 + movdqu XMMWORD[(48 + 64)+rdi],xmm15 + + mov rcx,8*16 + sub rbx,8*16 + lea rsi,[128+rsi] + +$L$seal_sse_128_tail_hash: + cmp rcx,16 + jb NEAR $L$seal_sse_128_tail_xor + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + sub rcx,16 + lea rdi,[16+rdi] + jmp NEAR $L$seal_sse_128_tail_hash + +$L$seal_sse_128_tail_xor: + cmp rbx,16 + jb NEAR $L$seal_sse_tail_16 + sub rbx,16 + + movdqu xmm3,XMMWORD[rsi] + pxor xmm0,xmm3 + movdqu XMMWORD[rdi],xmm0 + + add r10,QWORD[rdi] + adc r11,QWORD[8+rdi] + adc r12,1 + lea rsi,[16+rsi] + lea rdi,[16+rdi] + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + + movdqa xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm8,xmm12 + movdqa xmm12,xmm1 + movdqa xmm1,xmm5 + movdqa xmm5,xmm9 + movdqa xmm9,xmm13 + jmp NEAR $L$seal_sse_128_tail_xor + +$L$seal_sse_tail_16: + test rbx,rbx + jz NEAR $L$process_blocks_of_extra_in + + mov r8,rbx + mov rcx,rbx + lea rsi,[((-1))+rbx*1+rsi] + pxor xmm15,xmm15 +$L$seal_sse_tail_16_compose: + pslldq xmm15,1 + pinsrb xmm15,BYTE[rsi],0 + lea rsi,[((-1))+rsi] + dec rcx + jne NEAR $L$seal_sse_tail_16_compose + + + pxor xmm15,xmm0 + + + mov rcx,rbx + movdqu xmm0,xmm15 +$L$seal_sse_tail_16_extract: + pextrb XMMWORD[rdi],xmm0,0 + psrldq xmm0,1 + add rdi,1 + sub rcx,1 + jnz NEAR $L$seal_sse_tail_16_extract + + + + + + + + + mov r9,QWORD[((288 + 160 + 32))+rsp] + mov r14,QWORD[56+r9] + mov r13,QWORD[48+r9] + test r14,r14 + jz NEAR $L$process_partial_block + + mov r15,16 + sub r15,rbx + cmp r14,r15 + + jge NEAR $L$load_extra_in + mov r15,r14 + +$L$load_extra_in: + + + lea rsi,[((-1))+r15*1+r13] + + + add r13,r15 + sub r14,r15 + mov QWORD[48+r9],r13 + mov QWORD[56+r9],r14 + + + + add r8,r15 + + + pxor xmm11,xmm11 +$L$load_extra_load_loop: + pslldq xmm11,1 + pinsrb xmm11,BYTE[rsi],0 + lea rsi,[((-1))+rsi] + sub r15,1 + jnz NEAR $L$load_extra_load_loop + + + + + mov r15,rbx + +$L$load_extra_shift_loop: + pslldq xmm11,1 + sub r15,1 + jnz NEAR $L$load_extra_shift_loop + + + + + lea r15,[$L$and_masks] + shl rbx,4 + pand xmm15,XMMWORD[((-16))+rbx*1+r15] + + + por xmm15,xmm11 + + + +DB 102,77,15,126,253 + pextrq r14,xmm15,1 + add r10,r13 + adc r11,r14 + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + +$L$process_blocks_of_extra_in: + + mov r9,QWORD[((288+32+160 ))+rsp] + mov rsi,QWORD[48+r9] + mov r8,QWORD[56+r9] + mov rcx,r8 + shr r8,4 + +$L$process_extra_hash_loop: + jz NEAR process_extra_in_trailer + add r10,QWORD[((0+0))+rsi] + adc r11,QWORD[((8+0))+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rsi,[16+rsi] + sub r8,1 + jmp NEAR $L$process_extra_hash_loop +process_extra_in_trailer: + and rcx,15 + mov rbx,rcx + jz NEAR $L$do_length_block + lea rsi,[((-1))+rcx*1+rsi] + +$L$process_extra_in_trailer_load: + pslldq xmm15,1 + pinsrb xmm15,BYTE[rsi],0 + lea rsi,[((-1))+rsi] + sub rcx,1 + jnz NEAR $L$process_extra_in_trailer_load + +$L$process_partial_block: + + lea r15,[$L$and_masks] + shl rbx,4 + pand xmm15,XMMWORD[((-16))+rbx*1+r15] +DB 102,77,15,126,253 + pextrq r14,xmm15,1 + add r10,r13 + adc r11,r14 + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + +$L$do_length_block: + add r10,QWORD[((0+160+32))+rbp] + adc r11,QWORD[((8+160+32))+rbp] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + + mov r13,r10 + mov r14,r11 + mov r15,r12 + sub r10,-5 + sbb r11,-1 + sbb r12,3 + cmovc r10,r13 + cmovc r11,r14 + cmovc r12,r15 + + add r10,QWORD[((0+160+16))+rbp] + adc r11,QWORD[((8+160+16))+rbp] + + movaps xmm6,XMMWORD[((0+0))+rbp] + movaps xmm7,XMMWORD[((16+0))+rbp] + movaps xmm8,XMMWORD[((32+0))+rbp] + movaps xmm9,XMMWORD[((48+0))+rbp] + movaps xmm10,XMMWORD[((64+0))+rbp] + movaps xmm11,XMMWORD[((80+0))+rbp] + movaps xmm12,XMMWORD[((96+0))+rbp] + movaps xmm13,XMMWORD[((112+0))+rbp] + movaps xmm14,XMMWORD[((128+0))+rbp] + movaps xmm15,XMMWORD[((144+0))+rbp] + + + add rsp,288 + 160 + 32 + + + pop r9 + + mov QWORD[r9],r10 + mov QWORD[8+r9],r11 + pop r15 + + pop r14 + + pop r13 + + pop r12 + + pop rbx + + pop rbp + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + ret + +$L$seal_sse_128: + + movdqu xmm0,XMMWORD[$L$chacha20_consts] + movdqa xmm1,xmm0 + movdqa xmm2,xmm0 + movdqu xmm4,XMMWORD[r9] + movdqa xmm5,xmm4 + movdqa xmm6,xmm4 + movdqu xmm8,XMMWORD[16+r9] + movdqa xmm9,xmm8 + movdqa xmm10,xmm8 + movdqu xmm14,XMMWORD[32+r9] + movdqa xmm12,xmm14 + paddd xmm12,XMMWORD[$L$sse_inc] + movdqa xmm13,xmm12 + paddd xmm13,XMMWORD[$L$sse_inc] + movdqa xmm7,xmm4 + movdqa xmm11,xmm8 + movdqa xmm15,xmm12 + mov r10,10 + +$L$seal_sse_128_rounds: + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,4 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,12 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,4 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,12 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,12 + psrld xmm6,20 + pxor xmm6,xmm3 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,7 + psrld xmm6,25 + pxor xmm6,xmm3 +DB 102,15,58,15,246,4 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,12 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol16] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,12 + psrld xmm4,20 + pxor xmm4,xmm3 + paddd xmm0,xmm4 + pxor xmm12,xmm0 + pshufb xmm12,XMMWORD[$L$rol8] + paddd xmm8,xmm12 + pxor xmm4,xmm8 + movdqa xmm3,xmm4 + pslld xmm3,7 + psrld xmm4,25 + pxor xmm4,xmm3 +DB 102,15,58,15,228,12 +DB 102,69,15,58,15,192,8 +DB 102,69,15,58,15,228,4 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol16] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,12 + psrld xmm5,20 + pxor xmm5,xmm3 + paddd xmm1,xmm5 + pxor xmm13,xmm1 + pshufb xmm13,XMMWORD[$L$rol8] + paddd xmm9,xmm13 + pxor xmm5,xmm9 + movdqa xmm3,xmm5 + pslld xmm3,7 + psrld xmm5,25 + pxor xmm5,xmm3 +DB 102,15,58,15,237,12 +DB 102,69,15,58,15,201,8 +DB 102,69,15,58,15,237,4 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol16] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,12 + psrld xmm6,20 + pxor xmm6,xmm3 + paddd xmm2,xmm6 + pxor xmm14,xmm2 + pshufb xmm14,XMMWORD[$L$rol8] + paddd xmm10,xmm14 + pxor xmm6,xmm10 + movdqa xmm3,xmm6 + pslld xmm3,7 + psrld xmm6,25 + pxor xmm6,xmm3 +DB 102,15,58,15,246,12 +DB 102,69,15,58,15,210,8 +DB 102,69,15,58,15,246,4 + + dec r10 + jnz NEAR $L$seal_sse_128_rounds + paddd xmm0,XMMWORD[$L$chacha20_consts] + paddd xmm1,XMMWORD[$L$chacha20_consts] + paddd xmm2,XMMWORD[$L$chacha20_consts] + paddd xmm4,xmm7 + paddd xmm5,xmm7 + paddd xmm6,xmm7 + paddd xmm8,xmm11 + paddd xmm9,xmm11 + paddd xmm12,xmm15 + paddd xmm15,XMMWORD[$L$sse_inc] + paddd xmm13,xmm15 + + pand xmm2,XMMWORD[$L$clamp] + movdqa XMMWORD[(160+0)+rbp],xmm2 + movdqa XMMWORD[(160+16)+rbp],xmm6 + + mov r8,r8 + call poly_hash_ad_internal + jmp NEAR $L$seal_sse_128_tail_xor +$L$SEH_end_chacha20_poly1305_seal: + + + + +ALIGN 64 +chacha20_poly1305_open_avx2: + + + + + + + + + + + + + vzeroupper + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vbroadcasti128 ymm4,XMMWORD[r9] + vbroadcasti128 ymm8,XMMWORD[16+r9] + vbroadcasti128 ymm12,XMMWORD[32+r9] + vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] + cmp rbx,6*32 + jbe NEAR $L$open_avx2_192 + cmp rbx,10*32 + jbe NEAR $L$open_avx2_320 + + vmovdqa YMMWORD[(160+64)+rbp],ymm4 + vmovdqa YMMWORD[(160+96)+rbp],ymm8 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + mov r10,10 +$L$open_avx2_init_rounds: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + + dec r10 + jne NEAR $L$open_avx2_init_rounds + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + + vperm2i128 ymm3,ymm4,ymm0,0x02 + + vpand ymm3,ymm3,YMMWORD[$L$clamp] + vmovdqa YMMWORD[(160+0)+rbp],ymm3 + + vperm2i128 ymm0,ymm4,ymm0,0x13 + vperm2i128 ymm4,ymm12,ymm8,0x13 + + mov r8,r8 + call poly_hash_ad_internal + + xor rcx,rcx +$L$open_avx2_init_hash: + add r10,QWORD[((0+0))+rcx*1+rsi] + adc r11,QWORD[((8+0))+rcx*1+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + add rcx,16 + cmp rcx,2*32 + jne NEAR $L$open_avx2_init_hash + + vpxor ymm0,ymm0,YMMWORD[rsi] + vpxor ymm4,ymm4,YMMWORD[32+rsi] + + vmovdqu YMMWORD[rdi],ymm0 + vmovdqu YMMWORD[32+rdi],ymm4 + lea rsi,[64+rsi] + lea rdi,[64+rdi] + sub rbx,2*32 +$L$open_avx2_main_loop: + + cmp rbx,16*32 + jb NEAR $L$open_avx2_main_loop_done + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm2,ymm0 + vmovdqa ymm6,ymm4 + vmovdqa ymm10,ymm8 + vmovdqa ymm3,ymm0 + vmovdqa ymm7,ymm4 + vmovdqa ymm11,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm14,ymm12,ymm15 + vpaddd ymm13,ymm12,ymm14 + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+256)+rbp],ymm15 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + + xor rcx,rcx +$L$open_avx2_main_loop_rounds: + add r10,QWORD[((0+0))+rcx*1+rsi] + adc r11,QWORD[((8+0))+rcx*1+rsi] + adc r12,1 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + add r15,rax + adc r9,rdx + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + add r10,QWORD[((0+16))+rcx*1+rsi] + adc r11,QWORD[((8+16))+rcx*1+rsi] + adc r12,1 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,4 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,12 + vpalignr ymm6,ymm6,ymm6,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,12 + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + vpalignr ymm5,ymm5,ymm5,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm4,ymm4,ymm4,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,12 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + add r15,rax + adc r9,rdx + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + add r10,QWORD[((0+32))+rcx*1+rsi] + adc r11,QWORD[((8+32))+rcx*1+rsi] + adc r12,1 + + lea rcx,[48+rcx] + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + add r15,rax + adc r9,rdx + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,12 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,4 + vpalignr ymm6,ymm6,ymm6,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm5,ymm5,ymm5,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm4,ymm4,ymm4,12 + vpalignr ymm8,ymm8,ymm8,8 + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpalignr ymm12,ymm12,ymm12,4 + + cmp rcx,10*6*8 + jne NEAR $L$open_avx2_main_loop_rounds + vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] + vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] + vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] + vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] + vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + + vmovdqa YMMWORD[(160+128)+rbp],ymm0 + add r10,QWORD[((0+480))+rsi] + adc r11,QWORD[((8+480))+rsi] + adc r12,1 + vperm2i128 ymm0,ymm7,ymm3,0x02 + vperm2i128 ymm7,ymm7,ymm3,0x13 + vperm2i128 ymm3,ymm15,ymm11,0x02 + vperm2i128 ymm11,ymm15,ymm11,0x13 + vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] + vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] + vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] + vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] + vmovdqu YMMWORD[(0+0)+rdi],ymm0 + vmovdqu YMMWORD[(32+0)+rdi],ymm3 + vmovdqu YMMWORD[(64+0)+rdi],ymm7 + vmovdqu YMMWORD[(96+0)+rdi],ymm11 + + vmovdqa ymm0,YMMWORD[((160+128))+rbp] + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vperm2i128 ymm3,ymm6,ymm2,0x02 + vperm2i128 ymm6,ymm6,ymm2,0x13 + vperm2i128 ymm2,ymm14,ymm10,0x02 + vperm2i128 ymm10,ymm14,ymm10,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] + vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] + vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] + vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] + vmovdqu YMMWORD[(0+128)+rdi],ymm3 + vmovdqu YMMWORD[(32+128)+rdi],ymm2 + vmovdqu YMMWORD[(64+128)+rdi],ymm6 + vmovdqu YMMWORD[(96+128)+rdi],ymm10 + add r10,QWORD[((0+480+16))+rsi] + adc r11,QWORD[((8+480+16))+rsi] + adc r12,1 + vperm2i128 ymm3,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] + vmovdqu YMMWORD[(0+256)+rdi],ymm3 + vmovdqu YMMWORD[(32+256)+rdi],ymm1 + vmovdqu YMMWORD[(64+256)+rdi],ymm5 + vmovdqu YMMWORD[(96+256)+rdi],ymm9 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vperm2i128 ymm3,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm12,ymm8,0x02 + vperm2i128 ymm8,ymm12,ymm8,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] + vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] + vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] + vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] + vmovdqu YMMWORD[(0+384)+rdi],ymm3 + vmovdqu YMMWORD[(32+384)+rdi],ymm0 + vmovdqu YMMWORD[(64+384)+rdi],ymm4 + vmovdqu YMMWORD[(96+384)+rdi],ymm8 + + lea rsi,[512+rsi] + lea rdi,[512+rdi] + sub rbx,16*32 + jmp NEAR $L$open_avx2_main_loop +$L$open_avx2_main_loop_done: + test rbx,rbx + vzeroupper + je NEAR $L$open_sse_finalize + + cmp rbx,12*32 + ja NEAR $L$open_avx2_tail_512 + cmp rbx,8*32 + ja NEAR $L$open_avx2_tail_384 + cmp rbx,4*32 + ja NEAR $L$open_avx2_tail_256 + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + + xor r8,r8 + mov rcx,rbx + and rcx,-16 + test rcx,rcx + je NEAR $L$open_avx2_tail_128_rounds +$L$open_avx2_tail_128_rounds_and_x1hash: + add r10,QWORD[((0+0))+r8*1+rsi] + adc r11,QWORD[((8+0))+r8*1+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + +$L$open_avx2_tail_128_rounds: + add r8,16 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + + cmp r8,rcx + jb NEAR $L$open_avx2_tail_128_rounds_and_x1hash + cmp r8,160 + jne NEAR $L$open_avx2_tail_128_rounds + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vperm2i128 ymm3,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm3 + + jmp NEAR $L$open_avx2_tail_128_xor + +$L$open_avx2_tail_256: + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + + mov QWORD[((160+128))+rbp],rbx + mov rcx,rbx + sub rcx,4*32 + shr rcx,4 + mov r8,10 + cmp rcx,10 + cmovg rcx,r8 + mov rbx,rsi + xor r8,r8 +$L$open_avx2_tail_256_rounds_and_x1hash: + add r10,QWORD[((0+0))+rbx] + adc r11,QWORD[((8+0))+rbx] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rbx,[16+rbx] +$L$open_avx2_tail_256_rounds: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,4 + + inc r8 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,12 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,12 + + cmp r8,rcx + jb NEAR $L$open_avx2_tail_256_rounds_and_x1hash + cmp r8,10 + jne NEAR $L$open_avx2_tail_256_rounds + mov r8,rbx + sub rbx,rsi + mov rcx,rbx + mov rbx,QWORD[((160+128))+rbp] +$L$open_avx2_tail_256_hash: + add rcx,16 + cmp rcx,rbx + jg NEAR $L$open_avx2_tail_256_done + add r10,QWORD[((0+0))+r8] + adc r11,QWORD[((8+0))+r8] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea r8,[16+r8] + jmp NEAR $L$open_avx2_tail_256_hash +$L$open_avx2_tail_256_done: + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vperm2i128 ymm3,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] + vmovdqu YMMWORD[(0+0)+rdi],ymm3 + vmovdqu YMMWORD[(32+0)+rdi],ymm1 + vmovdqu YMMWORD[(64+0)+rdi],ymm5 + vmovdqu YMMWORD[(96+0)+rdi],ymm9 + vperm2i128 ymm3,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm3 + + lea rsi,[128+rsi] + lea rdi,[128+rdi] + sub rbx,4*32 + jmp NEAR $L$open_avx2_tail_128_xor + +$L$open_avx2_tail_384: + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm2,ymm0 + vmovdqa ymm6,ymm4 + vmovdqa ymm10,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm13,ymm12,ymm14 + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + + mov QWORD[((160+128))+rbp],rbx + mov rcx,rbx + sub rcx,8*32 + shr rcx,4 + add rcx,6 + mov r8,10 + cmp rcx,10 + cmovg rcx,r8 + mov rbx,rsi + xor r8,r8 +$L$open_avx2_tail_384_rounds_and_x2hash: + add r10,QWORD[((0+0))+rbx] + adc r11,QWORD[((8+0))+rbx] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rbx,[16+rbx] +$L$open_avx2_tail_384_rounds_and_x1hash: + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,4 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,4 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + add r10,QWORD[((0+0))+rbx] + adc r11,QWORD[((8+0))+rbx] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rbx,[16+rbx] + inc r8 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,12 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,12 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + + cmp r8,rcx + jb NEAR $L$open_avx2_tail_384_rounds_and_x2hash + cmp r8,10 + jne NEAR $L$open_avx2_tail_384_rounds_and_x1hash + mov r8,rbx + sub rbx,rsi + mov rcx,rbx + mov rbx,QWORD[((160+128))+rbp] +$L$open_avx2_384_tail_hash: + add rcx,16 + cmp rcx,rbx + jg NEAR $L$open_avx2_384_tail_done + add r10,QWORD[((0+0))+r8] + adc r11,QWORD[((8+0))+r8] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea r8,[16+r8] + jmp NEAR $L$open_avx2_384_tail_hash +$L$open_avx2_384_tail_done: + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] + vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vperm2i128 ymm3,ymm6,ymm2,0x02 + vperm2i128 ymm6,ymm6,ymm2,0x13 + vperm2i128 ymm2,ymm14,ymm10,0x02 + vperm2i128 ymm10,ymm14,ymm10,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] + vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] + vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] + vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] + vmovdqu YMMWORD[(0+0)+rdi],ymm3 + vmovdqu YMMWORD[(32+0)+rdi],ymm2 + vmovdqu YMMWORD[(64+0)+rdi],ymm6 + vmovdqu YMMWORD[(96+0)+rdi],ymm10 + vperm2i128 ymm3,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] + vmovdqu YMMWORD[(0+128)+rdi],ymm3 + vmovdqu YMMWORD[(32+128)+rdi],ymm1 + vmovdqu YMMWORD[(64+128)+rdi],ymm5 + vmovdqu YMMWORD[(96+128)+rdi],ymm9 + vperm2i128 ymm3,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm3 + + lea rsi,[256+rsi] + lea rdi,[256+rdi] + sub rbx,8*32 + jmp NEAR $L$open_avx2_tail_128_xor + +$L$open_avx2_tail_512: + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm2,ymm0 + vmovdqa ymm6,ymm4 + vmovdqa ymm10,ymm8 + vmovdqa ymm3,ymm0 + vmovdqa ymm7,ymm4 + vmovdqa ymm11,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm14,ymm12,ymm15 + vpaddd ymm13,ymm12,ymm14 + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+256)+rbp],ymm15 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + + xor rcx,rcx + mov r8,rsi +$L$open_avx2_tail_512_rounds_and_x2hash: + add r10,QWORD[((0+0))+r8] + adc r11,QWORD[((8+0))+r8] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea r8,[16+r8] +$L$open_avx2_tail_512_rounds_and_x1hash: + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + add r10,QWORD[((0+0))+r8] + adc r11,QWORD[((8+0))+r8] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,4 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,12 + vpalignr ymm6,ymm6,ymm6,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,12 + vpalignr ymm5,ymm5,ymm5,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm4,ymm4,ymm4,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,12 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + add r10,QWORD[((0+16))+r8] + adc r11,QWORD[((8+16))+r8] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea r8,[32+r8] + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,12 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,4 + vpalignr ymm6,ymm6,ymm6,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm5,ymm5,ymm5,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm4,ymm4,ymm4,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,4 + + inc rcx + cmp rcx,4 + jl NEAR $L$open_avx2_tail_512_rounds_and_x2hash + cmp rcx,10 + jne NEAR $L$open_avx2_tail_512_rounds_and_x1hash + mov rcx,rbx + sub rcx,12*32 + and rcx,-16 +$L$open_avx2_tail_512_hash: + test rcx,rcx + je NEAR $L$open_avx2_tail_512_done + add r10,QWORD[((0+0))+r8] + adc r11,QWORD[((8+0))+r8] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea r8,[16+r8] + sub rcx,2*8 + jmp NEAR $L$open_avx2_tail_512_hash +$L$open_avx2_tail_512_done: + vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] + vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] + vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] + vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] + vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + + vmovdqa YMMWORD[(160+128)+rbp],ymm0 + vperm2i128 ymm0,ymm7,ymm3,0x02 + vperm2i128 ymm7,ymm7,ymm3,0x13 + vperm2i128 ymm3,ymm15,ymm11,0x02 + vperm2i128 ymm11,ymm15,ymm11,0x13 + vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] + vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] + vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] + vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] + vmovdqu YMMWORD[(0+0)+rdi],ymm0 + vmovdqu YMMWORD[(32+0)+rdi],ymm3 + vmovdqu YMMWORD[(64+0)+rdi],ymm7 + vmovdqu YMMWORD[(96+0)+rdi],ymm11 + + vmovdqa ymm0,YMMWORD[((160+128))+rbp] + vperm2i128 ymm3,ymm6,ymm2,0x02 + vperm2i128 ymm6,ymm6,ymm2,0x13 + vperm2i128 ymm2,ymm14,ymm10,0x02 + vperm2i128 ymm10,ymm14,ymm10,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] + vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] + vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] + vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] + vmovdqu YMMWORD[(0+128)+rdi],ymm3 + vmovdqu YMMWORD[(32+128)+rdi],ymm2 + vmovdqu YMMWORD[(64+128)+rdi],ymm6 + vmovdqu YMMWORD[(96+128)+rdi],ymm10 + vperm2i128 ymm3,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] + vmovdqu YMMWORD[(0+256)+rdi],ymm3 + vmovdqu YMMWORD[(32+256)+rdi],ymm1 + vmovdqu YMMWORD[(64+256)+rdi],ymm5 + vmovdqu YMMWORD[(96+256)+rdi],ymm9 + vperm2i128 ymm3,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm3 + + lea rsi,[384+rsi] + lea rdi,[384+rdi] + sub rbx,12*32 +$L$open_avx2_tail_128_xor: + cmp rbx,32 + jb NEAR $L$open_avx2_tail_32_xor + sub rbx,32 + vpxor ymm0,ymm0,YMMWORD[rsi] + vmovdqu YMMWORD[rdi],ymm0 + lea rsi,[32+rsi] + lea rdi,[32+rdi] + vmovdqa ymm0,ymm4 + vmovdqa ymm4,ymm8 + vmovdqa ymm8,ymm12 + jmp NEAR $L$open_avx2_tail_128_xor +$L$open_avx2_tail_32_xor: + cmp rbx,16 + vmovdqa xmm1,xmm0 + jb NEAR $L$open_avx2_exit + sub rbx,16 + + vpxor xmm1,xmm0,XMMWORD[rsi] + vmovdqu XMMWORD[rdi],xmm1 + lea rsi,[16+rsi] + lea rdi,[16+rdi] + vperm2i128 ymm0,ymm0,ymm0,0x11 + vmovdqa xmm1,xmm0 +$L$open_avx2_exit: + vzeroupper + jmp NEAR $L$open_sse_tail_16 + +$L$open_avx2_192: + vmovdqa ymm1,ymm0 + vmovdqa ymm2,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm6,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm10,ymm8 + vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] + vmovdqa ymm11,ymm12 + vmovdqa ymm15,ymm13 + mov r10,10 +$L$open_avx2_192_rounds: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,4 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,12 + + dec r10 + jne NEAR $L$open_avx2_192_rounds + vpaddd ymm0,ymm0,ymm2 + vpaddd ymm1,ymm1,ymm2 + vpaddd ymm4,ymm4,ymm6 + vpaddd ymm5,ymm5,ymm6 + vpaddd ymm8,ymm8,ymm10 + vpaddd ymm9,ymm9,ymm10 + vpaddd ymm12,ymm12,ymm11 + vpaddd ymm13,ymm13,ymm15 + vperm2i128 ymm3,ymm4,ymm0,0x02 + + vpand ymm3,ymm3,YMMWORD[$L$clamp] + vmovdqa YMMWORD[(160+0)+rbp],ymm3 + + vperm2i128 ymm0,ymm4,ymm0,0x13 + vperm2i128 ymm4,ymm12,ymm8,0x13 + vperm2i128 ymm8,ymm5,ymm1,0x02 + vperm2i128 ymm12,ymm13,ymm9,0x02 + vperm2i128 ymm1,ymm5,ymm1,0x13 + vperm2i128 ymm5,ymm13,ymm9,0x13 +$L$open_avx2_short: + mov r8,r8 + call poly_hash_ad_internal +$L$open_avx2_short_hash_and_xor_loop: + cmp rbx,32 + jb NEAR $L$open_avx2_short_tail_32 + sub rbx,32 + add r10,QWORD[((0+0))+rsi] + adc r11,QWORD[((8+0))+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + add r10,QWORD[((0+16))+rsi] + adc r11,QWORD[((8+16))+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + + vpxor ymm0,ymm0,YMMWORD[rsi] + vmovdqu YMMWORD[rdi],ymm0 + lea rsi,[32+rsi] + lea rdi,[32+rdi] + + vmovdqa ymm0,ymm4 + vmovdqa ymm4,ymm8 + vmovdqa ymm8,ymm12 + vmovdqa ymm12,ymm1 + vmovdqa ymm1,ymm5 + vmovdqa ymm5,ymm9 + vmovdqa ymm9,ymm13 + vmovdqa ymm13,ymm2 + vmovdqa ymm2,ymm6 + jmp NEAR $L$open_avx2_short_hash_and_xor_loop +$L$open_avx2_short_tail_32: + cmp rbx,16 + vmovdqa xmm1,xmm0 + jb NEAR $L$open_avx2_short_tail_32_exit + sub rbx,16 + add r10,QWORD[((0+0))+rsi] + adc r11,QWORD[((8+0))+rsi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + vpxor xmm3,xmm0,XMMWORD[rsi] + vmovdqu XMMWORD[rdi],xmm3 + lea rsi,[16+rsi] + lea rdi,[16+rdi] + vextracti128 xmm1,ymm0,1 +$L$open_avx2_short_tail_32_exit: + vzeroupper + jmp NEAR $L$open_sse_tail_16 + +$L$open_avx2_320: + vmovdqa ymm1,ymm0 + vmovdqa ymm2,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm6,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm10,ymm8 + vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] + vmovdqa ymm7,ymm4 + vmovdqa ymm11,ymm8 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + mov r10,10 +$L$open_avx2_320_rounds: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,4 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,4 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,12 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,12 + + dec r10 + jne NEAR $L$open_avx2_320_rounds + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,ymm7 + vpaddd ymm5,ymm5,ymm7 + vpaddd ymm6,ymm6,ymm7 + vpaddd ymm8,ymm8,ymm11 + vpaddd ymm9,ymm9,ymm11 + vpaddd ymm10,ymm10,ymm11 + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vperm2i128 ymm3,ymm4,ymm0,0x02 + + vpand ymm3,ymm3,YMMWORD[$L$clamp] + vmovdqa YMMWORD[(160+0)+rbp],ymm3 + + vperm2i128 ymm0,ymm4,ymm0,0x13 + vperm2i128 ymm4,ymm12,ymm8,0x13 + vperm2i128 ymm8,ymm5,ymm1,0x02 + vperm2i128 ymm12,ymm13,ymm9,0x02 + vperm2i128 ymm1,ymm5,ymm1,0x13 + vperm2i128 ymm5,ymm13,ymm9,0x13 + vperm2i128 ymm9,ymm6,ymm2,0x02 + vperm2i128 ymm13,ymm14,ymm10,0x02 + vperm2i128 ymm2,ymm6,ymm2,0x13 + vperm2i128 ymm6,ymm14,ymm10,0x13 + jmp NEAR $L$open_avx2_short + + + + + +ALIGN 64 +chacha20_poly1305_seal_avx2: + + + + + + + + + + + + + vzeroupper + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vbroadcasti128 ymm4,XMMWORD[r9] + vbroadcasti128 ymm8,XMMWORD[16+r9] + vbroadcasti128 ymm12,XMMWORD[32+r9] + vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] + cmp rbx,6*32 + jbe NEAR $L$seal_avx2_192 + cmp rbx,10*32 + jbe NEAR $L$seal_avx2_320 + vmovdqa ymm1,ymm0 + vmovdqa ymm2,ymm0 + vmovdqa ymm3,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm6,ymm4 + vmovdqa ymm7,ymm4 + vmovdqa YMMWORD[(160+64)+rbp],ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm10,ymm8 + vmovdqa ymm11,ymm8 + vmovdqa YMMWORD[(160+96)+rbp],ymm8 + vmovdqa ymm15,ymm12 + vpaddd ymm14,ymm15,YMMWORD[$L$avx2_inc] + vpaddd ymm13,ymm14,YMMWORD[$L$avx2_inc] + vpaddd ymm12,ymm13,YMMWORD[$L$avx2_inc] + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + vmovdqa YMMWORD[(160+256)+rbp],ymm15 + mov r10,10 +$L$seal_avx2_init_rounds: + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,4 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,12 + vpalignr ymm6,ymm6,ymm6,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,12 + vpalignr ymm5,ymm5,ymm5,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm4,ymm4,ymm4,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,12 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,12 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,4 + vpalignr ymm6,ymm6,ymm6,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm5,ymm5,ymm5,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm4,ymm4,ymm4,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,4 + + dec r10 + jnz NEAR $L$seal_avx2_init_rounds + vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] + vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] + vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] + vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] + vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + + vperm2i128 ymm11,ymm15,ymm11,0x13 + vperm2i128 ymm15,ymm7,ymm3,0x02 + vperm2i128 ymm3,ymm7,ymm3,0x13 + vpand ymm15,ymm15,YMMWORD[$L$clamp] + vmovdqa YMMWORD[(160+0)+rbp],ymm15 + mov r8,r8 + call poly_hash_ad_internal + + vpxor ymm3,ymm3,YMMWORD[rsi] + vpxor ymm11,ymm11,YMMWORD[32+rsi] + vmovdqu YMMWORD[rdi],ymm3 + vmovdqu YMMWORD[32+rdi],ymm11 + vperm2i128 ymm15,ymm6,ymm2,0x02 + vperm2i128 ymm6,ymm6,ymm2,0x13 + vperm2i128 ymm2,ymm14,ymm10,0x02 + vperm2i128 ymm10,ymm14,ymm10,0x13 + vpxor ymm15,ymm15,YMMWORD[((0+64))+rsi] + vpxor ymm2,ymm2,YMMWORD[((32+64))+rsi] + vpxor ymm6,ymm6,YMMWORD[((64+64))+rsi] + vpxor ymm10,ymm10,YMMWORD[((96+64))+rsi] + vmovdqu YMMWORD[(0+64)+rdi],ymm15 + vmovdqu YMMWORD[(32+64)+rdi],ymm2 + vmovdqu YMMWORD[(64+64)+rdi],ymm6 + vmovdqu YMMWORD[(96+64)+rdi],ymm10 + vperm2i128 ymm15,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm15,ymm15,YMMWORD[((0+192))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+192))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+192))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+192))+rsi] + vmovdqu YMMWORD[(0+192)+rdi],ymm15 + vmovdqu YMMWORD[(32+192)+rdi],ymm1 + vmovdqu YMMWORD[(64+192)+rdi],ymm5 + vmovdqu YMMWORD[(96+192)+rdi],ymm9 + vperm2i128 ymm15,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm15 + + lea rsi,[320+rsi] + sub rbx,10*32 + mov rcx,10*32 + cmp rbx,4*32 + jbe NEAR $L$seal_avx2_short_hash_remainder + vpxor ymm0,ymm0,YMMWORD[rsi] + vpxor ymm4,ymm4,YMMWORD[32+rsi] + vpxor ymm8,ymm8,YMMWORD[64+rsi] + vpxor ymm12,ymm12,YMMWORD[96+rsi] + vmovdqu YMMWORD[320+rdi],ymm0 + vmovdqu YMMWORD[352+rdi],ymm4 + vmovdqu YMMWORD[384+rdi],ymm8 + vmovdqu YMMWORD[416+rdi],ymm12 + lea rsi,[128+rsi] + sub rbx,4*32 + mov rcx,8 + mov r8,2 + cmp rbx,4*32 + jbe NEAR $L$seal_avx2_tail_128 + cmp rbx,8*32 + jbe NEAR $L$seal_avx2_tail_256 + cmp rbx,12*32 + jbe NEAR $L$seal_avx2_tail_384 + cmp rbx,16*32 + jbe NEAR $L$seal_avx2_tail_512 + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm2,ymm0 + vmovdqa ymm6,ymm4 + vmovdqa ymm10,ymm8 + vmovdqa ymm3,ymm0 + vmovdqa ymm7,ymm4 + vmovdqa ymm11,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm14,ymm12,ymm15 + vpaddd ymm13,ymm12,ymm14 + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+256)+rbp],ymm15 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,4 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,12 + vpalignr ymm6,ymm6,ymm6,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,12 + vpalignr ymm5,ymm5,ymm5,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm4,ymm4,ymm4,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,12 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,12 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,4 + vpalignr ymm6,ymm6,ymm6,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm5,ymm5,ymm5,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm4,ymm4,ymm4,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,4 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + + sub rdi,16 + mov rcx,9 + jmp NEAR $L$seal_avx2_main_loop_rounds_entry +ALIGN 32 +$L$seal_avx2_main_loop: + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm2,ymm0 + vmovdqa ymm6,ymm4 + vmovdqa ymm10,ymm8 + vmovdqa ymm3,ymm0 + vmovdqa ymm7,ymm4 + vmovdqa ymm11,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm14,ymm12,ymm15 + vpaddd ymm13,ymm12,ymm14 + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+256)+rbp],ymm15 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + + mov rcx,10 +ALIGN 32 +$L$seal_avx2_main_loop_rounds: + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + add r15,rax + adc r9,rdx + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + +$L$seal_avx2_main_loop_rounds_entry: + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + add r10,QWORD[((0+16))+rdi] + adc r11,QWORD[((8+16))+rdi] + adc r12,1 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,4 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,12 + vpalignr ymm6,ymm6,ymm6,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,12 + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + vpalignr ymm5,ymm5,ymm5,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm4,ymm4,ymm4,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,12 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + add r15,rax + adc r9,rdx + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + add r10,QWORD[((0+32))+rdi] + adc r11,QWORD[((8+32))+rdi] + adc r12,1 + + lea rdi,[48+rdi] + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + add r15,rax + adc r9,rdx + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,12 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,4 + vpalignr ymm6,ymm6,ymm6,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm5,ymm5,ymm5,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm4,ymm4,ymm4,12 + vpalignr ymm8,ymm8,ymm8,8 + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpalignr ymm12,ymm12,ymm12,4 + + dec rcx + jne NEAR $L$seal_avx2_main_loop_rounds + vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] + vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] + vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] + vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] + vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + + vmovdqa YMMWORD[(160+128)+rbp],ymm0 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + add r10,QWORD[((0+16))+rdi] + adc r11,QWORD[((8+16))+rdi] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[32+rdi] + vperm2i128 ymm0,ymm7,ymm3,0x02 + vperm2i128 ymm7,ymm7,ymm3,0x13 + vperm2i128 ymm3,ymm15,ymm11,0x02 + vperm2i128 ymm11,ymm15,ymm11,0x13 + vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] + vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] + vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] + vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] + vmovdqu YMMWORD[(0+0)+rdi],ymm0 + vmovdqu YMMWORD[(32+0)+rdi],ymm3 + vmovdqu YMMWORD[(64+0)+rdi],ymm7 + vmovdqu YMMWORD[(96+0)+rdi],ymm11 + + vmovdqa ymm0,YMMWORD[((160+128))+rbp] + vperm2i128 ymm3,ymm6,ymm2,0x02 + vperm2i128 ymm6,ymm6,ymm2,0x13 + vperm2i128 ymm2,ymm14,ymm10,0x02 + vperm2i128 ymm10,ymm14,ymm10,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] + vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] + vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] + vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] + vmovdqu YMMWORD[(0+128)+rdi],ymm3 + vmovdqu YMMWORD[(32+128)+rdi],ymm2 + vmovdqu YMMWORD[(64+128)+rdi],ymm6 + vmovdqu YMMWORD[(96+128)+rdi],ymm10 + vperm2i128 ymm3,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] + vmovdqu YMMWORD[(0+256)+rdi],ymm3 + vmovdqu YMMWORD[(32+256)+rdi],ymm1 + vmovdqu YMMWORD[(64+256)+rdi],ymm5 + vmovdqu YMMWORD[(96+256)+rdi],ymm9 + vperm2i128 ymm3,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm12,ymm8,0x02 + vperm2i128 ymm8,ymm12,ymm8,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] + vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] + vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] + vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] + vmovdqu YMMWORD[(0+384)+rdi],ymm3 + vmovdqu YMMWORD[(32+384)+rdi],ymm0 + vmovdqu YMMWORD[(64+384)+rdi],ymm4 + vmovdqu YMMWORD[(96+384)+rdi],ymm8 + + lea rsi,[512+rsi] + sub rbx,16*32 + cmp rbx,16*32 + jg NEAR $L$seal_avx2_main_loop + + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + add r10,QWORD[((0+16))+rdi] + adc r11,QWORD[((8+16))+rdi] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[32+rdi] + mov rcx,10 + xor r8,r8 + + cmp rbx,12*32 + ja NEAR $L$seal_avx2_tail_512 + cmp rbx,8*32 + ja NEAR $L$seal_avx2_tail_384 + cmp rbx,4*32 + ja NEAR $L$seal_avx2_tail_256 + +$L$seal_avx2_tail_128: + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + +$L$seal_avx2_tail_128_rounds_and_3xhash: + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] +$L$seal_avx2_tail_128_rounds_and_2xhash: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + add r10,QWORD[((0+16))+rdi] + adc r11,QWORD[((8+16))+rdi] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[32+rdi] + dec rcx + jg NEAR $L$seal_avx2_tail_128_rounds_and_3xhash + dec r8 + jge NEAR $L$seal_avx2_tail_128_rounds_and_2xhash + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vperm2i128 ymm3,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm3 + + jmp NEAR $L$seal_avx2_short_loop + +$L$seal_avx2_tail_256: + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + +$L$seal_avx2_tail_256_rounds_and_3xhash: + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] +$L$seal_avx2_tail_256_rounds_and_2xhash: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,4 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,12 + add r10,QWORD[((0+16))+rdi] + adc r11,QWORD[((8+16))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[32+rdi] + dec rcx + jg NEAR $L$seal_avx2_tail_256_rounds_and_3xhash + dec r8 + jge NEAR $L$seal_avx2_tail_256_rounds_and_2xhash + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vperm2i128 ymm3,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] + vmovdqu YMMWORD[(0+0)+rdi],ymm3 + vmovdqu YMMWORD[(32+0)+rdi],ymm1 + vmovdqu YMMWORD[(64+0)+rdi],ymm5 + vmovdqu YMMWORD[(96+0)+rdi],ymm9 + vperm2i128 ymm3,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm3 + + mov rcx,4*32 + lea rsi,[128+rsi] + sub rbx,4*32 + jmp NEAR $L$seal_avx2_short_hash_remainder + +$L$seal_avx2_tail_384: + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm2,ymm0 + vmovdqa ymm6,ymm4 + vmovdqa ymm10,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm13,ymm12,ymm14 + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + +$L$seal_avx2_tail_384_rounds_and_3xhash: + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] +$L$seal_avx2_tail_384_rounds_and_2xhash: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,4 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,4 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + add r10,QWORD[((0+16))+rdi] + adc r11,QWORD[((8+16))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,12 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,12 + + lea rdi,[32+rdi] + dec rcx + jg NEAR $L$seal_avx2_tail_384_rounds_and_3xhash + dec r8 + jge NEAR $L$seal_avx2_tail_384_rounds_and_2xhash + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] + vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vperm2i128 ymm3,ymm6,ymm2,0x02 + vperm2i128 ymm6,ymm6,ymm2,0x13 + vperm2i128 ymm2,ymm14,ymm10,0x02 + vperm2i128 ymm10,ymm14,ymm10,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] + vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] + vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] + vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] + vmovdqu YMMWORD[(0+0)+rdi],ymm3 + vmovdqu YMMWORD[(32+0)+rdi],ymm2 + vmovdqu YMMWORD[(64+0)+rdi],ymm6 + vmovdqu YMMWORD[(96+0)+rdi],ymm10 + vperm2i128 ymm3,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] + vmovdqu YMMWORD[(0+128)+rdi],ymm3 + vmovdqu YMMWORD[(32+128)+rdi],ymm1 + vmovdqu YMMWORD[(64+128)+rdi],ymm5 + vmovdqu YMMWORD[(96+128)+rdi],ymm9 + vperm2i128 ymm3,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm3 + + mov rcx,8*32 + lea rsi,[256+rsi] + sub rbx,8*32 + jmp NEAR $L$seal_avx2_short_hash_remainder + +$L$seal_avx2_tail_512: + vmovdqa ymm0,YMMWORD[$L$chacha20_consts] + vmovdqa ymm4,YMMWORD[((160+64))+rbp] + vmovdqa ymm8,YMMWORD[((160+96))+rbp] + vmovdqa ymm1,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm2,ymm0 + vmovdqa ymm6,ymm4 + vmovdqa ymm10,ymm8 + vmovdqa ymm3,ymm0 + vmovdqa ymm7,ymm4 + vmovdqa ymm11,ymm8 + vmovdqa ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm14,ymm12,ymm15 + vpaddd ymm13,ymm12,ymm14 + vpaddd ymm12,ymm12,ymm13 + vmovdqa YMMWORD[(160+256)+rbp],ymm15 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + +$L$seal_avx2_tail_512_rounds_and_3xhash: + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + add r15,rax + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] +$L$seal_avx2_tail_512_rounds_and_2xhash: + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,4 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,12 + vpalignr ymm6,ymm6,ymm6,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,12 + vpalignr ymm5,ymm5,ymm5,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm4,ymm4,ymm4,4 + add r15,rax + adc r9,rdx + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,12 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vmovdqa ymm8,YMMWORD[$L$rol16] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,20 + vpslld ymm7,ymm7,32-20 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,20 + vpslld ymm6,ymm6,32-20 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,20 + vpslld ymm5,ymm5,32-20 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,20 + vpslld ymm4,ymm4,32-20 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[$L$rol8] + vpaddd ymm3,ymm3,ymm7 + vpaddd ymm2,ymm2,ymm6 + add r10,QWORD[((0+16))+rdi] + adc r11,QWORD[((8+16))+rdi] + adc r12,1 + vpaddd ymm1,ymm1,ymm5 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm15,ymm15,ymm3 + vpxor ymm14,ymm14,ymm2 + vpxor ymm13,ymm13,ymm1 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm15,ymm15,ymm8 + vpshufb ymm14,ymm14,ymm8 + vpshufb ymm13,ymm13,ymm8 + vpshufb ymm12,ymm12,ymm8 + vpaddd ymm11,ymm11,ymm15 + vpaddd ymm10,ymm10,ymm14 + vpaddd ymm9,ymm9,ymm13 + vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] + vpxor ymm7,ymm7,ymm11 + vpxor ymm6,ymm6,ymm10 + vpxor ymm5,ymm5,ymm9 + vpxor ymm4,ymm4,ymm8 + vmovdqa YMMWORD[(160+128)+rbp],ymm8 + vpsrld ymm8,ymm7,25 + mov rdx,QWORD[((0+160+0))+rbp] + mov r15,rdx + mulx r14,r13,r10 + mulx rdx,rax,r11 + imul r15,r12 + add r14,rax + adc r15,rdx + vpslld ymm7,ymm7,32-25 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm6,25 + vpslld ymm6,ymm6,32-25 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm5,25 + vpslld ymm5,ymm5,32-25 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm4,25 + vpslld ymm4,ymm4,32-25 + vpxor ymm4,ymm4,ymm8 + vmovdqa ymm8,YMMWORD[((160+128))+rbp] + vpalignr ymm7,ymm7,ymm7,12 + vpalignr ymm11,ymm11,ymm11,8 + vpalignr ymm15,ymm15,ymm15,4 + vpalignr ymm6,ymm6,ymm6,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm5,ymm5,ymm5,12 + vpalignr ymm9,ymm9,ymm9,8 + mov rdx,QWORD[((8+160+0))+rbp] + mulx rax,r10,r10 + add r14,r10 + mulx r9,r11,r11 + adc r15,r11 + adc r9,0 + imul rdx,r12 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm4,ymm4,ymm4,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm12,ymm12,ymm12,4 + + + + + + + + + + + + + + + + + add r15,rax + adc r9,rdx + + + + + + + + + + + + + + + + + + + + + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[32+rdi] + dec rcx + jg NEAR $L$seal_avx2_tail_512_rounds_and_3xhash + dec r8 + jge NEAR $L$seal_avx2_tail_512_rounds_and_2xhash + vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] + vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] + vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] + vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] + vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] + vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] + vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + + vmovdqa YMMWORD[(160+128)+rbp],ymm0 + vperm2i128 ymm0,ymm7,ymm3,0x02 + vperm2i128 ymm7,ymm7,ymm3,0x13 + vperm2i128 ymm3,ymm15,ymm11,0x02 + vperm2i128 ymm11,ymm15,ymm11,0x13 + vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] + vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] + vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] + vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] + vmovdqu YMMWORD[(0+0)+rdi],ymm0 + vmovdqu YMMWORD[(32+0)+rdi],ymm3 + vmovdqu YMMWORD[(64+0)+rdi],ymm7 + vmovdqu YMMWORD[(96+0)+rdi],ymm11 + + vmovdqa ymm0,YMMWORD[((160+128))+rbp] + vperm2i128 ymm3,ymm6,ymm2,0x02 + vperm2i128 ymm6,ymm6,ymm2,0x13 + vperm2i128 ymm2,ymm14,ymm10,0x02 + vperm2i128 ymm10,ymm14,ymm10,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] + vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] + vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] + vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] + vmovdqu YMMWORD[(0+128)+rdi],ymm3 + vmovdqu YMMWORD[(32+128)+rdi],ymm2 + vmovdqu YMMWORD[(64+128)+rdi],ymm6 + vmovdqu YMMWORD[(96+128)+rdi],ymm10 + vperm2i128 ymm3,ymm5,ymm1,0x02 + vperm2i128 ymm5,ymm5,ymm1,0x13 + vperm2i128 ymm1,ymm13,ymm9,0x02 + vperm2i128 ymm9,ymm13,ymm9,0x13 + vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] + vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] + vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] + vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] + vmovdqu YMMWORD[(0+256)+rdi],ymm3 + vmovdqu YMMWORD[(32+256)+rdi],ymm1 + vmovdqu YMMWORD[(64+256)+rdi],ymm5 + vmovdqu YMMWORD[(96+256)+rdi],ymm9 + vperm2i128 ymm3,ymm4,ymm0,0x13 + vperm2i128 ymm0,ymm4,ymm0,0x02 + vperm2i128 ymm4,ymm12,ymm8,0x02 + vperm2i128 ymm12,ymm12,ymm8,0x13 + vmovdqa ymm8,ymm3 + + mov rcx,12*32 + lea rsi,[384+rsi] + sub rbx,12*32 + jmp NEAR $L$seal_avx2_short_hash_remainder + +$L$seal_avx2_320: + vmovdqa ymm1,ymm0 + vmovdqa ymm2,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm6,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm10,ymm8 + vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] + vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] + vmovdqa ymm7,ymm4 + vmovdqa ymm11,ymm8 + vmovdqa YMMWORD[(160+160)+rbp],ymm12 + vmovdqa YMMWORD[(160+192)+rbp],ymm13 + vmovdqa YMMWORD[(160+224)+rbp],ymm14 + mov r10,10 +$L$seal_avx2_320_rounds: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,4 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,12 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,4 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,12 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol16] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpsrld ymm3,ymm6,20 + vpslld ymm6,ymm6,12 + vpxor ymm6,ymm6,ymm3 + vpaddd ymm2,ymm2,ymm6 + vpxor ymm14,ymm14,ymm2 + vpshufb ymm14,ymm14,YMMWORD[$L$rol8] + vpaddd ymm10,ymm10,ymm14 + vpxor ymm6,ymm6,ymm10 + vpslld ymm3,ymm6,7 + vpsrld ymm6,ymm6,25 + vpxor ymm6,ymm6,ymm3 + vpalignr ymm14,ymm14,ymm14,4 + vpalignr ymm10,ymm10,ymm10,8 + vpalignr ymm6,ymm6,ymm6,12 + + dec r10 + jne NEAR $L$seal_avx2_320_rounds + vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] + vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] + vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] + vpaddd ymm4,ymm4,ymm7 + vpaddd ymm5,ymm5,ymm7 + vpaddd ymm6,ymm6,ymm7 + vpaddd ymm8,ymm8,ymm11 + vpaddd ymm9,ymm9,ymm11 + vpaddd ymm10,ymm10,ymm11 + vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] + vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] + vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] + vperm2i128 ymm3,ymm4,ymm0,0x02 + + vpand ymm3,ymm3,YMMWORD[$L$clamp] + vmovdqa YMMWORD[(160+0)+rbp],ymm3 + + vperm2i128 ymm0,ymm4,ymm0,0x13 + vperm2i128 ymm4,ymm12,ymm8,0x13 + vperm2i128 ymm8,ymm5,ymm1,0x02 + vperm2i128 ymm12,ymm13,ymm9,0x02 + vperm2i128 ymm1,ymm5,ymm1,0x13 + vperm2i128 ymm5,ymm13,ymm9,0x13 + vperm2i128 ymm9,ymm6,ymm2,0x02 + vperm2i128 ymm13,ymm14,ymm10,0x02 + vperm2i128 ymm2,ymm6,ymm2,0x13 + vperm2i128 ymm6,ymm14,ymm10,0x13 + jmp NEAR $L$seal_avx2_short + +$L$seal_avx2_192: + vmovdqa ymm1,ymm0 + vmovdqa ymm2,ymm0 + vmovdqa ymm5,ymm4 + vmovdqa ymm6,ymm4 + vmovdqa ymm9,ymm8 + vmovdqa ymm10,ymm8 + vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] + vmovdqa ymm11,ymm12 + vmovdqa ymm15,ymm13 + mov r10,10 +$L$seal_avx2_192_rounds: + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,12 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,4 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,12 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,4 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol16] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm3,ymm4,20 + vpslld ymm4,ymm4,12 + vpxor ymm4,ymm4,ymm3 + vpaddd ymm0,ymm0,ymm4 + vpxor ymm12,ymm12,ymm0 + vpshufb ymm12,ymm12,YMMWORD[$L$rol8] + vpaddd ymm8,ymm8,ymm12 + vpxor ymm4,ymm4,ymm8 + vpslld ymm3,ymm4,7 + vpsrld ymm4,ymm4,25 + vpxor ymm4,ymm4,ymm3 + vpalignr ymm12,ymm12,ymm12,4 + vpalignr ymm8,ymm8,ymm8,8 + vpalignr ymm4,ymm4,ymm4,12 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol16] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpsrld ymm3,ymm5,20 + vpslld ymm5,ymm5,12 + vpxor ymm5,ymm5,ymm3 + vpaddd ymm1,ymm1,ymm5 + vpxor ymm13,ymm13,ymm1 + vpshufb ymm13,ymm13,YMMWORD[$L$rol8] + vpaddd ymm9,ymm9,ymm13 + vpxor ymm5,ymm5,ymm9 + vpslld ymm3,ymm5,7 + vpsrld ymm5,ymm5,25 + vpxor ymm5,ymm5,ymm3 + vpalignr ymm13,ymm13,ymm13,4 + vpalignr ymm9,ymm9,ymm9,8 + vpalignr ymm5,ymm5,ymm5,12 + + dec r10 + jne NEAR $L$seal_avx2_192_rounds + vpaddd ymm0,ymm0,ymm2 + vpaddd ymm1,ymm1,ymm2 + vpaddd ymm4,ymm4,ymm6 + vpaddd ymm5,ymm5,ymm6 + vpaddd ymm8,ymm8,ymm10 + vpaddd ymm9,ymm9,ymm10 + vpaddd ymm12,ymm12,ymm11 + vpaddd ymm13,ymm13,ymm15 + vperm2i128 ymm3,ymm4,ymm0,0x02 + + vpand ymm3,ymm3,YMMWORD[$L$clamp] + vmovdqa YMMWORD[(160+0)+rbp],ymm3 + + vperm2i128 ymm0,ymm4,ymm0,0x13 + vperm2i128 ymm4,ymm12,ymm8,0x13 + vperm2i128 ymm8,ymm5,ymm1,0x02 + vperm2i128 ymm12,ymm13,ymm9,0x02 + vperm2i128 ymm1,ymm5,ymm1,0x13 + vperm2i128 ymm5,ymm13,ymm9,0x13 +$L$seal_avx2_short: + mov r8,r8 + call poly_hash_ad_internal + xor rcx,rcx +$L$seal_avx2_short_hash_remainder: + cmp rcx,16 + jb NEAR $L$seal_avx2_short_loop + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + sub rcx,16 + add rdi,16 + jmp NEAR $L$seal_avx2_short_hash_remainder +$L$seal_avx2_short_loop: + cmp rbx,32 + jb NEAR $L$seal_avx2_short_tail + sub rbx,32 + + vpxor ymm0,ymm0,YMMWORD[rsi] + vmovdqu YMMWORD[rdi],ymm0 + lea rsi,[32+rsi] + + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + add r10,QWORD[((0+16))+rdi] + adc r11,QWORD[((8+16))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[32+rdi] + + vmovdqa ymm0,ymm4 + vmovdqa ymm4,ymm8 + vmovdqa ymm8,ymm12 + vmovdqa ymm12,ymm1 + vmovdqa ymm1,ymm5 + vmovdqa ymm5,ymm9 + vmovdqa ymm9,ymm13 + vmovdqa ymm13,ymm2 + vmovdqa ymm2,ymm6 + jmp NEAR $L$seal_avx2_short_loop +$L$seal_avx2_short_tail: + cmp rbx,16 + jb NEAR $L$seal_avx2_exit + sub rbx,16 + vpxor xmm3,xmm0,XMMWORD[rsi] + vmovdqu XMMWORD[rdi],xmm3 + lea rsi,[16+rsi] + add r10,QWORD[((0+0))+rdi] + adc r11,QWORD[((8+0))+rdi] + adc r12,1 + mov rax,QWORD[((0+160+0))+rbp] + mov r15,rax + mul r10 + mov r13,rax + mov r14,rdx + mov rax,QWORD[((0+160+0))+rbp] + mul r11 + imul r15,r12 + add r14,rax + adc r15,rdx + mov rax,QWORD[((8+160+0))+rbp] + mov r9,rax + mul r10 + add r14,rax + adc rdx,0 + mov r10,rdx + mov rax,QWORD[((8+160+0))+rbp] + mul r11 + add r15,rax + adc rdx,0 + imul r9,r12 + add r15,r10 + adc r9,rdx + mov r10,r13 + mov r11,r14 + mov r12,r15 + and r12,3 + mov r13,r15 + and r13,-4 + mov r14,r9 + shrd r15,r9,2 + shr r9,2 + add r15,r13 + adc r9,r14 + add r10,r15 + adc r11,r9 + adc r12,0 + + lea rdi,[16+rdi] + vextracti128 xmm0,ymm0,1 +$L$seal_avx2_exit: + vzeroupper + jmp NEAR $L$seal_sse_tail_16 + + +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/crypto/err_data.c b/yass/third_party/boringssl/src/gen/crypto/err_data.c new file mode 100644 index 0000000000..665423b8d1 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/crypto/err_data.c @@ -0,0 +1,1514 @@ +/* Copyright (c) 2015, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + + /* This file was generated by go run ./util/pregenerate. */ + +#include +#include + +#include + +static_assert(ERR_LIB_NONE == 1, "library value changed"); +static_assert(ERR_LIB_SYS == 2, "library value changed"); +static_assert(ERR_LIB_BN == 3, "library value changed"); +static_assert(ERR_LIB_RSA == 4, "library value changed"); +static_assert(ERR_LIB_DH == 5, "library value changed"); +static_assert(ERR_LIB_EVP == 6, "library value changed"); +static_assert(ERR_LIB_BUF == 7, "library value changed"); +static_assert(ERR_LIB_OBJ == 8, "library value changed"); +static_assert(ERR_LIB_PEM == 9, "library value changed"); +static_assert(ERR_LIB_DSA == 10, "library value changed"); +static_assert(ERR_LIB_X509 == 11, "library value changed"); +static_assert(ERR_LIB_ASN1 == 12, "library value changed"); +static_assert(ERR_LIB_CONF == 13, "library value changed"); +static_assert(ERR_LIB_CRYPTO == 14, "library value changed"); +static_assert(ERR_LIB_EC == 15, "library value changed"); +static_assert(ERR_LIB_SSL == 16, "library value changed"); +static_assert(ERR_LIB_BIO == 17, "library value changed"); +static_assert(ERR_LIB_PKCS7 == 18, "library value changed"); +static_assert(ERR_LIB_PKCS8 == 19, "library value changed"); +static_assert(ERR_LIB_X509V3 == 20, "library value changed"); +static_assert(ERR_LIB_RAND == 21, "library value changed"); +static_assert(ERR_LIB_ENGINE == 22, "library value changed"); +static_assert(ERR_LIB_OCSP == 23, "library value changed"); +static_assert(ERR_LIB_UI == 24, "library value changed"); +static_assert(ERR_LIB_COMP == 25, "library value changed"); +static_assert(ERR_LIB_ECDSA == 26, "library value changed"); +static_assert(ERR_LIB_ECDH == 27, "library value changed"); +static_assert(ERR_LIB_HMAC == 28, "library value changed"); +static_assert(ERR_LIB_DIGEST == 29, "library value changed"); +static_assert(ERR_LIB_CIPHER == 30, "library value changed"); +static_assert(ERR_LIB_HKDF == 31, "library value changed"); +static_assert(ERR_LIB_TRUST_TOKEN == 32, "library value changed"); +static_assert(ERR_LIB_USER == 33, "library value changed"); +static_assert(ERR_NUM_LIBS == 34, "number of libraries changed"); + +const uint32_t kOpenSSLReasonValues[] = { + 0xc320885, + 0xc32889f, + 0xc3308ae, + 0xc3388be, + 0xc3408cd, + 0xc3488e6, + 0xc3508f2, + 0xc35890f, + 0xc36092f, + 0xc36893d, + 0xc37094d, + 0xc37895a, + 0xc38096a, + 0xc388975, + 0xc39098b, + 0xc39899a, + 0xc3a09ae, + 0xc3a8892, + 0xc3b00f7, + 0xc3b8921, + 0x10320892, + 0x10329654, + 0x10331660, + 0x10339679, + 0x1034168c, + 0x10348f93, + 0x10350cdf, + 0x1035969f, + 0x103616c9, + 0x103696dc, + 0x103716fb, + 0x10379714, + 0x10381729, + 0x10389747, + 0x10391756, + 0x10399772, + 0x103a178d, + 0x103a979c, + 0x103b17b8, + 0x103b97d3, + 0x103c17f9, + 0x103c80f7, + 0x103d180a, + 0x103d981e, + 0x103e183d, + 0x103e984c, + 0x103f1863, + 0x103f9876, + 0x10400ca3, + 0x10409889, + 0x104118a7, + 0x104198ba, + 0x104218d4, + 0x104298e4, + 0x104318f8, + 0x1043990e, + 0x10441926, + 0x1044993b, + 0x1045194f, + 0x10459961, + 0x10460635, + 0x1046899a, + 0x10471976, + 0x1047998d, + 0x104819a2, + 0x104899b0, + 0x10490edf, + 0x104997ea, + 0x104a16b4, + 0x14320c73, + 0x14328c94, + 0x14330ca3, + 0x14338cb5, + 0x143400b9, + 0x143480f7, + 0x14350c81, + 0x18320090, + 0x18328fe9, + 0x183300b9, + 0x18338fff, + 0x18341013, + 0x183480f7, + 0x18351032, + 0x1835904a, + 0x18361072, + 0x18369086, + 0x183710be, + 0x183790d4, + 0x183810e8, + 0x183890f8, + 0x18390ac0, + 0x18399108, + 0x183a112e, + 0x183a9154, + 0x183b0ceb, + 0x183b91a3, + 0x183c11b5, + 0x183c91c0, + 0x183d11d0, + 0x183d91e1, + 0x183e11f2, + 0x183e9204, + 0x183f122d, + 0x183f9246, + 0x1840125e, + 0x1840870d, + 0x18411177, + 0x18419142, + 0x18421161, + 0x18428c81, + 0x1843111d, + 0x18439189, + 0x18441028, + 0x184490aa, + 0x1845105f, + 0x20321298, + 0x20329285, + 0x243212a4, + 0x243289e0, + 0x243312b6, + 0x243392c3, + 0x243412d0, + 0x243492e2, + 0x243512f1, + 0x2435930e, + 0x2436131b, + 0x24369329, + 0x24371337, + 0x24379345, + 0x2438134e, + 0x2438935b, + 0x2439136e, + 0x28320cd3, + 0x28328ceb, + 0x28330ca3, + 0x28338cfe, + 0x28340cdf, + 0x283480b9, + 0x283500f7, + 0x28358c81, + 0x2836099a, + 0x2c3232fa, + 0x2c329385, + 0x2c333308, + 0x2c33b31a, + 0x2c34332e, + 0x2c34b340, + 0x2c35335b, + 0x2c35b36d, + 0x2c36339d, + 0x2c36833a, + 0x2c3733aa, + 0x2c37b3d6, + 0x2c383414, + 0x2c38b42b, + 0x2c393449, + 0x2c39b459, + 0x2c3a346b, + 0x2c3ab47f, + 0x2c3b3490, + 0x2c3bb4af, + 0x2c3c1397, + 0x2c3c93ad, + 0x2c3d34f4, + 0x2c3d93c6, + 0x2c3e351e, + 0x2c3eb52c, + 0x2c3f3544, + 0x2c3fb55c, + 0x2c403586, + 0x2c409298, + 0x2c413597, + 0x2c41b5aa, + 0x2c42125e, + 0x2c42b5bb, + 0x2c43076d, + 0x2c43b4a1, + 0x2c4433e9, + 0x2c44b569, + 0x2c453380, + 0x2c45b3bc, + 0x2c463439, + 0x2c46b4c3, + 0x2c4734d8, + 0x2c47b511, + 0x2c4833fb, + 0x30320000, + 0x30328015, + 0x3033001f, + 0x30338038, + 0x30340057, + 0x30348071, + 0x30350078, + 0x30358090, + 0x303600a1, + 0x303680b9, + 0x303700c6, + 0x303780d5, + 0x303800f7, + 0x30388104, + 0x30390117, + 0x30398132, + 0x303a0147, + 0x303a815b, + 0x303b016f, + 0x303b8180, + 0x303c0199, + 0x303c81b6, + 0x303d01c4, + 0x303d81d8, + 0x303e01e8, + 0x303e8201, + 0x303f0211, + 0x303f8224, + 0x30400233, + 0x3040823f, + 0x30410254, + 0x30418264, + 0x3042027b, + 0x30428288, + 0x3043029b, + 0x304382aa, + 0x304402bf, + 0x304482e0, + 0x304502f3, + 0x30458306, + 0x3046031f, + 0x3046833a, + 0x30470372, + 0x30478384, + 0x304803a2, + 0x304883b3, + 0x304903c2, + 0x304983da, + 0x304a03ec, + 0x304a8400, + 0x304b0418, + 0x304b842b, + 0x304c0436, + 0x304c8447, + 0x304d0453, + 0x304d8469, + 0x304e0477, + 0x304e848d, + 0x304f049f, + 0x304f84b1, + 0x305004d4, + 0x305084e7, + 0x305104f8, + 0x30518508, + 0x30520520, + 0x30528535, + 0x3053054d, + 0x30538561, + 0x30540579, + 0x30548592, + 0x305505ab, + 0x305585c8, + 0x305605d3, + 0x305685eb, + 0x305705fb, + 0x3057860c, + 0x3058061f, + 0x30588635, + 0x3059063e, + 0x30598653, + 0x305a0666, + 0x305a8675, + 0x305b0695, + 0x305b86a4, + 0x305c06c5, + 0x305c86e1, + 0x305d06ed, + 0x305d870d, + 0x305e0729, + 0x305e874d, + 0x305f0763, + 0x305f876d, + 0x306004c4, + 0x3060804a, + 0x30610357, + 0x3061873a, + 0x30620392, + 0x34320bb0, + 0x34328bc4, + 0x34330be1, + 0x34338bf4, + 0x34340c03, + 0x34348c5d, + 0x34350c41, + 0x34358c20, + 0x3c320090, + 0x3c328d28, + 0x3c330d41, + 0x3c338d5c, + 0x3c340d79, + 0x3c348da3, + 0x3c350dbe, + 0x3c358de4, + 0x3c360dfd, + 0x3c368e15, + 0x3c370e26, + 0x3c378e34, + 0x3c380e41, + 0x3c388e55, + 0x3c390ceb, + 0x3c398e78, + 0x3c3a0e8c, + 0x3c3a895a, + 0x3c3b0e9c, + 0x3c3b8eb7, + 0x3c3c0ec9, + 0x3c3c8efc, + 0x3c3d0f06, + 0x3c3d8f1a, + 0x3c3e0f28, + 0x3c3e8f4d, + 0x3c3f0d14, + 0x3c3f8f36, + 0x3c4000b9, + 0x3c4080f7, + 0x3c410d94, + 0x3c418dd3, + 0x3c420edf, + 0x3c428e69, + 0x40321a42, + 0x40329a58, + 0x40331a86, + 0x40339a90, + 0x40341aa7, + 0x40349ac5, + 0x40351ad5, + 0x40359ae7, + 0x40361af4, + 0x40369b00, + 0x40371b15, + 0x40379b27, + 0x40381b32, + 0x40389b44, + 0x40390f93, + 0x40399b54, + 0x403a1b67, + 0x403a9b88, + 0x403b1b99, + 0x403b9ba9, + 0x403c0071, + 0x403c8090, + 0x403d1c0a, + 0x403d9c20, + 0x403e1c2f, + 0x403e9c67, + 0x403f1c81, + 0x403f9ca9, + 0x40401cbe, + 0x40409cd2, + 0x40411d0d, + 0x40419d28, + 0x40421d41, + 0x40429d54, + 0x40431d68, + 0x40439d96, + 0x40441dad, + 0x404480b9, + 0x40451dc2, + 0x40459dd4, + 0x40461df8, + 0x40469e18, + 0x40471e26, + 0x40479e4d, + 0x40481ebe, + 0x40489f78, + 0x40491f8f, + 0x40499fa9, + 0x404a1fc0, + 0x404a9fde, + 0x404b1ff6, + 0x404ba023, + 0x404c2039, + 0x404ca04b, + 0x404d206c, + 0x404da0a5, + 0x404e20b9, + 0x404ea0c6, + 0x404f2177, + 0x404fa1ed, + 0x4050225c, + 0x4050a270, + 0x405122a3, + 0x405222b3, + 0x4052a2d7, + 0x405322ef, + 0x4053a302, + 0x40542317, + 0x4054a33a, + 0x40552365, + 0x4055a3a2, + 0x405623c7, + 0x4056a3e0, + 0x405723f8, + 0x4057a40b, + 0x40582420, + 0x4058a447, + 0x40592476, + 0x4059a4a3, + 0x405aa4b7, + 0x405b24cf, + 0x405ba4e0, + 0x405c24f3, + 0x405ca532, + 0x405d253f, + 0x405da564, + 0x405e25a2, + 0x405e8afe, + 0x405f25c3, + 0x405fa5d0, + 0x406025de, + 0x4060a600, + 0x40612661, + 0x4061a699, + 0x406226b0, + 0x4062a6c1, + 0x4063270e, + 0x4063a723, + 0x4064273a, + 0x4064a766, + 0x40652781, + 0x4065a798, + 0x406627b0, + 0x4066a7da, + 0x40672805, + 0x4067a84a, + 0x40682892, + 0x4068a8b3, + 0x406928e5, + 0x4069a913, + 0x406a2934, + 0x406aa954, + 0x406b2adc, + 0x406baaff, + 0x406c2b15, + 0x406cae1f, + 0x406d2e4e, + 0x406dae76, + 0x406e2ea4, + 0x406eaef1, + 0x406f2f4a, + 0x406faf82, + 0x40702f95, + 0x4070afb2, + 0x4071084d, + 0x4071afc4, + 0x40722fd7, + 0x4072b00d, + 0x40733025, + 0x407395af, + 0x40743039, + 0x4074b053, + 0x40753064, + 0x4075b078, + 0x40763086, + 0x4076935b, + 0x407730ab, + 0x4077b0eb, + 0x40783106, + 0x4078b13f, + 0x40793156, + 0x4079b16c, + 0x407a3198, + 0x407ab1ab, + 0x407b31c0, + 0x407bb1d2, + 0x407c3203, + 0x407cb20c, + 0x407d28ce, + 0x407da215, + 0x407e311b, + 0x407ea457, + 0x407f1e3a, + 0x407fa00d, + 0x40802187, + 0x40809e62, + 0x408122c5, + 0x4081a114, + 0x40822e8f, + 0x40829bb5, + 0x40832432, + 0x4083a74b, + 0x40841e76, + 0x4084a48f, + 0x40852504, + 0x4085a628, + 0x40862584, + 0x4086a22f, + 0x40872ed5, + 0x4087a676, + 0x40881bf3, + 0x4088a85d, + 0x40891c42, + 0x40899bcf, + 0x408a2b4d, + 0x408a99c7, + 0x408b31e7, + 0x408baf5f, + 0x408c2514, + 0x408c99ff, + 0x408d1f5e, + 0x408d9ea8, + 0x408e208e, + 0x408ea382, + 0x408f2871, + 0x408fa644, + 0x40902826, + 0x4090a556, + 0x40912b35, + 0x40919a25, + 0x40921c8f, + 0x4092af10, + 0x40932ff0, + 0x4093a240, + 0x40941e8a, + 0x4094ab66, + 0x409526d2, + 0x4095b178, + 0x40962ebc, + 0x4096a1a0, + 0x4097228b, + 0x4097a0dd, + 0x40981cef, + 0x4098a6e6, + 0x40992f2c, + 0x4099a3af, + 0x409a2348, + 0x409a99e3, + 0x409b1ee4, + 0x409b9f0f, + 0x409c30cd, + 0x409c9f37, + 0x409d215c, + 0x409da12a, + 0x409e1d80, + 0x409ea1d5, + 0x409f21bd, + 0x409f9ed7, + 0x40a021fd, + 0x40a0a0f7, + 0x40a12145, + 0x41f42a07, + 0x41f92a99, + 0x41fe298c, + 0x41feac42, + 0x41ff2d70, + 0x42032a20, + 0x42082a42, + 0x4208aa7e, + 0x42092970, + 0x4209aab8, + 0x420a29c7, + 0x420aa9a7, + 0x420b29e7, + 0x420baa60, + 0x420c2d8c, + 0x420cab76, + 0x420d2c29, + 0x420dac60, + 0x42122c93, + 0x42172d53, + 0x4217acd5, + 0x421c2cf7, + 0x421f2cb2, + 0x42212e04, + 0x42262d36, + 0x422b2de2, + 0x422bac04, + 0x422c2dc4, + 0x422cabb7, + 0x422d2b90, + 0x422dada3, + 0x422e2be3, + 0x42302d12, + 0x4230ac7a, + 0x44320778, + 0x44328787, + 0x44330793, + 0x443387a1, + 0x443407b4, + 0x443487c5, + 0x443507cc, + 0x443587d6, + 0x443607e9, + 0x443687ff, + 0x44370811, + 0x4437881e, + 0x4438082d, + 0x44388835, + 0x4439084d, + 0x4439885b, + 0x443a086e, + 0x48321385, + 0x48329397, + 0x483313ad, + 0x483393c6, + 0x4c321403, + 0x4c329413, + 0x4c331426, + 0x4c339446, + 0x4c3400b9, + 0x4c3480f7, + 0x4c351452, + 0x4c359460, + 0x4c36147c, + 0x4c3694a2, + 0x4c3714b1, + 0x4c3794bf, + 0x4c3814d4, + 0x4c3894e0, + 0x4c391500, + 0x4c39952a, + 0x4c3a1543, + 0x4c3a955c, + 0x4c3b0635, + 0x4c3b9575, + 0x4c3c1587, + 0x4c3c9596, + 0x4c3d15af, + 0x4c3d8cc6, + 0x4c3e161c, + 0x4c3e95be, + 0x4c3f163e, + 0x4c3f935b, + 0x4c4015d4, + 0x4c4093ef, + 0x4c41160c, + 0x4c41948f, + 0x4c4215f8, + 0x4c4293d7, + 0x503235cd, + 0x5032b5dc, + 0x503335e7, + 0x5033b5f7, + 0x50343610, + 0x5034b62a, + 0x50353638, + 0x5035b64e, + 0x50363660, + 0x5036b676, + 0x5037368f, + 0x5037b6a2, + 0x503836ba, + 0x5038b6cb, + 0x503936e0, + 0x5039b6f4, + 0x503a3714, + 0x503ab72a, + 0x503b3742, + 0x503bb754, + 0x503c3770, + 0x503cb787, + 0x503d37a0, + 0x503db7b6, + 0x503e37c3, + 0x503eb7d9, + 0x503f37eb, + 0x503f83b3, + 0x504037fe, + 0x5040b80e, + 0x50413828, + 0x5041b837, + 0x50423851, + 0x5042b86e, + 0x5043387e, + 0x5043b88e, + 0x504438ab, + 0x50448469, + 0x504538bf, + 0x5045b8dd, + 0x504638f0, + 0x5046b906, + 0x50473918, + 0x5047b92d, + 0x50483953, + 0x5048b961, + 0x50493974, + 0x5049b989, + 0x504a399f, + 0x504ab9af, + 0x504b39cf, + 0x504bb9e2, + 0x504c3a05, + 0x504cba33, + 0x504d3a60, + 0x504dba7d, + 0x504e3a98, + 0x504ebab4, + 0x504f3ac6, + 0x504fbadd, + 0x50503aec, + 0x50508729, + 0x50513aff, + 0x5051b89d, + 0x50523a45, + 0x58320fd1, + 0x68320f93, + 0x68328ceb, + 0x68330cfe, + 0x68338fa1, + 0x68340fb1, + 0x683480f7, + 0x6835099a, + 0x6c320f59, + 0x6c328cb5, + 0x6c330f64, + 0x6c338f7d, + 0x74320a66, + 0x743280b9, + 0x74330cc6, + 0x783209cb, + 0x783289e0, + 0x783309ec, + 0x78338090, + 0x783409fb, + 0x78348a10, + 0x78350a2f, + 0x78358a51, + 0x78360a66, + 0x78368a7c, + 0x78370a8c, + 0x78378aad, + 0x78380ac0, + 0x78388ad2, + 0x78390adf, + 0x78398afe, + 0x783a0b13, + 0x783a8b21, + 0x783b0b2b, + 0x783b8b3f, + 0x783c0b56, + 0x783c8b6b, + 0x783d0b82, + 0x783d8b97, + 0x783e0aed, + 0x783e8a9f, + 0x7c321274, + 0x803214a2, + 0x80328090, + 0x803332c9, + 0x803380b9, + 0x803432d8, + 0x8034b240, + 0x8035325e, + 0x8035b2ec, + 0x803632a0, + 0x8036b24f, + 0x80373292, + 0x8037b22d, + 0x803832b3, + 0x8038b26f, + 0x80393284, +}; + +const size_t kOpenSSLReasonValuesLen = sizeof(kOpenSSLReasonValues) / sizeof(kOpenSSLReasonValues[0]); + +const char kOpenSSLReasonStringData[] = + "ASN1_LENGTH_MISMATCH\0" + "AUX_ERROR\0" + "BAD_GET_ASN1_OBJECT_CALL\0" + "BAD_OBJECT_HEADER\0" + "BAD_TEMPLATE\0" + "BMPSTRING_IS_WRONG_LENGTH\0" + "BN_LIB\0" + "BOOLEAN_IS_WRONG_LENGTH\0" + "BUFFER_TOO_SMALL\0" + "CONTEXT_NOT_INITIALISED\0" + "DECODE_ERROR\0" + "DEPTH_EXCEEDED\0" + "DIGEST_AND_KEY_TYPE_NOT_SUPPORTED\0" + "ENCODE_ERROR\0" + "ERROR_GETTING_TIME\0" + "EXPECTING_AN_ASN1_SEQUENCE\0" + "EXPECTING_AN_INTEGER\0" + "EXPECTING_AN_OBJECT\0" + "EXPECTING_A_BOOLEAN\0" + "EXPECTING_A_TIME\0" + "EXPLICIT_LENGTH_MISMATCH\0" + "EXPLICIT_TAG_NOT_CONSTRUCTED\0" + "FIELD_MISSING\0" + "FIRST_NUM_TOO_LARGE\0" + "HEADER_TOO_LONG\0" + "ILLEGAL_BITSTRING_FORMAT\0" + "ILLEGAL_BOOLEAN\0" + "ILLEGAL_CHARACTERS\0" + "ILLEGAL_FORMAT\0" + "ILLEGAL_HEX\0" + "ILLEGAL_IMPLICIT_TAG\0" + "ILLEGAL_INTEGER\0" + "ILLEGAL_NESTED_TAGGING\0" + "ILLEGAL_NULL\0" + "ILLEGAL_NULL_VALUE\0" + "ILLEGAL_OBJECT\0" + "ILLEGAL_OPTIONAL_ANY\0" + "ILLEGAL_OPTIONS_ON_ITEM_TEMPLATE\0" + "ILLEGAL_TAGGED_ANY\0" + "ILLEGAL_TIME_VALUE\0" + "INTEGER_NOT_ASCII_FORMAT\0" + "INTEGER_TOO_LARGE_FOR_LONG\0" + "INVALID_BIT_STRING_BITS_LEFT\0" + "INVALID_BIT_STRING_PADDING\0" + "INVALID_BMPSTRING\0" + "INVALID_DIGIT\0" + "INVALID_INTEGER\0" + "INVALID_MODIFIER\0" + "INVALID_NUMBER\0" + "INVALID_OBJECT_ENCODING\0" + "INVALID_SEPARATOR\0" + "INVALID_TIME_FORMAT\0" + "INVALID_UNIVERSALSTRING\0" + "INVALID_UTF8STRING\0" + "LIST_ERROR\0" + "MISSING_ASN1_EOS\0" + "MISSING_EOC\0" + "MISSING_SECOND_NUMBER\0" + "MISSING_VALUE\0" + "MSTRING_NOT_UNIVERSAL\0" + "MSTRING_WRONG_TAG\0" + "NESTED_ASN1_ERROR\0" + "NESTED_ASN1_STRING\0" + "NESTED_TOO_DEEP\0" + "NON_HEX_CHARACTERS\0" + "NOT_ASCII_FORMAT\0" + "NOT_ENOUGH_DATA\0" + "NO_MATCHING_CHOICE_TYPE\0" + "NULL_IS_WRONG_LENGTH\0" + "OBJECT_NOT_ASCII_FORMAT\0" + "ODD_NUMBER_OF_CHARS\0" + "SECOND_NUMBER_TOO_LARGE\0" + "SEQUENCE_LENGTH_MISMATCH\0" + "SEQUENCE_NOT_CONSTRUCTED\0" + "SEQUENCE_OR_SET_NEEDS_CONFIG\0" + "SHORT_LINE\0" + "STREAMING_NOT_SUPPORTED\0" + "STRING_TOO_LONG\0" + "STRING_TOO_SHORT\0" + "TAG_VALUE_TOO_HIGH\0" + "TIME_NOT_ASCII_FORMAT\0" + "TOO_LONG\0" + "TYPE_NOT_CONSTRUCTED\0" + "TYPE_NOT_PRIMITIVE\0" + "UNEXPECTED_EOC\0" + "UNIVERSALSTRING_IS_WRONG_LENGTH\0" + "UNKNOWN_FORMAT\0" + "UNKNOWN_MESSAGE_DIGEST_ALGORITHM\0" + "UNKNOWN_SIGNATURE_ALGORITHM\0" + "UNKNOWN_TAG\0" + "UNSUPPORTED_ANY_DEFINED_BY_TYPE\0" + "UNSUPPORTED_PUBLIC_KEY_TYPE\0" + "UNSUPPORTED_TYPE\0" + "WRONG_INTEGER_TYPE\0" + "WRONG_PUBLIC_KEY_TYPE\0" + "WRONG_TAG\0" + "WRONG_TYPE\0" + "BAD_FOPEN_MODE\0" + "BROKEN_PIPE\0" + "CONNECT_ERROR\0" + "ERROR_SETTING_NBIO\0" + "INVALID_ARGUMENT\0" + "IN_USE\0" + "KEEPALIVE\0" + "NBIO_CONNECT_ERROR\0" + "NO_HOSTNAME_SPECIFIED\0" + "NO_PORT_SPECIFIED\0" + "NO_SUCH_FILE\0" + "NULL_PARAMETER\0" + "SYS_LIB\0" + "UNABLE_TO_CREATE_SOCKET\0" + "UNINITIALIZED\0" + "UNSUPPORTED_METHOD\0" + "WRITE_TO_READ_ONLY_BIO\0" + "ARG2_LT_ARG3\0" + "BAD_ENCODING\0" + "BAD_RECIPROCAL\0" + "BIGNUM_TOO_LONG\0" + "BITS_TOO_SMALL\0" + "CALLED_WITH_EVEN_MODULUS\0" + "DIV_BY_ZERO\0" + "EXPAND_ON_STATIC_BIGNUM_DATA\0" + "INPUT_NOT_REDUCED\0" + "INVALID_INPUT\0" + "INVALID_RANGE\0" + "NEGATIVE_NUMBER\0" + "NOT_A_SQUARE\0" + "NOT_INITIALIZED\0" + "NO_INVERSE\0" + "PRIVATE_KEY_TOO_LARGE\0" + "P_IS_NOT_PRIME\0" + "TOO_MANY_ITERATIONS\0" + "TOO_MANY_TEMPORARY_VARIABLES\0" + "AES_KEY_SETUP_FAILED\0" + "BAD_DECRYPT\0" + "BAD_KEY_LENGTH\0" + "CTRL_NOT_IMPLEMENTED\0" + "CTRL_OPERATION_NOT_IMPLEMENTED\0" + "DATA_NOT_MULTIPLE_OF_BLOCK_LENGTH\0" + "INITIALIZATION_ERROR\0" + "INPUT_NOT_INITIALIZED\0" + "INVALID_AD_SIZE\0" + "INVALID_KEY_LENGTH\0" + "INVALID_NONCE\0" + "INVALID_NONCE_SIZE\0" + "INVALID_OPERATION\0" + "IV_TOO_LARGE\0" + "NO_CIPHER_SET\0" + "NO_DIRECTION_SET\0" + "OUTPUT_ALIASES_INPUT\0" + "TAG_TOO_LARGE\0" + "TOO_LARGE\0" + "UNSUPPORTED_AD_SIZE\0" + "UNSUPPORTED_INPUT_SIZE\0" + "UNSUPPORTED_KEY_SIZE\0" + "UNSUPPORTED_NONCE_SIZE\0" + "UNSUPPORTED_TAG_SIZE\0" + "WRONG_FINAL_BLOCK_LENGTH\0" + "LIST_CANNOT_BE_NULL\0" + "MISSING_CLOSE_SQUARE_BRACKET\0" + "MISSING_EQUAL_SIGN\0" + "NO_CLOSE_BRACE\0" + "UNABLE_TO_CREATE_NEW_SECTION\0" + "VARIABLE_EXPANSION_NOT_SUPPORTED\0" + "VARIABLE_EXPANSION_TOO_LONG\0" + "VARIABLE_HAS_NO_VALUE\0" + "BAD_GENERATOR\0" + "INVALID_PARAMETERS\0" + "INVALID_PUBKEY\0" + "MODULUS_TOO_LARGE\0" + "NO_PRIVATE_VALUE\0" + "UNKNOWN_HASH\0" + "BAD_Q_VALUE\0" + "BAD_VERSION\0" + "MISSING_PARAMETERS\0" + "NEED_NEW_SETUP_VALUES\0" + "BIGNUM_OUT_OF_RANGE\0" + "COORDINATES_OUT_OF_RANGE\0" + "D2I_ECPKPARAMETERS_FAILURE\0" + "EC_GROUP_NEW_BY_NAME_FAILURE\0" + "GROUP2PKPARAMETERS_FAILURE\0" + "GROUP_MISMATCH\0" + "I2D_ECPKPARAMETERS_FAILURE\0" + "INCOMPATIBLE_OBJECTS\0" + "INVALID_COFACTOR\0" + "INVALID_COMPRESSED_POINT\0" + "INVALID_COMPRESSION_BIT\0" + "INVALID_ENCODING\0" + "INVALID_FIELD\0" + "INVALID_FORM\0" + "INVALID_GROUP_ORDER\0" + "INVALID_PRIVATE_KEY\0" + "INVALID_SCALAR\0" + "MISSING_PRIVATE_KEY\0" + "NON_NAMED_CURVE\0" + "PKPARAMETERS2GROUP_FAILURE\0" + "POINT_AT_INFINITY\0" + "POINT_IS_NOT_ON_CURVE\0" + "PUBLIC_KEY_VALIDATION_FAILED\0" + "SLOT_FULL\0" + "UNDEFINED_GENERATOR\0" + "UNKNOWN_GROUP\0" + "UNKNOWN_ORDER\0" + "WRONG_CURVE_PARAMETERS\0" + "WRONG_ORDER\0" + "KDF_FAILED\0" + "POINT_ARITHMETIC_FAILURE\0" + "UNKNOWN_DIGEST_LENGTH\0" + "BAD_SIGNATURE\0" + "NOT_IMPLEMENTED\0" + "RANDOM_NUMBER_GENERATION_FAILED\0" + "OPERATION_NOT_SUPPORTED\0" + "COMMAND_NOT_SUPPORTED\0" + "DIFFERENT_KEY_TYPES\0" + "DIFFERENT_PARAMETERS\0" + "EMPTY_PSK\0" + "EXPECTING_AN_EC_KEY_KEY\0" + "EXPECTING_AN_RSA_KEY\0" + "EXPECTING_A_DH_KEY\0" + "EXPECTING_A_DSA_KEY\0" + "ILLEGAL_OR_UNSUPPORTED_PADDING_MODE\0" + "INVALID_BUFFER_SIZE\0" + "INVALID_DIGEST_LENGTH\0" + "INVALID_DIGEST_TYPE\0" + "INVALID_KEYBITS\0" + "INVALID_MGF1_MD\0" + "INVALID_PADDING_MODE\0" + "INVALID_PEER_KEY\0" + "INVALID_PSS_SALTLEN\0" + "INVALID_SIGNATURE\0" + "KEYS_NOT_SET\0" + "MEMORY_LIMIT_EXCEEDED\0" + "NOT_A_PRIVATE_KEY\0" + "NOT_XOF_OR_INVALID_LENGTH\0" + "NO_DEFAULT_DIGEST\0" + "NO_KEY_SET\0" + "NO_MDC2_SUPPORT\0" + "NO_NID_FOR_CURVE\0" + "NO_OPERATION_SET\0" + "NO_PARAMETERS_SET\0" + "OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE\0" + "OPERATON_NOT_INITIALIZED\0" + "UNKNOWN_PUBLIC_KEY_TYPE\0" + "UNSUPPORTED_ALGORITHM\0" + "OUTPUT_TOO_LARGE\0" + "INVALID_OID_STRING\0" + "UNKNOWN_NID\0" + "BAD_BASE64_DECODE\0" + "BAD_END_LINE\0" + "BAD_IV_CHARS\0" + "BAD_PASSWORD_READ\0" + "CIPHER_IS_NULL\0" + "ERROR_CONVERTING_PRIVATE_KEY\0" + "NOT_DEK_INFO\0" + "NOT_ENCRYPTED\0" + "NOT_PROC_TYPE\0" + "NO_START_LINE\0" + "READ_KEY\0" + "SHORT_HEADER\0" + "UNSUPPORTED_CIPHER\0" + "UNSUPPORTED_ENCRYPTION\0" + "BAD_PKCS7_VERSION\0" + "NOT_PKCS7_SIGNED_DATA\0" + "NO_CERTIFICATES_INCLUDED\0" + "NO_CRLS_INCLUDED\0" + "AMBIGUOUS_FRIENDLY_NAME\0" + "BAD_ITERATION_COUNT\0" + "BAD_PKCS12_DATA\0" + "BAD_PKCS12_VERSION\0" + "CIPHER_HAS_NO_OBJECT_IDENTIFIER\0" + "CRYPT_ERROR\0" + "ENCRYPT_ERROR\0" + "ERROR_SETTING_CIPHER_PARAMS\0" + "INCORRECT_PASSWORD\0" + "INVALID_CHARACTERS\0" + "KEYGEN_FAILURE\0" + "KEY_GEN_ERROR\0" + "METHOD_NOT_SUPPORTED\0" + "MISSING_MAC\0" + "MULTIPLE_PRIVATE_KEYS_IN_PKCS12\0" + "PKCS12_PUBLIC_KEY_INTEGRITY_NOT_SUPPORTED\0" + "PKCS12_TOO_DEEPLY_NESTED\0" + "PRIVATE_KEY_DECODE_ERROR\0" + "PRIVATE_KEY_ENCODE_ERROR\0" + "UNKNOWN_ALGORITHM\0" + "UNKNOWN_CIPHER\0" + "UNKNOWN_CIPHER_ALGORITHM\0" + "UNKNOWN_DIGEST\0" + "UNSUPPORTED_KEYLENGTH\0" + "UNSUPPORTED_KEY_DERIVATION_FUNCTION\0" + "UNSUPPORTED_OPTIONS\0" + "UNSUPPORTED_PRF\0" + "UNSUPPORTED_PRIVATE_KEY_ALGORITHM\0" + "UNSUPPORTED_SALT_TYPE\0" + "BAD_E_VALUE\0" + "BAD_FIXED_HEADER_DECRYPT\0" + "BAD_PAD_BYTE_COUNT\0" + "BAD_RSA_PARAMETERS\0" + "BLOCK_TYPE_IS_NOT_01\0" + "BLOCK_TYPE_IS_NOT_02\0" + "BN_NOT_INITIALIZED\0" + "CANNOT_RECOVER_MULTI_PRIME_KEY\0" + "CRT_PARAMS_ALREADY_GIVEN\0" + "CRT_VALUES_INCORRECT\0" + "DATA_LEN_NOT_EQUAL_TO_MOD_LEN\0" + "DATA_TOO_LARGE\0" + "DATA_TOO_LARGE_FOR_KEY_SIZE\0" + "DATA_TOO_LARGE_FOR_MODULUS\0" + "DATA_TOO_SMALL\0" + "DATA_TOO_SMALL_FOR_KEY_SIZE\0" + "DIGEST_TOO_BIG_FOR_RSA_KEY\0" + "D_E_NOT_CONGRUENT_TO_1\0" + "D_OUT_OF_RANGE\0" + "EMPTY_PUBLIC_KEY\0" + "FIRST_OCTET_INVALID\0" + "INCONSISTENT_SET_OF_CRT_VALUES\0" + "INTERNAL_ERROR\0" + "INVALID_MESSAGE_LENGTH\0" + "KEY_SIZE_TOO_SMALL\0" + "LAST_OCTET_INVALID\0" + "MUST_HAVE_AT_LEAST_TWO_PRIMES\0" + "NO_PUBLIC_EXPONENT\0" + "NULL_BEFORE_BLOCK_MISSING\0" + "N_NOT_EQUAL_P_Q\0" + "OAEP_DECODING_ERROR\0" + "ONLY_ONE_OF_P_Q_GIVEN\0" + "OUTPUT_BUFFER_TOO_SMALL\0" + "PADDING_CHECK_FAILED\0" + "PKCS_DECODING_ERROR\0" + "SLEN_CHECK_FAILED\0" + "SLEN_RECOVERY_FAILED\0" + "UNKNOWN_ALGORITHM_TYPE\0" + "UNKNOWN_PADDING_TYPE\0" + "VALUE_MISSING\0" + "WRONG_SIGNATURE_LENGTH\0" + "ALPN_MISMATCH_ON_EARLY_DATA\0" + "ALPS_MISMATCH_ON_EARLY_DATA\0" + "APPLICATION_DATA_INSTEAD_OF_HANDSHAKE\0" + "APPLICATION_DATA_ON_SHUTDOWN\0" + "APP_DATA_IN_HANDSHAKE\0" + "ATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXT\0" + "BAD_ALERT\0" + "BAD_CHANGE_CIPHER_SPEC\0" + "BAD_DATA_RETURNED_BY_CALLBACK\0" + "BAD_DH_P_LENGTH\0" + "BAD_DIGEST_LENGTH\0" + "BAD_ECC_CERT\0" + "BAD_ECPOINT\0" + "BAD_HANDSHAKE_RECORD\0" + "BAD_HELLO_REQUEST\0" + "BAD_LENGTH\0" + "BAD_PACKET_LENGTH\0" + "BAD_RSA_ENCRYPT\0" + "BAD_SRTP_MKI_VALUE\0" + "BAD_SRTP_PROTECTION_PROFILE_LIST\0" + "BAD_SSL_FILETYPE\0" + "BAD_WRITE_RETRY\0" + "BIO_NOT_SET\0" + "BLOCK_CIPHER_PAD_IS_WRONG\0" + "CANNOT_HAVE_BOTH_PRIVKEY_AND_METHOD\0" + "CANNOT_PARSE_LEAF_CERT\0" + "CA_DN_LENGTH_MISMATCH\0" + "CA_DN_TOO_LONG\0" + "CCS_RECEIVED_EARLY\0" + "CERTIFICATE_AND_PRIVATE_KEY_MISMATCH\0" + "CERTIFICATE_VERIFY_FAILED\0" + "CERT_CB_ERROR\0" + "CERT_DECOMPRESSION_FAILED\0" + "CERT_LENGTH_MISMATCH\0" + "CHANNEL_ID_NOT_P256\0" + "CHANNEL_ID_SIGNATURE_INVALID\0" + "CIPHER_MISMATCH_ON_EARLY_DATA\0" + "CIPHER_OR_HASH_UNAVAILABLE\0" + "CLIENTHELLO_PARSE_FAILED\0" + "CLIENTHELLO_TLSEXT\0" + "CONNECTION_REJECTED\0" + "CONNECTION_TYPE_NOT_SET\0" + "COULD_NOT_PARSE_HINTS\0" + "CUSTOM_EXTENSION_ERROR\0" + "DATA_LENGTH_TOO_LONG\0" + "DECRYPTION_FAILED\0" + "DECRYPTION_FAILED_OR_BAD_RECORD_MAC\0" + "DH_PUBLIC_VALUE_LENGTH_IS_WRONG\0" + "DH_P_TOO_LONG\0" + "DIGEST_CHECK_FAILED\0" + "DOWNGRADE_DETECTED\0" + "DTLS_MESSAGE_TOO_BIG\0" + "DUPLICATE_EXTENSION\0" + "DUPLICATE_KEY_SHARE\0" + "DUPLICATE_SIGNATURE_ALGORITHM\0" + "EARLY_DATA_NOT_IN_USE\0" + "ECC_CERT_NOT_FOR_SIGNING\0" + "ECH_REJECTED\0" + "ECH_SERVER_CONFIG_AND_PRIVATE_KEY_MISMATCH\0" + "ECH_SERVER_CONFIG_UNSUPPORTED_EXTENSION\0" + "ECH_SERVER_WOULD_HAVE_NO_RETRY_CONFIGS\0" + "EMPTY_HELLO_RETRY_REQUEST\0" + "EMS_STATE_INCONSISTENT\0" + "ENCRYPTED_LENGTH_TOO_LONG\0" + "ERROR_ADDING_EXTENSION\0" + "ERROR_IN_RECEIVED_CIPHER_LIST\0" + "ERROR_PARSING_EXTENSION\0" + "EXCESSIVE_MESSAGE_SIZE\0" + "EXCESS_HANDSHAKE_DATA\0" + "EXTRA_DATA_IN_MESSAGE\0" + "FRAGMENT_MISMATCH\0" + "GOT_NEXT_PROTO_WITHOUT_EXTENSION\0" + "HANDSHAKE_FAILURE_ON_CLIENT_HELLO\0" + "HANDSHAKE_NOT_COMPLETE\0" + "HTTPS_PROXY_REQUEST\0" + "HTTP_REQUEST\0" + "INAPPROPRIATE_FALLBACK\0" + "INCONSISTENT_CLIENT_HELLO\0" + "INCONSISTENT_ECH_NEGOTIATION\0" + "INVALID_ALPN_PROTOCOL\0" + "INVALID_ALPN_PROTOCOL_LIST\0" + "INVALID_ALPS_CODEPOINT\0" + "INVALID_CLIENT_HELLO_INNER\0" + "INVALID_COMMAND\0" + "INVALID_COMPRESSION_LIST\0" + "INVALID_DELEGATED_CREDENTIAL\0" + "INVALID_ECH_CONFIG_LIST\0" + "INVALID_ECH_PUBLIC_NAME\0" + "INVALID_MESSAGE\0" + "INVALID_OUTER_EXTENSION\0" + "INVALID_OUTER_RECORD_TYPE\0" + "INVALID_SCT_LIST\0" + "INVALID_SIGNATURE_ALGORITHM\0" + "INVALID_SSL_SESSION\0" + "INVALID_TICKET_KEYS_LENGTH\0" + "KEY_USAGE_BIT_INCORRECT\0" + "LENGTH_MISMATCH\0" + "MISSING_EXTENSION\0" + "MISSING_KEY_SHARE\0" + "MISSING_RSA_CERTIFICATE\0" + "MISSING_TMP_DH_KEY\0" + "MISSING_TMP_ECDH_KEY\0" + "MIXED_SPECIAL_OPERATOR_WITH_GROUPS\0" + "MTU_TOO_SMALL\0" + "NEGOTIATED_ALPS_WITHOUT_ALPN\0" + "NEGOTIATED_BOTH_NPN_AND_ALPN\0" + "NEGOTIATED_TB_WITHOUT_EMS_OR_RI\0" + "NESTED_GROUP\0" + "NO_APPLICATION_PROTOCOL\0" + "NO_CERTIFICATES_RETURNED\0" + "NO_CERTIFICATE_ASSIGNED\0" + "NO_CERTIFICATE_SET\0" + "NO_CIPHERS_AVAILABLE\0" + "NO_CIPHERS_PASSED\0" + "NO_CIPHERS_SPECIFIED\0" + "NO_CIPHER_MATCH\0" + "NO_COMMON_SIGNATURE_ALGORITHMS\0" + "NO_COMPRESSION_SPECIFIED\0" + "NO_GROUPS_SPECIFIED\0" + "NO_METHOD_SPECIFIED\0" + "NO_PRIVATE_KEY_ASSIGNED\0" + "NO_RENEGOTIATION\0" + "NO_REQUIRED_DIGEST\0" + "NO_SHARED_CIPHER\0" + "NO_SHARED_GROUP\0" + "NO_SUPPORTED_VERSIONS_ENABLED\0" + "NULL_SSL_CTX\0" + "NULL_SSL_METHOD_PASSED\0" + "OCSP_CB_ERROR\0" + "OLD_SESSION_CIPHER_NOT_RETURNED\0" + "OLD_SESSION_PRF_HASH_MISMATCH\0" + "OLD_SESSION_VERSION_NOT_RETURNED\0" + "PARSE_TLSEXT\0" + "PATH_TOO_LONG\0" + "PEER_DID_NOT_RETURN_A_CERTIFICATE\0" + "PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE\0" + "PRE_SHARED_KEY_MUST_BE_LAST\0" + "PRIVATE_KEY_OPERATION_FAILED\0" + "PROTOCOL_IS_SHUTDOWN\0" + "PSK_IDENTITY_BINDER_COUNT_MISMATCH\0" + "PSK_IDENTITY_NOT_FOUND\0" + "PSK_NO_CLIENT_CB\0" + "PSK_NO_SERVER_CB\0" + "QUIC_INTERNAL_ERROR\0" + "QUIC_TRANSPORT_PARAMETERS_MISCONFIGURED\0" + "READ_TIMEOUT_EXPIRED\0" + "RECORD_LENGTH_MISMATCH\0" + "RECORD_TOO_LARGE\0" + "RENEGOTIATION_EMS_MISMATCH\0" + "RENEGOTIATION_ENCODING_ERR\0" + "RENEGOTIATION_MISMATCH\0" + "REQUIRED_CIPHER_MISSING\0" + "RESUMED_EMS_SESSION_WITHOUT_EMS_EXTENSION\0" + "RESUMED_NON_EMS_SESSION_WITH_EMS_EXTENSION\0" + "SCSV_RECEIVED_WHEN_RENEGOTIATING\0" + "SECOND_SERVERHELLO_VERSION_MISMATCH\0" + "SERVERHELLO_TLSEXT\0" + "SERVER_CERT_CHANGED\0" + "SERVER_ECHOED_INVALID_SESSION_ID\0" + "SESSION_ID_CONTEXT_UNINITIALIZED\0" + "SESSION_MAY_NOT_BE_CREATED\0" + "SHUTDOWN_WHILE_IN_INIT\0" + "SIGNATURE_ALGORITHMS_EXTENSION_SENT_BY_SERVER\0" + "SRTP_COULD_NOT_ALLOCATE_PROFILES\0" + "SRTP_UNKNOWN_PROTECTION_PROFILE\0" + "SSL3_EXT_INVALID_SERVERNAME\0" + "SSLV3_ALERT_BAD_CERTIFICATE\0" + "SSLV3_ALERT_BAD_RECORD_MAC\0" + "SSLV3_ALERT_CERTIFICATE_EXPIRED\0" + "SSLV3_ALERT_CERTIFICATE_REVOKED\0" + "SSLV3_ALERT_CERTIFICATE_UNKNOWN\0" + "SSLV3_ALERT_CLOSE_NOTIFY\0" + "SSLV3_ALERT_DECOMPRESSION_FAILURE\0" + "SSLV3_ALERT_HANDSHAKE_FAILURE\0" + "SSLV3_ALERT_ILLEGAL_PARAMETER\0" + "SSLV3_ALERT_NO_CERTIFICATE\0" + "SSLV3_ALERT_UNEXPECTED_MESSAGE\0" + "SSLV3_ALERT_UNSUPPORTED_CERTIFICATE\0" + "SSL_CTX_HAS_NO_DEFAULT_SSL_VERSION\0" + "SSL_HANDSHAKE_FAILURE\0" + "SSL_SESSION_ID_CONTEXT_TOO_LONG\0" + "SSL_SESSION_ID_TOO_LONG\0" + "TICKET_ENCRYPTION_FAILED\0" + "TLS13_DOWNGRADE\0" + "TLSV1_ALERT_ACCESS_DENIED\0" + "TLSV1_ALERT_BAD_CERTIFICATE_HASH_VALUE\0" + "TLSV1_ALERT_BAD_CERTIFICATE_STATUS_RESPONSE\0" + "TLSV1_ALERT_CERTIFICATE_REQUIRED\0" + "TLSV1_ALERT_CERTIFICATE_UNOBTAINABLE\0" + "TLSV1_ALERT_DECODE_ERROR\0" + "TLSV1_ALERT_DECRYPTION_FAILED\0" + "TLSV1_ALERT_DECRYPT_ERROR\0" + "TLSV1_ALERT_ECH_REQUIRED\0" + "TLSV1_ALERT_EXPORT_RESTRICTION\0" + "TLSV1_ALERT_INAPPROPRIATE_FALLBACK\0" + "TLSV1_ALERT_INSUFFICIENT_SECURITY\0" + "TLSV1_ALERT_INTERNAL_ERROR\0" + "TLSV1_ALERT_NO_APPLICATION_PROTOCOL\0" + "TLSV1_ALERT_NO_RENEGOTIATION\0" + "TLSV1_ALERT_PROTOCOL_VERSION\0" + "TLSV1_ALERT_RECORD_OVERFLOW\0" + "TLSV1_ALERT_UNKNOWN_CA\0" + "TLSV1_ALERT_UNKNOWN_PSK_IDENTITY\0" + "TLSV1_ALERT_UNRECOGNIZED_NAME\0" + "TLSV1_ALERT_UNSUPPORTED_EXTENSION\0" + "TLSV1_ALERT_USER_CANCELLED\0" + "TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST\0" + "TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG\0" + "TOO_MANY_EMPTY_FRAGMENTS\0" + "TOO_MANY_KEY_UPDATES\0" + "TOO_MANY_WARNING_ALERTS\0" + "TOO_MUCH_READ_EARLY_DATA\0" + "TOO_MUCH_SKIPPED_EARLY_DATA\0" + "UNABLE_TO_FIND_ECDH_PARAMETERS\0" + "UNCOMPRESSED_CERT_TOO_LARGE\0" + "UNEXPECTED_COMPATIBILITY_MODE\0" + "UNEXPECTED_EXTENSION\0" + "UNEXPECTED_EXTENSION_ON_EARLY_DATA\0" + "UNEXPECTED_MESSAGE\0" + "UNEXPECTED_OPERATOR_IN_GROUP\0" + "UNEXPECTED_RECORD\0" + "UNKNOWN_ALERT_TYPE\0" + "UNKNOWN_CERTIFICATE_TYPE\0" + "UNKNOWN_CERT_COMPRESSION_ALG\0" + "UNKNOWN_CIPHER_RETURNED\0" + "UNKNOWN_CIPHER_TYPE\0" + "UNKNOWN_KEY_EXCHANGE_TYPE\0" + "UNKNOWN_PROTOCOL\0" + "UNKNOWN_SSL_VERSION\0" + "UNKNOWN_STATE\0" + "UNSAFE_LEGACY_RENEGOTIATION_DISABLED\0" + "UNSUPPORTED_COMPRESSION_ALGORITHM\0" + "UNSUPPORTED_ECH_SERVER_CONFIG\0" + "UNSUPPORTED_ELLIPTIC_CURVE\0" + "UNSUPPORTED_PROTOCOL\0" + "UNSUPPORTED_PROTOCOL_FOR_CUSTOM_KEY\0" + "WRONG_CERTIFICATE_TYPE\0" + "WRONG_CIPHER_RETURNED\0" + "WRONG_CURVE\0" + "WRONG_ENCRYPTION_LEVEL_RECEIVED\0" + "WRONG_MESSAGE_TYPE\0" + "WRONG_SIGNATURE_TYPE\0" + "WRONG_SSL_VERSION\0" + "WRONG_VERSION_NUMBER\0" + "WRONG_VERSION_ON_EARLY_DATA\0" + "X509_LIB\0" + "X509_VERIFICATION_SETUP_PROBLEMS\0" + "BAD_VALIDITY_CHECK\0" + "DECODE_FAILURE\0" + "INVALID_KEY_ID\0" + "INVALID_METADATA\0" + "INVALID_METADATA_KEY\0" + "INVALID_PROOF\0" + "INVALID_TOKEN\0" + "NO_KEYS_CONFIGURED\0" + "NO_SRR_KEY_CONFIGURED\0" + "OVER_BATCHSIZE\0" + "SRR_SIGNATURE_ERROR\0" + "TOO_MANY_KEYS\0" + "AKID_MISMATCH\0" + "BAD_X509_FILETYPE\0" + "BASE64_DECODE_ERROR\0" + "CANT_CHECK_DH_KEY\0" + "CERT_ALREADY_IN_HASH_TABLE\0" + "CRL_ALREADY_DELTA\0" + "CRL_VERIFY_FAILURE\0" + "DELTA_CRL_WITHOUT_CRL_NUMBER\0" + "IDP_MISMATCH\0" + "INVALID_DIRECTORY\0" + "INVALID_FIELD_FOR_VERSION\0" + "INVALID_FIELD_NAME\0" + "INVALID_PARAMETER\0" + "INVALID_POLICY_EXTENSION\0" + "INVALID_PSS_PARAMETERS\0" + "INVALID_TRUST\0" + "INVALID_VERSION\0" + "ISSUER_MISMATCH\0" + "KEY_TYPE_MISMATCH\0" + "KEY_VALUES_MISMATCH\0" + "LOADING_CERT_DIR\0" + "LOADING_DEFAULTS\0" + "NAME_TOO_LONG\0" + "NEWER_CRL_NOT_NEWER\0" + "NO_CERTIFICATE_FOUND\0" + "NO_CERTIFICATE_OR_CRL_FOUND\0" + "NO_CERT_SET_FOR_US_TO_VERIFY\0" + "NO_CRL_FOUND\0" + "NO_CRL_NUMBER\0" + "PUBLIC_KEY_DECODE_ERROR\0" + "PUBLIC_KEY_ENCODE_ERROR\0" + "SHOULD_RETRY\0" + "SIGNATURE_ALGORITHM_MISMATCH\0" + "UNKNOWN_KEY_TYPE\0" + "UNKNOWN_PURPOSE_ID\0" + "UNKNOWN_TRUST_ID\0" + "WRONG_LOOKUP_TYPE\0" + "BAD_IP_ADDRESS\0" + "BAD_OBJECT\0" + "BN_DEC2BN_ERROR\0" + "BN_TO_ASN1_INTEGER_ERROR\0" + "CANNOT_FIND_FREE_FUNCTION\0" + "DIRNAME_ERROR\0" + "DISTPOINT_ALREADY_SET\0" + "DUPLICATE_ZONE_ID\0" + "ERROR_CONVERTING_ZONE\0" + "ERROR_CREATING_EXTENSION\0" + "ERROR_IN_EXTENSION\0" + "EXPECTED_A_SECTION_NAME\0" + "EXTENSION_EXISTS\0" + "EXTENSION_NAME_ERROR\0" + "EXTENSION_NOT_FOUND\0" + "EXTENSION_SETTING_NOT_SUPPORTED\0" + "EXTENSION_VALUE_ERROR\0" + "ILLEGAL_EMPTY_EXTENSION\0" + "ILLEGAL_HEX_DIGIT\0" + "INCORRECT_POLICY_SYNTAX_TAG\0" + "INVALID_BOOLEAN_STRING\0" + "INVALID_EXTENSION_STRING\0" + "INVALID_MULTIPLE_RDNS\0" + "INVALID_NAME\0" + "INVALID_NULL_ARGUMENT\0" + "INVALID_NULL_NAME\0" + "INVALID_NULL_VALUE\0" + "INVALID_NUMBERS\0" + "INVALID_OBJECT_IDENTIFIER\0" + "INVALID_OPTION\0" + "INVALID_POLICY_IDENTIFIER\0" + "INVALID_PROXY_POLICY_SETTING\0" + "INVALID_PURPOSE\0" + "INVALID_SECTION\0" + "INVALID_SYNTAX\0" + "INVALID_VALUE\0" + "ISSUER_DECODE_ERROR\0" + "NEED_ORGANIZATION_AND_NUMBERS\0" + "NO_CONFIG_DATABASE\0" + "NO_ISSUER_CERTIFICATE\0" + "NO_ISSUER_DETAILS\0" + "NO_POLICY_IDENTIFIER\0" + "NO_PROXY_CERT_POLICY_LANGUAGE_DEFINED\0" + "NO_PUBLIC_KEY\0" + "NO_SUBJECT_DETAILS\0" + "ODD_NUMBER_OF_DIGITS\0" + "OPERATION_NOT_DEFINED\0" + "OTHERNAME_ERROR\0" + "POLICY_LANGUAGE_ALREADY_DEFINED\0" + "POLICY_PATH_LENGTH\0" + "POLICY_PATH_LENGTH_ALREADY_DEFINED\0" + "POLICY_WHEN_PROXY_LANGUAGE_REQUIRES_NO_POLICY\0" + "SECTION_NOT_FOUND\0" + "TRAILING_DATA_IN_EXTENSION\0" + "UNABLE_TO_GET_ISSUER_DETAILS\0" + "UNABLE_TO_GET_ISSUER_KEYID\0" + "UNKNOWN_BIT_STRING_ARGUMENT\0" + "UNKNOWN_EXTENSION\0" + "UNKNOWN_EXTENSION_NAME\0" + "UNKNOWN_OPTION\0" + "UNSUPPORTED_OPTION\0" + "USER_TOO_LONG\0" + ""; + diff --git a/yass/third_party/boringssl/src/sources.cmake b/yass/third_party/boringssl/src/gen/sources.cmake similarity index 76% rename from yass/third_party/boringssl/src/sources.cmake rename to yass/third_party/boringssl/src/gen/sources.cmake index 262b3fdd87..5d7d3d15ba 100644 --- a/yass/third_party/boringssl/src/sources.cmake +++ b/yass/third_party/boringssl/src/gen/sources.cmake @@ -1,7 +1,702 @@ -# This file contains source lists that are also consumed by -# generate_build_files.py. +# Copyright (c) 2024, Google Inc. # -# TODO(davidben): Move the other source lists into this file. +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# +# Generated by go ./util/pregenerate. Do not edit manually. + +set( + BCM_SOURCES + + crypto/fipsmodule/bcm.c +) + +set( + BCM_INTERNAL_HEADERS + + crypto/fipsmodule/aes/aes.c + crypto/fipsmodule/aes/aes_nohw.c + crypto/fipsmodule/aes/key_wrap.c + crypto/fipsmodule/aes/mode_wrappers.c + crypto/fipsmodule/bn/add.c + crypto/fipsmodule/bn/asm/x86_64-gcc.c + crypto/fipsmodule/bn/bn.c + crypto/fipsmodule/bn/bytes.c + crypto/fipsmodule/bn/cmp.c + crypto/fipsmodule/bn/ctx.c + crypto/fipsmodule/bn/div.c + crypto/fipsmodule/bn/div_extra.c + crypto/fipsmodule/bn/exponentiation.c + crypto/fipsmodule/bn/gcd.c + crypto/fipsmodule/bn/gcd_extra.c + crypto/fipsmodule/bn/generic.c + crypto/fipsmodule/bn/jacobi.c + crypto/fipsmodule/bn/montgomery.c + crypto/fipsmodule/bn/montgomery_inv.c + crypto/fipsmodule/bn/mul.c + crypto/fipsmodule/bn/prime.c + crypto/fipsmodule/bn/random.c + crypto/fipsmodule/bn/rsaz_exp.c + crypto/fipsmodule/bn/shift.c + crypto/fipsmodule/bn/sqrt.c + crypto/fipsmodule/cipher/aead.c + crypto/fipsmodule/cipher/cipher.c + crypto/fipsmodule/cipher/e_aes.c + crypto/fipsmodule/cipher/e_aesccm.c + crypto/fipsmodule/cmac/cmac.c + crypto/fipsmodule/dh/check.c + crypto/fipsmodule/dh/dh.c + crypto/fipsmodule/digest/digest.c + crypto/fipsmodule/digest/digests.c + crypto/fipsmodule/digestsign/digestsign.c + crypto/fipsmodule/ec/ec.c + crypto/fipsmodule/ec/ec_key.c + crypto/fipsmodule/ec/ec_montgomery.c + crypto/fipsmodule/ec/felem.c + crypto/fipsmodule/ec/oct.c + crypto/fipsmodule/ec/p224-64.c + crypto/fipsmodule/ec/p256-nistz.c + crypto/fipsmodule/ec/p256.c + crypto/fipsmodule/ec/scalar.c + crypto/fipsmodule/ec/simple.c + crypto/fipsmodule/ec/simple_mul.c + crypto/fipsmodule/ec/util.c + crypto/fipsmodule/ec/wnaf.c + crypto/fipsmodule/ecdh/ecdh.c + crypto/fipsmodule/ecdsa/ecdsa.c + crypto/fipsmodule/hkdf/hkdf.c + crypto/fipsmodule/hmac/hmac.c + crypto/fipsmodule/md4/md4.c + crypto/fipsmodule/md5/md5.c + crypto/fipsmodule/modes/cbc.c + crypto/fipsmodule/modes/cfb.c + crypto/fipsmodule/modes/ctr.c + crypto/fipsmodule/modes/gcm.c + crypto/fipsmodule/modes/gcm_nohw.c + crypto/fipsmodule/modes/ofb.c + crypto/fipsmodule/modes/polyval.c + crypto/fipsmodule/rand/ctrdrbg.c + crypto/fipsmodule/rand/fork_detect.c + crypto/fipsmodule/rand/rand.c + crypto/fipsmodule/rand/urandom.c + crypto/fipsmodule/rsa/blinding.c + crypto/fipsmodule/rsa/padding.c + crypto/fipsmodule/rsa/rsa.c + crypto/fipsmodule/rsa/rsa_impl.c + crypto/fipsmodule/self_check/fips.c + crypto/fipsmodule/self_check/self_check.c + crypto/fipsmodule/service_indicator/service_indicator.c + crypto/fipsmodule/sha/sha1.c + crypto/fipsmodule/sha/sha256.c + crypto/fipsmodule/sha/sha512.c + crypto/fipsmodule/tls/kdf.c +) + +set( + BCM_SOURCES_ASM + + gen/bcm/aesni-gcm-x86_64-apple.S + gen/bcm/aesni-gcm-x86_64-linux.S + gen/bcm/aesni-x86-apple.S + gen/bcm/aesni-x86-linux.S + gen/bcm/aesni-x86_64-apple.S + gen/bcm/aesni-x86_64-linux.S + gen/bcm/aesv8-armv7-linux.S + gen/bcm/aesv8-armv8-apple.S + gen/bcm/aesv8-armv8-linux.S + gen/bcm/aesv8-armv8-win.S + gen/bcm/aesv8-gcm-armv8-apple.S + gen/bcm/aesv8-gcm-armv8-linux.S + gen/bcm/aesv8-gcm-armv8-win.S + gen/bcm/armv4-mont-linux.S + gen/bcm/armv8-mont-apple.S + gen/bcm/armv8-mont-linux.S + gen/bcm/armv8-mont-win.S + gen/bcm/bn-586-apple.S + gen/bcm/bn-586-linux.S + gen/bcm/bn-armv8-apple.S + gen/bcm/bn-armv8-linux.S + gen/bcm/bn-armv8-win.S + gen/bcm/bsaes-armv7-linux.S + gen/bcm/co-586-apple.S + gen/bcm/co-586-linux.S + gen/bcm/ghash-armv4-linux.S + gen/bcm/ghash-neon-armv8-apple.S + gen/bcm/ghash-neon-armv8-linux.S + gen/bcm/ghash-neon-armv8-win.S + gen/bcm/ghash-ssse3-x86-apple.S + gen/bcm/ghash-ssse3-x86-linux.S + gen/bcm/ghash-ssse3-x86_64-apple.S + gen/bcm/ghash-ssse3-x86_64-linux.S + gen/bcm/ghash-x86-apple.S + gen/bcm/ghash-x86-linux.S + gen/bcm/ghash-x86_64-apple.S + gen/bcm/ghash-x86_64-linux.S + gen/bcm/ghashv8-armv7-linux.S + gen/bcm/ghashv8-armv8-apple.S + gen/bcm/ghashv8-armv8-linux.S + gen/bcm/ghashv8-armv8-win.S + gen/bcm/md5-586-apple.S + gen/bcm/md5-586-linux.S + gen/bcm/md5-x86_64-apple.S + gen/bcm/md5-x86_64-linux.S + gen/bcm/p256-armv8-asm-apple.S + gen/bcm/p256-armv8-asm-linux.S + gen/bcm/p256-armv8-asm-win.S + gen/bcm/p256-x86_64-asm-apple.S + gen/bcm/p256-x86_64-asm-linux.S + gen/bcm/p256_beeu-armv8-asm-apple.S + gen/bcm/p256_beeu-armv8-asm-linux.S + gen/bcm/p256_beeu-armv8-asm-win.S + gen/bcm/p256_beeu-x86_64-asm-apple.S + gen/bcm/p256_beeu-x86_64-asm-linux.S + gen/bcm/rdrand-x86_64-apple.S + gen/bcm/rdrand-x86_64-linux.S + gen/bcm/rsaz-avx2-apple.S + gen/bcm/rsaz-avx2-linux.S + gen/bcm/sha1-586-apple.S + gen/bcm/sha1-586-linux.S + gen/bcm/sha1-armv4-large-linux.S + gen/bcm/sha1-armv8-apple.S + gen/bcm/sha1-armv8-linux.S + gen/bcm/sha1-armv8-win.S + gen/bcm/sha1-x86_64-apple.S + gen/bcm/sha1-x86_64-linux.S + gen/bcm/sha256-586-apple.S + gen/bcm/sha256-586-linux.S + gen/bcm/sha256-armv4-linux.S + gen/bcm/sha256-armv8-apple.S + gen/bcm/sha256-armv8-linux.S + gen/bcm/sha256-armv8-win.S + gen/bcm/sha256-x86_64-apple.S + gen/bcm/sha256-x86_64-linux.S + gen/bcm/sha512-586-apple.S + gen/bcm/sha512-586-linux.S + gen/bcm/sha512-armv4-linux.S + gen/bcm/sha512-armv8-apple.S + gen/bcm/sha512-armv8-linux.S + gen/bcm/sha512-armv8-win.S + gen/bcm/sha512-x86_64-apple.S + gen/bcm/sha512-x86_64-linux.S + gen/bcm/vpaes-armv7-linux.S + gen/bcm/vpaes-armv8-apple.S + gen/bcm/vpaes-armv8-linux.S + gen/bcm/vpaes-armv8-win.S + gen/bcm/vpaes-x86-apple.S + gen/bcm/vpaes-x86-linux.S + gen/bcm/vpaes-x86_64-apple.S + gen/bcm/vpaes-x86_64-linux.S + gen/bcm/x86-mont-apple.S + gen/bcm/x86-mont-linux.S + gen/bcm/x86_64-mont-apple.S + gen/bcm/x86_64-mont-linux.S + gen/bcm/x86_64-mont5-apple.S + gen/bcm/x86_64-mont5-linux.S +) + +set( + BCM_SOURCES_NASM + + gen/bcm/aesni-gcm-x86_64-win.asm + gen/bcm/aesni-x86-win.asm + gen/bcm/aesni-x86_64-win.asm + gen/bcm/bn-586-win.asm + gen/bcm/co-586-win.asm + gen/bcm/ghash-ssse3-x86-win.asm + gen/bcm/ghash-ssse3-x86_64-win.asm + gen/bcm/ghash-x86-win.asm + gen/bcm/ghash-x86_64-win.asm + gen/bcm/md5-586-win.asm + gen/bcm/md5-x86_64-win.asm + gen/bcm/p256-x86_64-asm-win.asm + gen/bcm/p256_beeu-x86_64-asm-win.asm + gen/bcm/rdrand-x86_64-win.asm + gen/bcm/rsaz-avx2-win.asm + gen/bcm/sha1-586-win.asm + gen/bcm/sha1-x86_64-win.asm + gen/bcm/sha256-586-win.asm + gen/bcm/sha256-x86_64-win.asm + gen/bcm/sha512-586-win.asm + gen/bcm/sha512-x86_64-win.asm + gen/bcm/vpaes-x86-win.asm + gen/bcm/vpaes-x86_64-win.asm + gen/bcm/x86-mont-win.asm + gen/bcm/x86_64-mont-win.asm + gen/bcm/x86_64-mont5-win.asm +) + +set( + BSSL_SOURCES + + tool/args.cc + tool/ciphers.cc + tool/client.cc + tool/const.cc + tool/digest.cc + tool/fd.cc + tool/file.cc + tool/generate_ech.cc + tool/generate_ed25519.cc + tool/genrsa.cc + tool/pkcs12.cc + tool/rand.cc + tool/server.cc + tool/sign.cc + tool/speed.cc + tool/tool.cc + tool/transport_common.cc +) + +set( + BSSL_INTERNAL_HEADERS + + tool/internal.h + tool/transport_common.h +) + +set( + CRYPTO_SOURCES + + crypto/asn1/a_bitstr.c + crypto/asn1/a_bool.c + crypto/asn1/a_d2i_fp.c + crypto/asn1/a_dup.c + crypto/asn1/a_gentm.c + crypto/asn1/a_i2d_fp.c + crypto/asn1/a_int.c + crypto/asn1/a_mbstr.c + crypto/asn1/a_object.c + crypto/asn1/a_octet.c + crypto/asn1/a_strex.c + crypto/asn1/a_strnid.c + crypto/asn1/a_time.c + crypto/asn1/a_type.c + crypto/asn1/a_utctm.c + crypto/asn1/asn1_lib.c + crypto/asn1/asn1_par.c + crypto/asn1/asn_pack.c + crypto/asn1/f_int.c + crypto/asn1/f_string.c + crypto/asn1/posix_time.c + crypto/asn1/tasn_dec.c + crypto/asn1/tasn_enc.c + crypto/asn1/tasn_fre.c + crypto/asn1/tasn_new.c + crypto/asn1/tasn_typ.c + crypto/asn1/tasn_utl.c + crypto/base64/base64.c + crypto/bio/bio.c + crypto/bio/bio_mem.c + crypto/bio/connect.c + crypto/bio/errno.c + crypto/bio/fd.c + crypto/bio/file.c + crypto/bio/hexdump.c + crypto/bio/pair.c + crypto/bio/printf.c + crypto/bio/socket.c + crypto/bio/socket_helper.c + crypto/blake2/blake2.c + crypto/bn_extra/bn_asn1.c + crypto/bn_extra/convert.c + crypto/buf/buf.c + crypto/bytestring/asn1_compat.c + crypto/bytestring/ber.c + crypto/bytestring/cbb.c + crypto/bytestring/cbs.c + crypto/bytestring/unicode.c + crypto/chacha/chacha.c + crypto/cipher_extra/cipher_extra.c + crypto/cipher_extra/derive_key.c + crypto/cipher_extra/e_aesctrhmac.c + crypto/cipher_extra/e_aesgcmsiv.c + crypto/cipher_extra/e_chacha20poly1305.c + crypto/cipher_extra/e_des.c + crypto/cipher_extra/e_null.c + crypto/cipher_extra/e_rc2.c + crypto/cipher_extra/e_rc4.c + crypto/cipher_extra/e_tls.c + crypto/cipher_extra/tls_cbc.c + crypto/conf/conf.c + crypto/cpu_aarch64_apple.c + crypto/cpu_aarch64_fuchsia.c + crypto/cpu_aarch64_linux.c + crypto/cpu_aarch64_openbsd.c + crypto/cpu_aarch64_sysreg.c + crypto/cpu_aarch64_win.c + crypto/cpu_arm_freebsd.c + crypto/cpu_arm_linux.c + crypto/cpu_intel.c + crypto/crypto.c + crypto/curve25519/curve25519.c + crypto/curve25519/curve25519_64_adx.c + crypto/curve25519/spake25519.c + crypto/des/des.c + crypto/dh_extra/dh_asn1.c + crypto/dh_extra/params.c + crypto/digest_extra/digest_extra.c + crypto/dsa/dsa.c + crypto/dsa/dsa_asn1.c + crypto/ec_extra/ec_asn1.c + crypto/ec_extra/ec_derive.c + crypto/ec_extra/hash_to_curve.c + crypto/ecdh_extra/ecdh_extra.c + crypto/ecdsa_extra/ecdsa_asn1.c + crypto/engine/engine.c + crypto/err/err.c + crypto/evp/evp.c + crypto/evp/evp_asn1.c + crypto/evp/evp_ctx.c + crypto/evp/p_dh.c + crypto/evp/p_dh_asn1.c + crypto/evp/p_dsa_asn1.c + crypto/evp/p_ec.c + crypto/evp/p_ec_asn1.c + crypto/evp/p_ed25519.c + crypto/evp/p_ed25519_asn1.c + crypto/evp/p_hkdf.c + crypto/evp/p_rsa.c + crypto/evp/p_rsa_asn1.c + crypto/evp/p_x25519.c + crypto/evp/p_x25519_asn1.c + crypto/evp/pbkdf.c + crypto/evp/print.c + crypto/evp/scrypt.c + crypto/evp/sign.c + crypto/ex_data.c + crypto/fipsmodule/fips_shared_support.c + crypto/hpke/hpke.c + crypto/hrss/hrss.c + crypto/keccak/keccak.c + crypto/kyber/kyber.c + crypto/lhash/lhash.c + crypto/mem.c + crypto/obj/obj.c + crypto/obj/obj_xref.c + crypto/pem/pem_all.c + crypto/pem/pem_info.c + crypto/pem/pem_lib.c + crypto/pem/pem_oth.c + crypto/pem/pem_pk8.c + crypto/pem/pem_pkey.c + crypto/pem/pem_x509.c + crypto/pem/pem_xaux.c + crypto/pkcs7/pkcs7.c + crypto/pkcs7/pkcs7_x509.c + crypto/pkcs8/p5_pbev2.c + crypto/pkcs8/pkcs8.c + crypto/pkcs8/pkcs8_x509.c + crypto/poly1305/poly1305.c + crypto/poly1305/poly1305_arm.c + crypto/poly1305/poly1305_vec.c + crypto/pool/pool.c + crypto/rand_extra/deterministic.c + crypto/rand_extra/forkunsafe.c + crypto/rand_extra/getentropy.c + crypto/rand_extra/ios.c + crypto/rand_extra/passive.c + crypto/rand_extra/rand_extra.c + crypto/rand_extra/trusty.c + crypto/rand_extra/windows.c + crypto/rc4/rc4.c + crypto/refcount.c + crypto/rsa_extra/rsa_asn1.c + crypto/rsa_extra/rsa_crypt.c + crypto/rsa_extra/rsa_print.c + crypto/siphash/siphash.c + crypto/spx/address.c + crypto/spx/fors.c + crypto/spx/merkle.c + crypto/spx/spx.c + crypto/spx/spx_util.c + crypto/spx/thash.c + crypto/spx/wots.c + crypto/stack/stack.c + crypto/thread.c + crypto/thread_none.c + crypto/thread_pthread.c + crypto/thread_win.c + crypto/thread_win.cc + crypto/trust_token/pmbtoken.c + crypto/trust_token/trust_token.c + crypto/trust_token/voprf.c + crypto/x509/a_digest.c + crypto/x509/a_sign.c + crypto/x509/a_verify.c + crypto/x509/algorithm.c + crypto/x509/asn1_gen.c + crypto/x509/by_dir.c + crypto/x509/by_file.c + crypto/x509/i2d_pr.c + crypto/x509/name_print.c + crypto/x509/policy.c + crypto/x509/rsa_pss.c + crypto/x509/t_crl.c + crypto/x509/t_req.c + crypto/x509/t_x509.c + crypto/x509/t_x509a.c + crypto/x509/v3_akey.c + crypto/x509/v3_akeya.c + crypto/x509/v3_alt.c + crypto/x509/v3_bcons.c + crypto/x509/v3_bitst.c + crypto/x509/v3_conf.c + crypto/x509/v3_cpols.c + crypto/x509/v3_crld.c + crypto/x509/v3_enum.c + crypto/x509/v3_extku.c + crypto/x509/v3_genn.c + crypto/x509/v3_ia5.c + crypto/x509/v3_info.c + crypto/x509/v3_int.c + crypto/x509/v3_lib.c + crypto/x509/v3_ncons.c + crypto/x509/v3_ocsp.c + crypto/x509/v3_pcons.c + crypto/x509/v3_pmaps.c + crypto/x509/v3_prn.c + crypto/x509/v3_purp.c + crypto/x509/v3_skey.c + crypto/x509/v3_utl.c + crypto/x509/x509.c + crypto/x509/x509_att.c + crypto/x509/x509_cmp.c + crypto/x509/x509_d2.c + crypto/x509/x509_def.c + crypto/x509/x509_ext.c + crypto/x509/x509_lu.c + crypto/x509/x509_obj.c + crypto/x509/x509_req.c + crypto/x509/x509_set.c + crypto/x509/x509_trs.c + crypto/x509/x509_txt.c + crypto/x509/x509_v3.c + crypto/x509/x509_vfy.c + crypto/x509/x509_vpm.c + crypto/x509/x509cset.c + crypto/x509/x509name.c + crypto/x509/x509rset.c + crypto/x509/x509spki.c + crypto/x509/x_algor.c + crypto/x509/x_all.c + crypto/x509/x_attrib.c + crypto/x509/x_crl.c + crypto/x509/x_exten.c + crypto/x509/x_name.c + crypto/x509/x_pubkey.c + crypto/x509/x_req.c + crypto/x509/x_sig.c + crypto/x509/x_spki.c + crypto/x509/x_val.c + crypto/x509/x_x509.c + crypto/x509/x_x509a.c + gen/crypto/err_data.c +) + +set( + CRYPTO_HEADERS + + include/openssl/aead.h + include/openssl/aes.h + include/openssl/arm_arch.h + include/openssl/asm_base.h + include/openssl/asn1.h + include/openssl/asn1_mac.h + include/openssl/asn1t.h + include/openssl/base.h + include/openssl/base64.h + include/openssl/bio.h + include/openssl/blake2.h + include/openssl/blowfish.h + include/openssl/bn.h + include/openssl/buf.h + include/openssl/buffer.h + include/openssl/bytestring.h + include/openssl/cast.h + include/openssl/chacha.h + include/openssl/cipher.h + include/openssl/cmac.h + include/openssl/conf.h + include/openssl/cpu.h + include/openssl/crypto.h + include/openssl/ctrdrbg.h + include/openssl/curve25519.h + include/openssl/des.h + include/openssl/dh.h + include/openssl/digest.h + include/openssl/dsa.h + include/openssl/e_os2.h + include/openssl/ec.h + include/openssl/ec_key.h + include/openssl/ecdh.h + include/openssl/ecdsa.h + include/openssl/engine.h + include/openssl/err.h + include/openssl/evp.h + include/openssl/evp_errors.h + include/openssl/ex_data.h + include/openssl/experimental/kyber.h + include/openssl/experimental/spx.h + include/openssl/hkdf.h + include/openssl/hmac.h + include/openssl/hpke.h + include/openssl/hrss.h + include/openssl/is_boringssl.h + include/openssl/kdf.h + include/openssl/lhash.h + include/openssl/md4.h + include/openssl/md5.h + include/openssl/mem.h + include/openssl/nid.h + include/openssl/obj.h + include/openssl/obj_mac.h + include/openssl/objects.h + include/openssl/opensslconf.h + include/openssl/opensslv.h + include/openssl/ossl_typ.h + include/openssl/pem.h + include/openssl/pkcs12.h + include/openssl/pkcs7.h + include/openssl/pkcs8.h + include/openssl/poly1305.h + include/openssl/pool.h + include/openssl/posix_time.h + include/openssl/rand.h + include/openssl/rc4.h + include/openssl/ripemd.h + include/openssl/rsa.h + include/openssl/safestack.h + include/openssl/service_indicator.h + include/openssl/sha.h + include/openssl/siphash.h + include/openssl/span.h + include/openssl/stack.h + include/openssl/target.h + include/openssl/thread.h + include/openssl/time.h + include/openssl/trust_token.h + include/openssl/type_check.h + include/openssl/x509.h + include/openssl/x509_vfy.h + include/openssl/x509v3.h + include/openssl/x509v3_errors.h +) + +set( + CRYPTO_INTERNAL_HEADERS + + crypto/asn1/internal.h + crypto/bio/internal.h + crypto/bytestring/internal.h + crypto/chacha/internal.h + crypto/cipher_extra/internal.h + crypto/conf/conf_def.h + crypto/conf/internal.h + crypto/cpu_arm_linux.h + crypto/curve25519/curve25519_tables.h + crypto/curve25519/internal.h + crypto/des/internal.h + crypto/dsa/internal.h + crypto/ec_extra/internal.h + crypto/err/internal.h + crypto/evp/internal.h + crypto/fipsmodule/aes/internal.h + crypto/fipsmodule/bn/internal.h + crypto/fipsmodule/bn/rsaz_exp.h + crypto/fipsmodule/cipher/internal.h + crypto/fipsmodule/delocate.h + crypto/fipsmodule/dh/internal.h + crypto/fipsmodule/digest/internal.h + crypto/fipsmodule/digest/md32_common.h + crypto/fipsmodule/ec/builtin_curves.h + crypto/fipsmodule/ec/internal.h + crypto/fipsmodule/ec/p256-nistz-table.h + crypto/fipsmodule/ec/p256-nistz.h + crypto/fipsmodule/ec/p256_table.h + crypto/fipsmodule/ecdsa/internal.h + crypto/fipsmodule/md5/internal.h + crypto/fipsmodule/modes/internal.h + crypto/fipsmodule/rand/fork_detect.h + crypto/fipsmodule/rand/getrandom_fillin.h + crypto/fipsmodule/rand/internal.h + crypto/fipsmodule/rsa/internal.h + crypto/fipsmodule/service_indicator/internal.h + crypto/fipsmodule/sha/internal.h + crypto/fipsmodule/tls/internal.h + crypto/hrss/internal.h + crypto/internal.h + crypto/keccak/internal.h + crypto/kyber/internal.h + crypto/lhash/internal.h + crypto/obj/obj_dat.h + crypto/pkcs7/internal.h + crypto/pkcs8/internal.h + crypto/poly1305/internal.h + crypto/pool/internal.h + crypto/rsa_extra/internal.h + crypto/spx/address.h + crypto/spx/fors.h + crypto/spx/merkle.h + crypto/spx/params.h + crypto/spx/spx_util.h + crypto/spx/thash.h + crypto/spx/wots.h + crypto/trust_token/internal.h + crypto/x509/ext_dat.h + crypto/x509/internal.h + third_party/fiat/curve25519_32.h + third_party/fiat/curve25519_64.h + third_party/fiat/curve25519_64_adx.h + third_party/fiat/curve25519_64_msvc.h + third_party/fiat/p256_32.h + third_party/fiat/p256_64.h + third_party/fiat/p256_64_msvc.h +) + +set( + CRYPTO_SOURCES_ASM + + crypto/curve25519/asm/x25519-asm-arm.S + crypto/hrss/asm/poly_rq_mul.S + crypto/poly1305/poly1305_arm_asm.S + gen/crypto/aes128gcmsiv-x86_64-apple.S + gen/crypto/aes128gcmsiv-x86_64-linux.S + gen/crypto/chacha-armv4-linux.S + gen/crypto/chacha-armv8-apple.S + gen/crypto/chacha-armv8-linux.S + gen/crypto/chacha-armv8-win.S + gen/crypto/chacha-x86-apple.S + gen/crypto/chacha-x86-linux.S + gen/crypto/chacha-x86_64-apple.S + gen/crypto/chacha-x86_64-linux.S + gen/crypto/chacha20_poly1305_armv8-apple.S + gen/crypto/chacha20_poly1305_armv8-linux.S + gen/crypto/chacha20_poly1305_armv8-win.S + gen/crypto/chacha20_poly1305_x86_64-apple.S + gen/crypto/chacha20_poly1305_x86_64-linux.S + third_party/fiat/asm/fiat_curve25519_adx_mul.S + third_party/fiat/asm/fiat_curve25519_adx_square.S + third_party/fiat/asm/fiat_p256_adx_mul.S + third_party/fiat/asm/fiat_p256_adx_sqr.S +) + +set( + CRYPTO_SOURCES_NASM + + gen/crypto/aes128gcmsiv-x86_64-win.asm + gen/crypto/chacha-x86-win.asm + gen/crypto/chacha-x86_64-win.asm + gen/crypto/chacha20_poly1305_x86_64-win.asm +) set( CRYPTO_TEST_SOURCES @@ -24,10 +719,10 @@ set( crypto/curve25519/ed25519_test.cc crypto/curve25519/spake25519_test.cc crypto/curve25519/x25519_test.cc - crypto/ecdh_extra/ecdh_test.cc crypto/dh_extra/dh_test.cc crypto/digest_extra/digest_test.cc crypto/dsa/dsa_test.cc + crypto/ecdh_extra/ecdh_test.cc crypto/err/err_test.cc crypto/evp/evp_extra_test.cc crypto/evp/evp_test.cc @@ -47,8 +742,8 @@ set( crypto/fipsmodule/rand/fork_detect_test.cc crypto/fipsmodule/service_indicator/service_indicator_test.cc crypto/fipsmodule/sha/sha_test.cc - crypto/hpke/hpke_test.cc crypto/hmac_extra/hmac_test.cc + crypto/hpke/hpke_test.cc crypto/hrss/hrss_test.cc crypto/impl_dispatch_test.cc crypto/keccak/keccak_test.cc @@ -57,24 +752,20 @@ set( crypto/obj/obj_test.cc crypto/pem/pem_test.cc crypto/pkcs7/pkcs7_test.cc - crypto/pkcs8/pkcs8_test.cc crypto/pkcs8/pkcs12_test.cc + crypto/pkcs8/pkcs8_test.cc crypto/poly1305/poly1305_test.cc crypto/pool/pool_test.cc - crypto/rand_extra/rand_test.cc crypto/rand_extra/getentropy_test.cc + crypto/rand_extra/rand_test.cc crypto/refcount_test.cc crypto/rsa_extra/rsa_test.cc crypto/self_test.cc - crypto/stack/stack_test.cc crypto/siphash/siphash_test.cc crypto/spx/spx_test.cc - crypto/thread_test.cc - # TODO(crbug.com/boringssl/542): This should be in TEST_SUPPORT_SOURCES, so - # that all tests can use it. But it depends on GetTestData, which is not - # currently usable outside of crypto_test. - crypto/test/file_test_gtest.cc + crypto/stack/stack_test.cc crypto/test/gtest_main.cc + crypto/thread_test.cc crypto/trust_token/trust_token_test.cc crypto/x509/tab_test.cc crypto/x509/x509_test.cc @@ -87,8 +778,8 @@ set( crypto/blake2/blake2b256_tests.txt crypto/cipher_extra/test/aes_128_cbc_sha1_tls_implicit_iv_tests.txt crypto/cipher_extra/test/aes_128_cbc_sha1_tls_tests.txt - crypto/cipher_extra/test/aes_128_ccm_bluetooth_tests.txt crypto/cipher_extra/test/aes_128_ccm_bluetooth_8_tests.txt + crypto/cipher_extra/test/aes_128_ccm_bluetooth_tests.txt crypto/cipher_extra/test/aes_128_ccm_matter_tests.txt crypto/cipher_extra/test/aes_128_ctr_hmac_sha256.txt crypto/cipher_extra/test/aes_128_gcm_randnonce_tests.txt @@ -102,7 +793,6 @@ set( crypto/cipher_extra/test/aes_256_gcm_siv_tests.txt crypto/cipher_extra/test/aes_256_gcm_tests.txt crypto/cipher_extra/test/chacha20_poly1305_tests.txt - crypto/cipher_extra/test/xchacha20_poly1305_tests.txt crypto/cipher_extra/test/cipher_tests.txt crypto/cipher_extra/test/des_ede3_cbc_sha1_tls_implicit_iv_tests.txt crypto/cipher_extra/test/des_ede3_cbc_sha1_tls_tests.txt @@ -116,6 +806,7 @@ set( crypto/cipher_extra/test/nist_cavp/aes_256_gcm.txt crypto/cipher_extra/test/nist_cavp/tdes_cbc.txt crypto/cipher_extra/test/nist_cavp/tdes_ecb.txt + crypto/cipher_extra/test/xchacha20_poly1305_tests.txt crypto/curve25519/ed25519_tests.txt crypto/ecdh_extra/ecdh_tests.txt crypto/evp/evp_tests.txt @@ -198,18 +889,19 @@ set( crypto/x509/test/many_names1.pem crypto/x509/test/many_names2.pem crypto/x509/test/many_names3.pem + crypto/x509/test/policy_intermediate.pem crypto/x509/test/policy_intermediate_any.pem crypto/x509/test/policy_intermediate_duplicate.pem crypto/x509/test/policy_intermediate_invalid.pem + crypto/x509/test/policy_intermediate_mapped.pem crypto/x509/test/policy_intermediate_mapped_any.pem crypto/x509/test/policy_intermediate_mapped_oid3.pem - crypto/x509/test/policy_intermediate_mapped.pem - crypto/x509/test/policy_intermediate_require_duplicate.pem - crypto/x509/test/policy_intermediate_require_no_policies.pem crypto/x509/test/policy_intermediate_require.pem crypto/x509/test/policy_intermediate_require1.pem crypto/x509/test/policy_intermediate_require2.pem - crypto/x509/test/policy_intermediate.pem + crypto/x509/test/policy_intermediate_require_duplicate.pem + crypto/x509/test/policy_intermediate_require_no_policies.pem + crypto/x509/test/policy_leaf.pem crypto/x509/test/policy_leaf_any.pem crypto/x509/test/policy_leaf_duplicate.pem crypto/x509/test/policy_leaf_invalid.pem @@ -221,23 +913,22 @@ set( crypto/x509/test/policy_leaf_oid5.pem crypto/x509/test/policy_leaf_require.pem crypto/x509/test/policy_leaf_require1.pem - crypto/x509/test/policy_leaf.pem - crypto/x509/test/policy_root_cross_inhibit_mapping.pem crypto/x509/test/policy_root.pem crypto/x509/test/policy_root2.pem + crypto/x509/test/policy_root_cross_inhibit_mapping.pem + crypto/x509/test/pss_sha1.pem crypto/x509/test/pss_sha1_explicit.pem crypto/x509/test/pss_sha1_mgf1_syntax_error.pem - crypto/x509/test/pss_sha1.pem crypto/x509/test/pss_sha224.pem + crypto/x509/test/pss_sha256.pem crypto/x509/test/pss_sha256_explicit_trailer.pem crypto/x509/test/pss_sha256_mgf1_sha384.pem crypto/x509/test/pss_sha256_mgf1_syntax_error.pem crypto/x509/test/pss_sha256_omit_nulls.pem - crypto/x509/test/pss_sha256_salt_overflow.pem crypto/x509/test/pss_sha256_salt31.pem + crypto/x509/test/pss_sha256_salt_overflow.pem crypto/x509/test/pss_sha256_unknown_mgf.pem crypto/x509/test/pss_sha256_wrong_trailer.pem - crypto/x509/test/pss_sha256.pem crypto/x509/test/pss_sha384.pem crypto/x509/test/pss_sha512.pem crypto/x509/test/some_names1.pem @@ -278,8 +969,8 @@ set( third_party/wycheproof_testvectors/hmac_sha256_test.txt third_party/wycheproof_testvectors/hmac_sha384_test.txt third_party/wycheproof_testvectors/hmac_sha512_test.txt - third_party/wycheproof_testvectors/kwp_test.txt third_party/wycheproof_testvectors/kw_test.txt + third_party/wycheproof_testvectors/kwp_test.txt third_party/wycheproof_testvectors/primality_test.txt third_party/wycheproof_testvectors/rsa_oaep_2048_sha1_mgf1sha1_test.txt third_party/wycheproof_testvectors/rsa_oaep_2048_sha224_mgf1sha1_test.txt @@ -325,18 +1016,25 @@ set( ) set( - URANDOM_TEST_SOURCES + DECREPIT_SOURCES - crypto/fipsmodule/rand/urandom_test.cc -) - -set( - SSL_TEST_SOURCES - - crypto/test/gtest_main.cc - ssl/span_test.cc - ssl/ssl_c_test.c - ssl/ssl_test.cc + decrepit/bio/base64_bio.c + decrepit/blowfish/blowfish.c + decrepit/cast/cast.c + decrepit/cast/cast_tables.c + decrepit/cfb/cfb.c + decrepit/des/cfb64ede.c + decrepit/dh/dh_decrepit.c + decrepit/dsa/dsa_decrepit.c + decrepit/evp/dss1.c + decrepit/evp/evp_do_all.c + decrepit/obj/obj_decrepit.c + decrepit/rc4/rc4_decrepit.c + decrepit/ripemd/ripemd.c + decrepit/rsa/rsa_decrepit.c + decrepit/ssl/ssl_decrepit.c + decrepit/x509/x509_decrepit.c + decrepit/xts/xts.c ) set( @@ -382,21 +1080,75 @@ set( pki/signature_algorithm.cc pki/simple_path_builder_delegate.cc pki/string_util.cc + pki/trust_store.cc pki/trust_store_collection.cc pki/trust_store_in_memory.cc - pki/trust_store.cc pki/verify_certificate_chain.cc + pki/verify_error.cc pki/verify_name_match.cc pki/verify_signed_data.cc ) +set( + PKI_HEADERS + + include/openssl/pki/certificate.h + include/openssl/pki/signature_verify_cache.h + include/openssl/pki/verify_error.h +) + +set( + PKI_INTERNAL_HEADERS + + pki/cert_error_id.h + pki/cert_error_params.h + pki/cert_errors.h + pki/cert_issuer_source.h + pki/cert_issuer_source_static.h + pki/cert_issuer_source_sync_unittest.h + pki/certificate_policies.h + pki/common_cert_errors.h + pki/crl.h + pki/encode_values.h + pki/extended_key_usage.h + pki/general_names.h + pki/input.h + pki/ip_util.h + pki/mock_signature_verify_cache.h + pki/name_constraints.h + pki/nist_pkits_unittest.h + pki/ocsp.h + pki/ocsp_revocation_status.h + pki/ocsp_verify_result.h + pki/parse_certificate.h + pki/parse_name.h + pki/parse_values.h + pki/parsed_certificate.h + pki/parser.h + pki/path_builder.h + pki/pem.h + pki/revocation_util.h + pki/signature_algorithm.h + pki/simple_path_builder_delegate.h + pki/string_util.h + pki/test_helpers.h + pki/testdata/nist-pkits/pkits_testcases-inl.h + pki/trust_store.h + pki/trust_store_collection.h + pki/trust_store_in_memory.h + pki/verify_certificate_chain.h + pki/verify_certificate_chain_typed_unittest.h + pki/verify_name_match.h + pki/verify_signed_data.h +) + set( PKI_TEST_SOURCES crypto/test/gtest_main.cc pki/cert_issuer_source_static_unittest.cc - pki/certificate_unittest.cc pki/certificate_policies_unittest.cc + pki/certificate_unittest.cc pki/crl_unittest.cc pki/encode_values_unittest.cc pki/extended_key_usage_unittest.cc @@ -428,38 +1180,6 @@ set( pki/verify_signed_data_unittest.cc ) -set( - TEST_SUPPORT_SOURCES - - crypto/test/abi_test.cc - crypto/test/file_test.cc - crypto/test/file_util.cc - crypto/test/test_util.cc - crypto/test/wycheproof_util.cc -) - -set( - BSSL_SOURCES - - tool/args.cc - tool/ciphers.cc - tool/client.cc - tool/const.cc - tool/digest.cc - tool/fd.cc - tool/file.cc - tool/generate_ech.cc - tool/generate_ed25519.cc - tool/genrsa.cc - tool/pkcs12.cc - tool/rand.cc - tool/server.cc - tool/sign.cc - tool/speed.cc - tool/tool.cc - tool/transport_common.cc -) - set( PKI_TEST_DATA @@ -468,27 +1188,14 @@ set( pki/testdata/cert_issuer_source_static_unittest/d.pem pki/testdata/cert_issuer_source_static_unittest/e1.pem pki/testdata/cert_issuer_source_static_unittest/e2.pem - pki/testdata/cert_issuer_source_static_unittest/generate-certs.py pki/testdata/cert_issuer_source_static_unittest/i1_1.pem pki/testdata/cert_issuer_source_static_unittest/i1_2.pem pki/testdata/cert_issuer_source_static_unittest/i2.pem pki/testdata/cert_issuer_source_static_unittest/i3_1.pem pki/testdata/cert_issuer_source_static_unittest/i3_2.pem - pki/testdata/cert_issuer_source_static_unittest/keys/C1.key - pki/testdata/cert_issuer_source_static_unittest/keys/C2.key - pki/testdata/cert_issuer_source_static_unittest/keys/D.key - pki/testdata/cert_issuer_source_static_unittest/keys/E1.key - pki/testdata/cert_issuer_source_static_unittest/keys/E2.key - pki/testdata/cert_issuer_source_static_unittest/keys/I1.key - pki/testdata/cert_issuer_source_static_unittest/keys/I2.key - pki/testdata/cert_issuer_source_static_unittest/keys/I3.key - pki/testdata/cert_issuer_source_static_unittest/keys/I3_1.key - pki/testdata/cert_issuer_source_static_unittest/keys/Root.key - pki/testdata/cert_issuer_source_static_unittest/keys/i1_1.key pki/testdata/cert_issuer_source_static_unittest/root.pem pki/testdata/certificate_policies_unittest/anypolicy.pem pki/testdata/certificate_policies_unittest/anypolicy_with_qualifier.pem - pki/testdata/certificate_policies_unittest/generate_policies.py pki/testdata/certificate_policies_unittest/invalid-anypolicy_with_custom_qualifier.pem pki/testdata/certificate_policies_unittest/invalid-empty.pem pki/testdata/certificate_policies_unittest/invalid-policy_1_2_3_dupe.pem @@ -517,7 +1224,6 @@ set( pki/testdata/crl_unittest/bad_thisupdate_in_future.pem pki/testdata/crl_unittest/bad_thisupdate_too_old.pem pki/testdata/crl_unittest/bad_wrong_issuer.pem - pki/testdata/crl_unittest/generate_crl_test_data.py pki/testdata/crl_unittest/good.pem pki/testdata/crl_unittest/good_fake_extension.pem pki/testdata/crl_unittest/good_fake_extension_no_nextupdate.pem @@ -588,7 +1294,6 @@ set( pki/testdata/name_constraints_unittest/dnsname2.pem pki/testdata/name_constraints_unittest/edipartyname-excluded.pem pki/testdata/name_constraints_unittest/edipartyname-permitted.pem - pki/testdata/name_constraints_unittest/generate_name_constraints.py pki/testdata/name_constraints_unittest/invalid-empty_excluded_subtree.pem pki/testdata/name_constraints_unittest/invalid-empty_permitted_subtree.pem pki/testdata/name_constraints_unittest/invalid-no_subtrees.pem @@ -676,8 +1381,6 @@ set( pki/testdata/name_constraints_unittest/uri-permitted.pem pki/testdata/name_constraints_unittest/x400address-excluded.pem pki/testdata/name_constraints_unittest/x400address-permitted.pem - pki/testdata/nist-pkits/BUILD.gn - pki/testdata/nist-pkits/README.chromium pki/testdata/nist-pkits/certs/AllCertificatesNoPoliciesTest2EE.crt pki/testdata/nist-pkits/certs/AllCertificatesSamePoliciesTest10EE.crt pki/testdata/nist-pkits/certs/AllCertificatesSamePoliciesTest13EE.crt @@ -1256,11 +1959,6 @@ set( pki/testdata/nist-pkits/crls/requireExplicitPolicy7subCARE2CRL.crl pki/testdata/nist-pkits/crls/requireExplicitPolicy7subsubCARE2RE4CRL.crl pki/testdata/nist-pkits/crls/requireExplicitPolicy7subsubsubCARE2RE4CRL.crl - pki/testdata/nist-pkits/generate_tests.py - pki/testdata/nist-pkits/pkits_testcases-inl.h - pki/testdata/nist-pkits/test_bundle_data.filelist - pki/testdata/nist-pkits/test_bundle_data.globlist - pki/testdata/ocsp_unittest/annotate_test_data.py pki/testdata/ocsp_unittest/bad_ocsp_type.pem pki/testdata/ocsp_unittest/bad_signature.pem pki/testdata/ocsp_unittest/bad_status.pem @@ -1273,7 +1971,6 @@ set( pki/testdata/ocsp_unittest/has_extension.pem pki/testdata/ocsp_unittest/has_single_extension.pem pki/testdata/ocsp_unittest/has_version.pem - pki/testdata/ocsp_unittest/make_ocsp.py pki/testdata/ocsp_unittest/malformed_request.pem pki/testdata/ocsp_unittest/missing_response.pem pki/testdata/ocsp_unittest/multiple_response.pem @@ -1292,7 +1989,6 @@ set( pki/testdata/parse_certificate_unittest/authority_key_identifier/empty_sequence.pem pki/testdata/parse_certificate_unittest/authority_key_identifier/extra_contents_after_extension_sequence.pem pki/testdata/parse_certificate_unittest/authority_key_identifier/extra_contents_after_issuer_and_serial.pem - pki/testdata/parse_certificate_unittest/authority_key_identifier/generate.py pki/testdata/parse_certificate_unittest/authority_key_identifier/invalid_contents.pem pki/testdata/parse_certificate_unittest/authority_key_identifier/invalid_issuer.pem pki/testdata/parse_certificate_unittest/authority_key_identifier/invalid_key_identifier.pem @@ -1351,8 +2047,6 @@ set( pki/testdata/parse_certificate_unittest/policy_constraints_inhibit_require.pem pki/testdata/parse_certificate_unittest/policy_constraints_require.pem pki/testdata/parse_certificate_unittest/policy_qualifiers_empty_sequence.pem - pki/testdata/parse_certificate_unittest/rebase-errors.py - pki/testdata/parse_certificate_unittest/regenerate_pem_from_ascii.py pki/testdata/parse_certificate_unittest/serial_37_bytes.pem pki/testdata/parse_certificate_unittest/serial_negative.pem pki/testdata/parse_certificate_unittest/serial_not_minimal.pem @@ -1395,20 +2089,12 @@ set( pki/testdata/parse_certificate_unittest/tbs_validity_relaxed.pem pki/testdata/parse_certificate_unittest/tbs_validity_utc_time_and_generalized_time.pem pki/testdata/parse_certificate_unittest/v1_explicit_version.pem - pki/testdata/parse_certificate_unittest/v3_certificate_template.pk8 - pki/testdata/parse_certificate_unittest/v3_certificate_template.txt - pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/generate-certs.py pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/int_match_name_only.pem pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/int_matching.pem pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/int_mismatch.pem - pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/keys/Intermediate.key - pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/keys/Root.key - pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/keys/Root2.key - pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/keys/Target.key pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/root.pem pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/root2.pem pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/target.pem - pki/testdata/path_builder_unittest/key_id_prioritization/generate-certs.py pki/testdata/path_builder_unittest/key_id_prioritization/int_different_ski_a.pem pki/testdata/path_builder_unittest/key_id_prioritization/int_different_ski_b.pem pki/testdata/path_builder_unittest/key_id_prioritization/int_different_ski_c.pem @@ -1418,10 +2104,6 @@ set( pki/testdata/path_builder_unittest/key_id_prioritization/int_no_ski_a.pem pki/testdata/path_builder_unittest/key_id_prioritization/int_no_ski_b.pem pki/testdata/path_builder_unittest/key_id_prioritization/int_no_ski_c.pem - pki/testdata/path_builder_unittest/key_id_prioritization/keys/Intermediate.key - pki/testdata/path_builder_unittest/key_id_prioritization/keys/Intermediate_1.key - pki/testdata/path_builder_unittest/key_id_prioritization/keys/Root.key - pki/testdata/path_builder_unittest/key_id_prioritization/keys/Target.key pki/testdata/path_builder_unittest/key_id_prioritization/root.pem pki/testdata/path_builder_unittest/key_id_prioritization/target.pem pki/testdata/path_builder_unittest/multi-root-A-by-B.pem @@ -1434,46 +2116,22 @@ set( pki/testdata/path_builder_unittest/multi-root-F-by-E.pem pki/testdata/path_builder_unittest/precertificate/precertificate.pem pki/testdata/path_builder_unittest/precertificate/root.pem - pki/testdata/path_builder_unittest/self_issued_prioritization/generate-certs.py - pki/testdata/path_builder_unittest/self_issued_prioritization/keys/Root1.key - pki/testdata/path_builder_unittest/self_issued_prioritization/keys/Root2.key - pki/testdata/path_builder_unittest/self_issued_prioritization/keys/Target.key pki/testdata/path_builder_unittest/self_issued_prioritization/root1.pem pki/testdata/path_builder_unittest/self_issued_prioritization/root1_cross.pem pki/testdata/path_builder_unittest/self_issued_prioritization/root2.pem pki/testdata/path_builder_unittest/self_issued_prioritization/target.pem - pki/testdata/path_builder_unittest/validity_date_prioritization/generate-certs.py pki/testdata/path_builder_unittest/validity_date_prioritization/int_ac.pem pki/testdata/path_builder_unittest/validity_date_prioritization/int_ad.pem pki/testdata/path_builder_unittest/validity_date_prioritization/int_bc.pem pki/testdata/path_builder_unittest/validity_date_prioritization/int_bd.pem - pki/testdata/path_builder_unittest/validity_date_prioritization/keys/Intermediate.key - pki/testdata/path_builder_unittest/validity_date_prioritization/keys/Root.key - pki/testdata/path_builder_unittest/validity_date_prioritization/keys/Target.key pki/testdata/path_builder_unittest/validity_date_prioritization/root.pem pki/testdata/path_builder_unittest/validity_date_prioritization/target.pem - pki/testdata/verify_certificate_chain_unittest/README pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/chain.pem - pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/keys/Intermediate_1.key - pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/keys/Target.key pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/main.test pki/testdata/verify_certificate_chain_unittest/expired-intermediate/chain.pem - pki/testdata/verify_certificate_chain_unittest/expired-intermediate/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/expired-intermediate/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/expired-intermediate/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/expired-intermediate/keys/Target.key pki/testdata/verify_certificate_chain_unittest/expired-intermediate/not-after.test pki/testdata/verify_certificate_chain_unittest/expired-intermediate/not-before.test pki/testdata/verify_certificate_chain_unittest/expired-root/chain.pem - pki/testdata/verify_certificate_chain_unittest/expired-root/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/expired-root/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/expired-root/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/expired-root/keys/Target.key - pki/testdata/verify_certificate_chain_unittest/expired-root/keys/expired-unconstrained-root_Root.key - pki/testdata/verify_certificate_chain_unittest/expired-root/keys/expired-unconstrained-root_Target.key pki/testdata/verify_certificate_chain_unittest/expired-root/not-after-ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/expired-root/not-after-ta-with-expiration-and-constraints.test pki/testdata/verify_certificate_chain_unittest/expired-root/not-after-ta-with-expiration.test @@ -1481,67 +2139,28 @@ set( pki/testdata/verify_certificate_chain_unittest/expired-root/not-before-ta-with-expiration.test pki/testdata/verify_certificate_chain_unittest/expired-root/not-before.test pki/testdata/verify_certificate_chain_unittest/expired-target/chain.pem - pki/testdata/verify_certificate_chain_unittest/expired-target/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/expired-target/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/expired-target/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/expired-target/keys/Target.key pki/testdata/verify_certificate_chain_unittest/expired-target/not-after.test pki/testdata/verify_certificate_chain_unittest/expired-target/not-before.test - pki/testdata/verify_certificate_chain_unittest/generate-all.sh pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/chain.pem - pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/keys/BogusRoot.key - pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/keys/Target.key pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/keys/Intermediate_1.key - pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/keys/Root_1.key - pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-ca-false/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-ca-false/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-ca-false/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-ca-false/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-ca-false/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-ca-false/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-not-critical/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-not-critical/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-not-critical/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-not-critical/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-not-critical/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-not-critical/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/any.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/chain.pem pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/clientauth-strict.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/clientauth.test - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/serverauth-strict.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/serverauth.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/any.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/chain.pem pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/clientauth-strict.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/clientauth.test - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/serverauth-strict.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/serverauth.test - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/keys/Intermediate_1.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/keys/Root_1.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/keys/Target.key - pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/keys/Target_1.key pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-chain.pem pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-eku-any.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-eku-clientAuth-strict.test @@ -1555,51 +2174,21 @@ set( pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha256-eku-serverAuth-strict.test pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha256-eku-serverAuth.test pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-basic-constraints/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-basic-constraints/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-basic-constraints/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-basic-constraints/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-basic-constraints/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-basic-constraints/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-signing-key-usage/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-signing-key-usage/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-signing-key-usage/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-signing-key-usage/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-signing-key-usage/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-signing-key-usage/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-signed-with-sha1/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-signed-with-sha1/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-signed-with-sha1/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-signed-with-sha1/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-signed-with-sha1/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-signed-with-sha1/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-critical-extension/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-critical-extension/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-critical-extension/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-critical-extension/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-critical-extension/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-critical-extension/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-non-critical-extension/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-non-critical-extension/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-non-critical-extension/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-non-critical-extension/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-non-critical-extension/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-non-critical-extension/main.test pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/chain.pem - pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/keys/Root_1.key - pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/keys/Target.key pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/main.test pki/testdata/verify_certificate_chain_unittest/issuer-and-subject-not-byte-for-byte-equal/anchor.pem pki/testdata/verify_certificate_chain_unittest/issuer-and-subject-not-byte-for-byte-equal/anchor.test pki/testdata/verify_certificate_chain_unittest/issuer-and-subject-not-byte-for-byte-equal/target.pem pki/testdata/verify_certificate_chain_unittest/issuer-and-subject-not-byte-for-byte-equal/target.test - pki/testdata/verify_certificate_chain_unittest/key-rollover/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/key-rollover/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/key-rollover/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/key-rollover/keys/Root_1.key - pki/testdata/verify_certificate_chain_unittest/key-rollover/keys/Target.key pki/testdata/verify_certificate_chain_unittest/key-rollover/longrolloverchain.pem pki/testdata/verify_certificate_chain_unittest/key-rollover/longrolloverchain.test pki/testdata/verify_certificate_chain_unittest/key-rollover/newchain.pem @@ -1608,94 +2197,8 @@ set( pki/testdata/verify_certificate_chain_unittest/key-rollover/oldchain.test pki/testdata/verify_certificate_chain_unittest/key-rollover/rolloverchain.pem pki/testdata/verify_certificate_chain_unittest/key-rollover/rolloverchain.test - pki/testdata/verify_certificate_chain_unittest/many-names/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/many-names/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/many-names/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/many-names/keys/t0.key pki/testdata/verify_certificate_chain_unittest/many-names/ok-all-types.pem pki/testdata/verify_certificate_chain_unittest/many-names/ok-all-types.test - pki/testdata/verify_certificate_chain_unittest/many-names/out/2FABB43DDCC077802A0309AD437402BF98D8D4.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/2FABB43DDCC077802A0309AD437402BF98D8D5.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/2FABB43DDCC077802A0309AD437402BF98D8D6.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/2FABB43DDCC077802A0309AD437402BF98D8D7.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/2FABB43DDCC077802A0309AD437402BF98D8D8.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/2FABB43DDCC077802A0309AD437402BF98D8D9.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/2FABB43DDCC077802A0309AD437402BF98D8DA.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/2FABB43DDCC077802A0309AD437402BF98D8DB.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6F5.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6F6.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6F7.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6F8.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6F9.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6FA.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6FB.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6FC.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/3CE5FC818859A85016C17FD7E52AE5967FC2F6FD.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.db - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.db.attr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.db.attr.old - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.db.old - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.serial - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate.serial.old - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_1.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_1.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_1.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_2.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_2.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_2.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_3.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_3.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_3.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_4.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_4.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_4.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_5.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_5.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_5.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_6.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_6.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_6.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_7.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_7.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Intermediate_7.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.db - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.db.attr - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.db.attr.old - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.db.old - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.serial - pki/testdata/verify_certificate_chain_unittest/many-names/out/Root.serial.old - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0.db - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0.serial - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_1.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_1.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_1.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_2.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_2.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_2.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_3.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_3.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_3.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_4.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_4.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_4.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_5.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_5.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_5.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_6.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_6.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_6.pem - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_7.cnf - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_7.csr - pki/testdata/verify_certificate_chain_unittest/many-names/out/t0_7.pem pki/testdata/verify_certificate_chain_unittest/many-names/toomany-all-types.pem pki/testdata/verify_certificate_chain_unittest/many-names/toomany-all-types.test pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dirnames-excluded.pem @@ -1711,11 +2214,6 @@ set( pki/testdata/verify_certificate_chain_unittest/many-names/toomany-ips-permitted.pem pki/testdata/verify_certificate_chain_unittest/many-names/toomany-ips-permitted.test pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/chain.pem - pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/keys/ShadowRoot.key - pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/keys/Target.key pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/main.test pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.1.2.txt @@ -1813,95 +2311,42 @@ set( pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.9.7.txt pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.9.8.txt pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/main.test pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/main.test pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/main.test pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/main.test pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policies-ok/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-ok/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-ok/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-ok/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-ok/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-ok/main.test pki/testdata/verify_certificate_chain_unittest/policies-ok/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/main.test pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/main.test pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/main.test pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/chain.pem - pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/main.test pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/chain.pem - pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/main.test pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/chain.pem - pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/keys/Target.key pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/main.test pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/ta-with-constraints.test - pki/testdata/verify_certificate_chain_unittest/rebase-errors.py pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/chain.pem - pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/keys/Target.key pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/main.test pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/chain.pem - pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/keys/Target.key pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-strict.test pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-ta-with-constraints-strict.test pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-ta-with-constraints.test @@ -1909,28 +2354,16 @@ set( pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-ta-with-expiration.test pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth.test pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/chain.pem - pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/keys/Target.key pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/main.test pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/ta-with-constraints-require-basic-constraints.test pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/ta-with-require-basic-constraints.test pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/chain.pem - pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/keys/Target.key pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/main.test pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/chain.pem pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/distrusted-root-expired.test pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/distrusted-root.test - pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/main.test pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/ta-with-constraints.test pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/ta-with-expiration.test @@ -1941,120 +2374,46 @@ set( pki/testdata/verify_certificate_chain_unittest/target-eku-any/chain.pem pki/testdata/verify_certificate_chain_unittest/target-eku-any/clientauth-strict.test pki/testdata/verify_certificate_chain_unittest/target-eku-any/clientauth.test - pki/testdata/verify_certificate_chain_unittest/target-eku-any/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-eku-any/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-eku-any/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-eku-any/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-eku-any/serverauth-strict.test pki/testdata/verify_certificate_chain_unittest/target-eku-any/serverauth.test pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/any.test pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/chain.pem pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/clientauth-strict.test pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/clientauth.test - pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/serverauth-strict.test pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/serverauth.test pki/testdata/verify_certificate_chain_unittest/target-eku-many/any.test pki/testdata/verify_certificate_chain_unittest/target-eku-many/chain.pem pki/testdata/verify_certificate_chain_unittest/target-eku-many/clientauth-strict.test pki/testdata/verify_certificate_chain_unittest/target-eku-many/clientauth.test - pki/testdata/verify_certificate_chain_unittest/target-eku-many/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-eku-many/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-eku-many/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-eku-many/keys/Target.key - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/3F1D2B1D127E34B62B61B278F274669ADC66ADCC.pem - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/71F49EE7B5F73630C9845EA5B8398B58F3237B18.pem - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/71F49EE7B5F73630C9845EA5B8398B58F3237B19.pem - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Intermediate.cnf - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Intermediate.csr - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Intermediate.db - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Intermediate.db.attr - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Intermediate.db.old - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Intermediate.pem - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Intermediate.serial - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Intermediate.serial.old - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Issuer.db - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Issuer.serial - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.cnf - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.csr - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.db - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.db.attr - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.db.attr.old - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.db.old - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.pem - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.serial - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Root.serial.old - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Target.cnf - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Target.csr - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Target.db - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Target.pem - pki/testdata/verify_certificate_chain_unittest/target-eku-many/out/Target.serial pki/testdata/verify_certificate_chain_unittest/target-eku-many/serverauth-strict.test pki/testdata/verify_certificate_chain_unittest/target-eku-many/serverauth.test pki/testdata/verify_certificate_chain_unittest/target-eku-none/any.test pki/testdata/verify_certificate_chain_unittest/target-eku-none/chain.pem pki/testdata/verify_certificate_chain_unittest/target-eku-none/clientauth-strict.test pki/testdata/verify_certificate_chain_unittest/target-eku-none/clientauth.test - pki/testdata/verify_certificate_chain_unittest/target-eku-none/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-eku-none/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-eku-none/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-eku-none/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-eku-none/serverauth-strict.test pki/testdata/verify_certificate_chain_unittest/target-eku-none/serverauth.test pki/testdata/verify_certificate_chain_unittest/target-has-512bit-rsa-key/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-has-512bit-rsa-key/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-has-512bit-rsa-key/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-has-512bit-rsa-key/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-has-512bit-rsa-key/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-has-512bit-rsa-key/main.test pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/main.test pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/strict.test pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/target_only-trusted_leaf-strict.test pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/target_only-trusted_leaf.test pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/target_only.pem pki/testdata/verify_certificate_chain_unittest/target-has-keycertsign-but-not-ca/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-has-keycertsign-but-not-ca/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-has-keycertsign-but-not-ca/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-has-keycertsign-but-not-ca/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-has-keycertsign-but-not-ca/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-has-keycertsign-but-not-ca/main.test pki/testdata/verify_certificate_chain_unittest/target-has-pathlen-but-not-ca/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-has-pathlen-but-not-ca/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-has-pathlen-but-not-ca/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-has-pathlen-but-not-ca/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-has-pathlen-but-not-ca/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-has-pathlen-but-not-ca/main.test pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-and-eku/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-and-eku/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-and-eku/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-and-eku/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-and-eku/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-and-eku/main.test pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-no-eku/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-no-eku/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-no-eku/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-no-eku/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-no-eku/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-no-eku/main.test pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/main.test pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/strict.test pki/testdata/verify_certificate_chain_unittest/target-only/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-only/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-only/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-only/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-only/trusted_anchor.test pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf-and-trust_anchor.test pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf-not_after.test @@ -2062,16 +2421,11 @@ set( pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf.test pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf_require_self_signed.test pki/testdata/verify_certificate_chain_unittest/target-selfissued/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-selfissued/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-selfissued/keys/Target.key - pki/testdata/verify_certificate_chain_unittest/target-selfissued/keys/Target_1.key pki/testdata/verify_certificate_chain_unittest/target-selfissued/trusted_anchor.test pki/testdata/verify_certificate_chain_unittest/target-selfissued/trusted_leaf-and-trust_anchor.test pki/testdata/verify_certificate_chain_unittest/target-selfissued/trusted_leaf.test pki/testdata/verify_certificate_chain_unittest/target-selfissued/trusted_leaf_require_self_signed.test pki/testdata/verify_certificate_chain_unittest/target-selfsigned/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-selfsigned/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-selfsigned/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-selfsigned/trusted_leaf-and-trust_anchor.test pki/testdata/verify_certificate_chain_unittest/target-selfsigned/trusted_leaf-not_after.test pki/testdata/verify_certificate_chain_unittest/target-selfsigned/trusted_leaf-wrong_eku.test @@ -2085,11 +2439,6 @@ set( pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-keyAgreement.test pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-keyEncipherment.pem pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-keyEncipherment.test - pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/keys/Target-ec.key - pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/keys/Target-rsa.key pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-decipherOnly.pem pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-decipherOnly.test pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-digitalSignature.pem @@ -2099,70 +2448,26 @@ set( pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-keyEncipherment.pem pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-keyEncipherment.test pki/testdata/verify_certificate_chain_unittest/target-signed-by-512bit-rsa/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-signed-by-512bit-rsa/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-signed-by-512bit-rsa/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-signed-by-512bit-rsa/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-signed-by-512bit-rsa/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-signed-by-512bit-rsa/main.test pki/testdata/verify_certificate_chain_unittest/target-signed-using-ecdsa/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-signed-using-ecdsa/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-signed-using-ecdsa/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-signed-using-ecdsa/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-signed-using-ecdsa/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-signed-using-ecdsa/main.test pki/testdata/verify_certificate_chain_unittest/target-signed-with-sha1/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-signed-with-sha1/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-signed-with-sha1/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-signed-with-sha1/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-signed-with-sha1/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-signed-with-sha1/main.test pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/main.test pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/target_only-trusted_leaf.test pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/target_only.pem pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/keys/Intermediate_1.key - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/main.test pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/chain.pem - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/keys/Intermediate_1.key - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/keys/Target.key pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/main.test pki/testdata/verify_certificate_chain_unittest/unknown-critical-policy-qualifier/chain.pem - pki/testdata/verify_certificate_chain_unittest/unknown-critical-policy-qualifier/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/unknown-critical-policy-qualifier/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/unknown-critical-policy-qualifier/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/unknown-critical-policy-qualifier/keys/Target.key pki/testdata/verify_certificate_chain_unittest/unknown-critical-policy-qualifier/main.test pki/testdata/verify_certificate_chain_unittest/unknown-non-critical-policy-qualifier/chain.pem - pki/testdata/verify_certificate_chain_unittest/unknown-non-critical-policy-qualifier/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/unknown-non-critical-policy-qualifier/keys/Intermediate.key - pki/testdata/verify_certificate_chain_unittest/unknown-non-critical-policy-qualifier/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/unknown-non-critical-policy-qualifier/keys/Target.key pki/testdata/verify_certificate_chain_unittest/unknown-non-critical-policy-qualifier/main.test pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/chain.pem - pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/keys/Intermediate1.key - pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/keys/Intermediate2.key - pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/keys/Target.key pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/main.test pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/chain.pem - pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/generate-chains.py - pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/keys/Intermediate1.key - pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/keys/Intermediate2.key - pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/keys/Root.key - pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/keys/Target.key pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/main.test pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/ta-with-constraints.test pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-case_swap-dupe_attr.pem @@ -2247,9 +2552,6 @@ set( pki/testdata/verify_name_match_unittest/names/unicode_supplementary-UTF8-unmangled.pem pki/testdata/verify_name_match_unittest/names/valid-Name-empty.pem pki/testdata/verify_name_match_unittest/names/valid-minimal.pem - pki/testdata/verify_name_match_unittest/scripts/generate_names.py - pki/testdata/verify_signed_data_unittest/README - pki/testdata/verify_signed_data_unittest/annotate_test_data.py pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-spki-params-null.pem pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-unused-bits-signature.pem pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-using-ecdh-key.pem @@ -2279,3 +2581,130 @@ set( pki/testdata/verify_unittest/google-leaf.der pki/testdata/verify_unittest/self-issued.pem ) + +set( + SSL_SOURCES + + ssl/bio_ssl.cc + ssl/d1_both.cc + ssl/d1_lib.cc + ssl/d1_pkt.cc + ssl/d1_srtp.cc + ssl/dtls_method.cc + ssl/dtls_record.cc + ssl/encrypted_client_hello.cc + ssl/extensions.cc + ssl/handoff.cc + ssl/handshake.cc + ssl/handshake_client.cc + ssl/handshake_server.cc + ssl/s3_both.cc + ssl/s3_lib.cc + ssl/s3_pkt.cc + ssl/ssl_aead_ctx.cc + ssl/ssl_asn1.cc + ssl/ssl_buffer.cc + ssl/ssl_cert.cc + ssl/ssl_cipher.cc + ssl/ssl_credential.cc + ssl/ssl_file.cc + ssl/ssl_key_share.cc + ssl/ssl_lib.cc + ssl/ssl_privkey.cc + ssl/ssl_session.cc + ssl/ssl_stat.cc + ssl/ssl_transcript.cc + ssl/ssl_versions.cc + ssl/ssl_x509.cc + ssl/t1_enc.cc + ssl/tls13_both.cc + ssl/tls13_client.cc + ssl/tls13_enc.cc + ssl/tls13_server.cc + ssl/tls_method.cc + ssl/tls_record.cc +) + +set( + SSL_HEADERS + + include/openssl/dtls1.h + include/openssl/srtp.h + include/openssl/ssl.h + include/openssl/ssl3.h + include/openssl/tls1.h +) + +set( + SSL_INTERNAL_HEADERS + + ssl/internal.h +) + +set( + SSL_TEST_SOURCES + + crypto/test/gtest_main.cc + ssl/span_test.cc + ssl/ssl_c_test.c + ssl/ssl_test.cc +) + +set( + TEST_SUPPORT_SOURCES + + crypto/test/abi_test.cc + crypto/test/file_test.cc + crypto/test/file_test_gtest.cc + crypto/test/file_util.cc + crypto/test/test_data.cc + crypto/test/test_util.cc + crypto/test/wycheproof_util.cc +) + +set( + TEST_SUPPORT_INTERNAL_HEADERS + + crypto/test/abi_test.h + crypto/test/file_test.h + crypto/test/file_util.h + crypto/test/gtest_main.h + crypto/test/test_data.h + crypto/test/test_util.h + crypto/test/wycheproof_util.h + ssl/test/async_bio.h + ssl/test/fuzzer.h + ssl/test/fuzzer_tags.h + ssl/test/handshake_util.h + ssl/test/mock_quic_transport.h + ssl/test/packeted_bio.h + ssl/test/settings_writer.h + ssl/test/test_config.h + ssl/test/test_state.h +) + +set( + TEST_SUPPORT_SOURCES_ASM + + gen/test_support/trampoline-armv4-linux.S + gen/test_support/trampoline-armv8-apple.S + gen/test_support/trampoline-armv8-linux.S + gen/test_support/trampoline-armv8-win.S + gen/test_support/trampoline-x86-apple.S + gen/test_support/trampoline-x86-linux.S + gen/test_support/trampoline-x86_64-apple.S + gen/test_support/trampoline-x86_64-linux.S +) + +set( + TEST_SUPPORT_SOURCES_NASM + + gen/test_support/trampoline-x86-win.asm + gen/test_support/trampoline-x86_64-win.asm +) + +set( + URANDOM_TEST_SOURCES + + crypto/fipsmodule/rand/urandom_test.cc +) diff --git a/yass/third_party/boringssl/src/gen/sources.json b/yass/third_party/boringssl/src/gen/sources.json new file mode 100644 index 0000000000..1fe651706f --- /dev/null +++ b/yass/third_party/boringssl/src/gen/sources.json @@ -0,0 +1,2633 @@ +{ + "bcm": { + "srcs": [ + "crypto/fipsmodule/bcm.c" + ], + "internal_hdrs": [ + "crypto/fipsmodule/aes/aes.c", + "crypto/fipsmodule/aes/aes_nohw.c", + "crypto/fipsmodule/aes/key_wrap.c", + "crypto/fipsmodule/aes/mode_wrappers.c", + "crypto/fipsmodule/bn/add.c", + "crypto/fipsmodule/bn/asm/x86_64-gcc.c", + "crypto/fipsmodule/bn/bn.c", + "crypto/fipsmodule/bn/bytes.c", + "crypto/fipsmodule/bn/cmp.c", + "crypto/fipsmodule/bn/ctx.c", + "crypto/fipsmodule/bn/div.c", + "crypto/fipsmodule/bn/div_extra.c", + "crypto/fipsmodule/bn/exponentiation.c", + "crypto/fipsmodule/bn/gcd.c", + "crypto/fipsmodule/bn/gcd_extra.c", + "crypto/fipsmodule/bn/generic.c", + "crypto/fipsmodule/bn/jacobi.c", + "crypto/fipsmodule/bn/montgomery.c", + "crypto/fipsmodule/bn/montgomery_inv.c", + "crypto/fipsmodule/bn/mul.c", + "crypto/fipsmodule/bn/prime.c", + "crypto/fipsmodule/bn/random.c", + "crypto/fipsmodule/bn/rsaz_exp.c", + "crypto/fipsmodule/bn/shift.c", + "crypto/fipsmodule/bn/sqrt.c", + "crypto/fipsmodule/cipher/aead.c", + "crypto/fipsmodule/cipher/cipher.c", + "crypto/fipsmodule/cipher/e_aes.c", + "crypto/fipsmodule/cipher/e_aesccm.c", + "crypto/fipsmodule/cmac/cmac.c", + "crypto/fipsmodule/dh/check.c", + "crypto/fipsmodule/dh/dh.c", + "crypto/fipsmodule/digest/digest.c", + "crypto/fipsmodule/digest/digests.c", + "crypto/fipsmodule/digestsign/digestsign.c", + "crypto/fipsmodule/ec/ec.c", + "crypto/fipsmodule/ec/ec_key.c", + "crypto/fipsmodule/ec/ec_montgomery.c", + "crypto/fipsmodule/ec/felem.c", + "crypto/fipsmodule/ec/oct.c", + "crypto/fipsmodule/ec/p224-64.c", + "crypto/fipsmodule/ec/p256-nistz.c", + "crypto/fipsmodule/ec/p256.c", + "crypto/fipsmodule/ec/scalar.c", + "crypto/fipsmodule/ec/simple.c", + "crypto/fipsmodule/ec/simple_mul.c", + "crypto/fipsmodule/ec/util.c", + "crypto/fipsmodule/ec/wnaf.c", + "crypto/fipsmodule/ecdh/ecdh.c", + "crypto/fipsmodule/ecdsa/ecdsa.c", + "crypto/fipsmodule/hkdf/hkdf.c", + "crypto/fipsmodule/hmac/hmac.c", + "crypto/fipsmodule/md4/md4.c", + "crypto/fipsmodule/md5/md5.c", + "crypto/fipsmodule/modes/cbc.c", + "crypto/fipsmodule/modes/cfb.c", + "crypto/fipsmodule/modes/ctr.c", + "crypto/fipsmodule/modes/gcm.c", + "crypto/fipsmodule/modes/gcm_nohw.c", + "crypto/fipsmodule/modes/ofb.c", + "crypto/fipsmodule/modes/polyval.c", + "crypto/fipsmodule/rand/ctrdrbg.c", + "crypto/fipsmodule/rand/fork_detect.c", + "crypto/fipsmodule/rand/rand.c", + "crypto/fipsmodule/rand/urandom.c", + "crypto/fipsmodule/rsa/blinding.c", + "crypto/fipsmodule/rsa/padding.c", + "crypto/fipsmodule/rsa/rsa.c", + "crypto/fipsmodule/rsa/rsa_impl.c", + "crypto/fipsmodule/self_check/fips.c", + "crypto/fipsmodule/self_check/self_check.c", + "crypto/fipsmodule/service_indicator/service_indicator.c", + "crypto/fipsmodule/sha/sha1.c", + "crypto/fipsmodule/sha/sha256.c", + "crypto/fipsmodule/sha/sha512.c", + "crypto/fipsmodule/tls/kdf.c" + ], + "asm": [ + "gen/bcm/aesni-gcm-x86_64-apple.S", + "gen/bcm/aesni-gcm-x86_64-linux.S", + "gen/bcm/aesni-x86-apple.S", + "gen/bcm/aesni-x86-linux.S", + "gen/bcm/aesni-x86_64-apple.S", + "gen/bcm/aesni-x86_64-linux.S", + "gen/bcm/aesv8-armv7-linux.S", + "gen/bcm/aesv8-armv8-apple.S", + "gen/bcm/aesv8-armv8-linux.S", + "gen/bcm/aesv8-armv8-win.S", + "gen/bcm/aesv8-gcm-armv8-apple.S", + "gen/bcm/aesv8-gcm-armv8-linux.S", + "gen/bcm/aesv8-gcm-armv8-win.S", + "gen/bcm/armv4-mont-linux.S", + "gen/bcm/armv8-mont-apple.S", + "gen/bcm/armv8-mont-linux.S", + "gen/bcm/armv8-mont-win.S", + "gen/bcm/bn-586-apple.S", + "gen/bcm/bn-586-linux.S", + "gen/bcm/bn-armv8-apple.S", + "gen/bcm/bn-armv8-linux.S", + "gen/bcm/bn-armv8-win.S", + "gen/bcm/bsaes-armv7-linux.S", + "gen/bcm/co-586-apple.S", + "gen/bcm/co-586-linux.S", + "gen/bcm/ghash-armv4-linux.S", + "gen/bcm/ghash-neon-armv8-apple.S", + "gen/bcm/ghash-neon-armv8-linux.S", + "gen/bcm/ghash-neon-armv8-win.S", + "gen/bcm/ghash-ssse3-x86-apple.S", + "gen/bcm/ghash-ssse3-x86-linux.S", + "gen/bcm/ghash-ssse3-x86_64-apple.S", + "gen/bcm/ghash-ssse3-x86_64-linux.S", + "gen/bcm/ghash-x86-apple.S", + "gen/bcm/ghash-x86-linux.S", + "gen/bcm/ghash-x86_64-apple.S", + "gen/bcm/ghash-x86_64-linux.S", + "gen/bcm/ghashv8-armv7-linux.S", + "gen/bcm/ghashv8-armv8-apple.S", + "gen/bcm/ghashv8-armv8-linux.S", + "gen/bcm/ghashv8-armv8-win.S", + "gen/bcm/md5-586-apple.S", + "gen/bcm/md5-586-linux.S", + "gen/bcm/md5-x86_64-apple.S", + "gen/bcm/md5-x86_64-linux.S", + "gen/bcm/p256-armv8-asm-apple.S", + "gen/bcm/p256-armv8-asm-linux.S", + "gen/bcm/p256-armv8-asm-win.S", + "gen/bcm/p256-x86_64-asm-apple.S", + "gen/bcm/p256-x86_64-asm-linux.S", + "gen/bcm/p256_beeu-armv8-asm-apple.S", + "gen/bcm/p256_beeu-armv8-asm-linux.S", + "gen/bcm/p256_beeu-armv8-asm-win.S", + "gen/bcm/p256_beeu-x86_64-asm-apple.S", + "gen/bcm/p256_beeu-x86_64-asm-linux.S", + "gen/bcm/rdrand-x86_64-apple.S", + "gen/bcm/rdrand-x86_64-linux.S", + "gen/bcm/rsaz-avx2-apple.S", + "gen/bcm/rsaz-avx2-linux.S", + "gen/bcm/sha1-586-apple.S", + "gen/bcm/sha1-586-linux.S", + "gen/bcm/sha1-armv4-large-linux.S", + "gen/bcm/sha1-armv8-apple.S", + "gen/bcm/sha1-armv8-linux.S", + "gen/bcm/sha1-armv8-win.S", + "gen/bcm/sha1-x86_64-apple.S", + "gen/bcm/sha1-x86_64-linux.S", + "gen/bcm/sha256-586-apple.S", + "gen/bcm/sha256-586-linux.S", + "gen/bcm/sha256-armv4-linux.S", + "gen/bcm/sha256-armv8-apple.S", + "gen/bcm/sha256-armv8-linux.S", + "gen/bcm/sha256-armv8-win.S", + "gen/bcm/sha256-x86_64-apple.S", + "gen/bcm/sha256-x86_64-linux.S", + "gen/bcm/sha512-586-apple.S", + "gen/bcm/sha512-586-linux.S", + "gen/bcm/sha512-armv4-linux.S", + "gen/bcm/sha512-armv8-apple.S", + "gen/bcm/sha512-armv8-linux.S", + "gen/bcm/sha512-armv8-win.S", + "gen/bcm/sha512-x86_64-apple.S", + "gen/bcm/sha512-x86_64-linux.S", + "gen/bcm/vpaes-armv7-linux.S", + "gen/bcm/vpaes-armv8-apple.S", + "gen/bcm/vpaes-armv8-linux.S", + "gen/bcm/vpaes-armv8-win.S", + "gen/bcm/vpaes-x86-apple.S", + "gen/bcm/vpaes-x86-linux.S", + "gen/bcm/vpaes-x86_64-apple.S", + "gen/bcm/vpaes-x86_64-linux.S", + "gen/bcm/x86-mont-apple.S", + "gen/bcm/x86-mont-linux.S", + "gen/bcm/x86_64-mont-apple.S", + "gen/bcm/x86_64-mont-linux.S", + "gen/bcm/x86_64-mont5-apple.S", + "gen/bcm/x86_64-mont5-linux.S" + ], + "nasm": [ + "gen/bcm/aesni-gcm-x86_64-win.asm", + "gen/bcm/aesni-x86-win.asm", + "gen/bcm/aesni-x86_64-win.asm", + "gen/bcm/bn-586-win.asm", + "gen/bcm/co-586-win.asm", + "gen/bcm/ghash-ssse3-x86-win.asm", + "gen/bcm/ghash-ssse3-x86_64-win.asm", + "gen/bcm/ghash-x86-win.asm", + "gen/bcm/ghash-x86_64-win.asm", + "gen/bcm/md5-586-win.asm", + "gen/bcm/md5-x86_64-win.asm", + "gen/bcm/p256-x86_64-asm-win.asm", + "gen/bcm/p256_beeu-x86_64-asm-win.asm", + "gen/bcm/rdrand-x86_64-win.asm", + "gen/bcm/rsaz-avx2-win.asm", + "gen/bcm/sha1-586-win.asm", + "gen/bcm/sha1-x86_64-win.asm", + "gen/bcm/sha256-586-win.asm", + "gen/bcm/sha256-x86_64-win.asm", + "gen/bcm/sha512-586-win.asm", + "gen/bcm/sha512-x86_64-win.asm", + "gen/bcm/vpaes-x86-win.asm", + "gen/bcm/vpaes-x86_64-win.asm", + "gen/bcm/x86-mont-win.asm", + "gen/bcm/x86_64-mont-win.asm", + "gen/bcm/x86_64-mont5-win.asm" + ] + }, + "bssl": { + "srcs": [ + "tool/args.cc", + "tool/ciphers.cc", + "tool/client.cc", + "tool/const.cc", + "tool/digest.cc", + "tool/fd.cc", + "tool/file.cc", + "tool/generate_ech.cc", + "tool/generate_ed25519.cc", + "tool/genrsa.cc", + "tool/pkcs12.cc", + "tool/rand.cc", + "tool/server.cc", + "tool/sign.cc", + "tool/speed.cc", + "tool/tool.cc", + "tool/transport_common.cc" + ], + "internal_hdrs": [ + "tool/internal.h", + "tool/transport_common.h" + ] + }, + "crypto": { + "srcs": [ + "crypto/asn1/a_bitstr.c", + "crypto/asn1/a_bool.c", + "crypto/asn1/a_d2i_fp.c", + "crypto/asn1/a_dup.c", + "crypto/asn1/a_gentm.c", + "crypto/asn1/a_i2d_fp.c", + "crypto/asn1/a_int.c", + "crypto/asn1/a_mbstr.c", + "crypto/asn1/a_object.c", + "crypto/asn1/a_octet.c", + "crypto/asn1/a_strex.c", + "crypto/asn1/a_strnid.c", + "crypto/asn1/a_time.c", + "crypto/asn1/a_type.c", + "crypto/asn1/a_utctm.c", + "crypto/asn1/asn1_lib.c", + "crypto/asn1/asn1_par.c", + "crypto/asn1/asn_pack.c", + "crypto/asn1/f_int.c", + "crypto/asn1/f_string.c", + "crypto/asn1/posix_time.c", + "crypto/asn1/tasn_dec.c", + "crypto/asn1/tasn_enc.c", + "crypto/asn1/tasn_fre.c", + "crypto/asn1/tasn_new.c", + "crypto/asn1/tasn_typ.c", + "crypto/asn1/tasn_utl.c", + "crypto/base64/base64.c", + "crypto/bio/bio.c", + "crypto/bio/bio_mem.c", + "crypto/bio/connect.c", + "crypto/bio/errno.c", + "crypto/bio/fd.c", + "crypto/bio/file.c", + "crypto/bio/hexdump.c", + "crypto/bio/pair.c", + "crypto/bio/printf.c", + "crypto/bio/socket.c", + "crypto/bio/socket_helper.c", + "crypto/blake2/blake2.c", + "crypto/bn_extra/bn_asn1.c", + "crypto/bn_extra/convert.c", + "crypto/buf/buf.c", + "crypto/bytestring/asn1_compat.c", + "crypto/bytestring/ber.c", + "crypto/bytestring/cbb.c", + "crypto/bytestring/cbs.c", + "crypto/bytestring/unicode.c", + "crypto/chacha/chacha.c", + "crypto/cipher_extra/cipher_extra.c", + "crypto/cipher_extra/derive_key.c", + "crypto/cipher_extra/e_aesctrhmac.c", + "crypto/cipher_extra/e_aesgcmsiv.c", + "crypto/cipher_extra/e_chacha20poly1305.c", + "crypto/cipher_extra/e_des.c", + "crypto/cipher_extra/e_null.c", + "crypto/cipher_extra/e_rc2.c", + "crypto/cipher_extra/e_rc4.c", + "crypto/cipher_extra/e_tls.c", + "crypto/cipher_extra/tls_cbc.c", + "crypto/conf/conf.c", + "crypto/cpu_aarch64_apple.c", + "crypto/cpu_aarch64_fuchsia.c", + "crypto/cpu_aarch64_linux.c", + "crypto/cpu_aarch64_openbsd.c", + "crypto/cpu_aarch64_sysreg.c", + "crypto/cpu_aarch64_win.c", + "crypto/cpu_arm_freebsd.c", + "crypto/cpu_arm_linux.c", + "crypto/cpu_intel.c", + "crypto/crypto.c", + "crypto/curve25519/curve25519.c", + "crypto/curve25519/curve25519_64_adx.c", + "crypto/curve25519/spake25519.c", + "crypto/des/des.c", + "crypto/dh_extra/dh_asn1.c", + "crypto/dh_extra/params.c", + "crypto/digest_extra/digest_extra.c", + "crypto/dsa/dsa.c", + "crypto/dsa/dsa_asn1.c", + "crypto/ec_extra/ec_asn1.c", + "crypto/ec_extra/ec_derive.c", + "crypto/ec_extra/hash_to_curve.c", + "crypto/ecdh_extra/ecdh_extra.c", + "crypto/ecdsa_extra/ecdsa_asn1.c", + "crypto/engine/engine.c", + "crypto/err/err.c", + "crypto/evp/evp.c", + "crypto/evp/evp_asn1.c", + "crypto/evp/evp_ctx.c", + "crypto/evp/p_dh.c", + "crypto/evp/p_dh_asn1.c", + "crypto/evp/p_dsa_asn1.c", + "crypto/evp/p_ec.c", + "crypto/evp/p_ec_asn1.c", + "crypto/evp/p_ed25519.c", + "crypto/evp/p_ed25519_asn1.c", + "crypto/evp/p_hkdf.c", + "crypto/evp/p_rsa.c", + "crypto/evp/p_rsa_asn1.c", + "crypto/evp/p_x25519.c", + "crypto/evp/p_x25519_asn1.c", + "crypto/evp/pbkdf.c", + "crypto/evp/print.c", + "crypto/evp/scrypt.c", + "crypto/evp/sign.c", + "crypto/ex_data.c", + "crypto/fipsmodule/fips_shared_support.c", + "crypto/hpke/hpke.c", + "crypto/hrss/hrss.c", + "crypto/keccak/keccak.c", + "crypto/kyber/kyber.c", + "crypto/lhash/lhash.c", + "crypto/mem.c", + "crypto/obj/obj.c", + "crypto/obj/obj_xref.c", + "crypto/pem/pem_all.c", + "crypto/pem/pem_info.c", + "crypto/pem/pem_lib.c", + "crypto/pem/pem_oth.c", + "crypto/pem/pem_pk8.c", + "crypto/pem/pem_pkey.c", + "crypto/pem/pem_x509.c", + "crypto/pem/pem_xaux.c", + "crypto/pkcs7/pkcs7.c", + "crypto/pkcs7/pkcs7_x509.c", + "crypto/pkcs8/p5_pbev2.c", + "crypto/pkcs8/pkcs8.c", + "crypto/pkcs8/pkcs8_x509.c", + "crypto/poly1305/poly1305.c", + "crypto/poly1305/poly1305_arm.c", + "crypto/poly1305/poly1305_vec.c", + "crypto/pool/pool.c", + "crypto/rand_extra/deterministic.c", + "crypto/rand_extra/forkunsafe.c", + "crypto/rand_extra/getentropy.c", + "crypto/rand_extra/ios.c", + "crypto/rand_extra/passive.c", + "crypto/rand_extra/rand_extra.c", + "crypto/rand_extra/trusty.c", + "crypto/rand_extra/windows.c", + "crypto/rc4/rc4.c", + "crypto/refcount.c", + "crypto/rsa_extra/rsa_asn1.c", + "crypto/rsa_extra/rsa_crypt.c", + "crypto/rsa_extra/rsa_print.c", + "crypto/siphash/siphash.c", + "crypto/spx/address.c", + "crypto/spx/fors.c", + "crypto/spx/merkle.c", + "crypto/spx/spx.c", + "crypto/spx/spx_util.c", + "crypto/spx/thash.c", + "crypto/spx/wots.c", + "crypto/stack/stack.c", + "crypto/thread.c", + "crypto/thread_none.c", + "crypto/thread_pthread.c", + "crypto/thread_win.c", + "crypto/trust_token/pmbtoken.c", + "crypto/trust_token/trust_token.c", + "crypto/trust_token/voprf.c", + "crypto/x509/a_digest.c", + "crypto/x509/a_sign.c", + "crypto/x509/a_verify.c", + "crypto/x509/algorithm.c", + "crypto/x509/asn1_gen.c", + "crypto/x509/by_dir.c", + "crypto/x509/by_file.c", + "crypto/x509/i2d_pr.c", + "crypto/x509/name_print.c", + "crypto/x509/policy.c", + "crypto/x509/rsa_pss.c", + "crypto/x509/t_crl.c", + "crypto/x509/t_req.c", + "crypto/x509/t_x509.c", + "crypto/x509/t_x509a.c", + "crypto/x509/v3_akey.c", + "crypto/x509/v3_akeya.c", + "crypto/x509/v3_alt.c", + "crypto/x509/v3_bcons.c", + "crypto/x509/v3_bitst.c", + "crypto/x509/v3_conf.c", + "crypto/x509/v3_cpols.c", + "crypto/x509/v3_crld.c", + "crypto/x509/v3_enum.c", + "crypto/x509/v3_extku.c", + "crypto/x509/v3_genn.c", + "crypto/x509/v3_ia5.c", + "crypto/x509/v3_info.c", + "crypto/x509/v3_int.c", + "crypto/x509/v3_lib.c", + "crypto/x509/v3_ncons.c", + "crypto/x509/v3_ocsp.c", + "crypto/x509/v3_pcons.c", + "crypto/x509/v3_pmaps.c", + "crypto/x509/v3_prn.c", + "crypto/x509/v3_purp.c", + "crypto/x509/v3_skey.c", + "crypto/x509/v3_utl.c", + "crypto/x509/x509.c", + "crypto/x509/x509_att.c", + "crypto/x509/x509_cmp.c", + "crypto/x509/x509_d2.c", + "crypto/x509/x509_def.c", + "crypto/x509/x509_ext.c", + "crypto/x509/x509_lu.c", + "crypto/x509/x509_obj.c", + "crypto/x509/x509_req.c", + "crypto/x509/x509_set.c", + "crypto/x509/x509_trs.c", + "crypto/x509/x509_txt.c", + "crypto/x509/x509_v3.c", + "crypto/x509/x509_vfy.c", + "crypto/x509/x509_vpm.c", + "crypto/x509/x509cset.c", + "crypto/x509/x509name.c", + "crypto/x509/x509rset.c", + "crypto/x509/x509spki.c", + "crypto/x509/x_algor.c", + "crypto/x509/x_all.c", + "crypto/x509/x_attrib.c", + "crypto/x509/x_crl.c", + "crypto/x509/x_exten.c", + "crypto/x509/x_name.c", + "crypto/x509/x_pubkey.c", + "crypto/x509/x_req.c", + "crypto/x509/x_sig.c", + "crypto/x509/x_spki.c", + "crypto/x509/x_val.c", + "crypto/x509/x_x509.c", + "crypto/x509/x_x509a.c", + "gen/crypto/err_data.c" + ], + "hdrs": [ + "include/openssl/aead.h", + "include/openssl/aes.h", + "include/openssl/arm_arch.h", + "include/openssl/asm_base.h", + "include/openssl/asn1.h", + "include/openssl/asn1_mac.h", + "include/openssl/asn1t.h", + "include/openssl/base.h", + "include/openssl/base64.h", + "include/openssl/bio.h", + "include/openssl/blake2.h", + "include/openssl/blowfish.h", + "include/openssl/bn.h", + "include/openssl/buf.h", + "include/openssl/buffer.h", + "include/openssl/bytestring.h", + "include/openssl/cast.h", + "include/openssl/chacha.h", + "include/openssl/cipher.h", + "include/openssl/cmac.h", + "include/openssl/conf.h", + "include/openssl/cpu.h", + "include/openssl/crypto.h", + "include/openssl/ctrdrbg.h", + "include/openssl/curve25519.h", + "include/openssl/des.h", + "include/openssl/dh.h", + "include/openssl/digest.h", + "include/openssl/dsa.h", + "include/openssl/e_os2.h", + "include/openssl/ec.h", + "include/openssl/ec_key.h", + "include/openssl/ecdh.h", + "include/openssl/ecdsa.h", + "include/openssl/engine.h", + "include/openssl/err.h", + "include/openssl/evp.h", + "include/openssl/evp_errors.h", + "include/openssl/ex_data.h", + "include/openssl/experimental/kyber.h", + "include/openssl/experimental/spx.h", + "include/openssl/hkdf.h", + "include/openssl/hmac.h", + "include/openssl/hpke.h", + "include/openssl/hrss.h", + "include/openssl/is_boringssl.h", + "include/openssl/kdf.h", + "include/openssl/lhash.h", + "include/openssl/md4.h", + "include/openssl/md5.h", + "include/openssl/mem.h", + "include/openssl/nid.h", + "include/openssl/obj.h", + "include/openssl/obj_mac.h", + "include/openssl/objects.h", + "include/openssl/opensslconf.h", + "include/openssl/opensslv.h", + "include/openssl/ossl_typ.h", + "include/openssl/pem.h", + "include/openssl/pkcs12.h", + "include/openssl/pkcs7.h", + "include/openssl/pkcs8.h", + "include/openssl/poly1305.h", + "include/openssl/pool.h", + "include/openssl/posix_time.h", + "include/openssl/rand.h", + "include/openssl/rc4.h", + "include/openssl/ripemd.h", + "include/openssl/rsa.h", + "include/openssl/safestack.h", + "include/openssl/service_indicator.h", + "include/openssl/sha.h", + "include/openssl/siphash.h", + "include/openssl/span.h", + "include/openssl/stack.h", + "include/openssl/target.h", + "include/openssl/thread.h", + "include/openssl/time.h", + "include/openssl/trust_token.h", + "include/openssl/type_check.h", + "include/openssl/x509.h", + "include/openssl/x509_vfy.h", + "include/openssl/x509v3.h", + "include/openssl/x509v3_errors.h" + ], + "internal_hdrs": [ + "crypto/asn1/internal.h", + "crypto/bio/internal.h", + "crypto/bytestring/internal.h", + "crypto/chacha/internal.h", + "crypto/cipher_extra/internal.h", + "crypto/conf/conf_def.h", + "crypto/conf/internal.h", + "crypto/cpu_arm_linux.h", + "crypto/curve25519/curve25519_tables.h", + "crypto/curve25519/internal.h", + "crypto/des/internal.h", + "crypto/dsa/internal.h", + "crypto/ec_extra/internal.h", + "crypto/err/internal.h", + "crypto/evp/internal.h", + "crypto/fipsmodule/aes/internal.h", + "crypto/fipsmodule/bn/internal.h", + "crypto/fipsmodule/bn/rsaz_exp.h", + "crypto/fipsmodule/cipher/internal.h", + "crypto/fipsmodule/delocate.h", + "crypto/fipsmodule/dh/internal.h", + "crypto/fipsmodule/digest/internal.h", + "crypto/fipsmodule/digest/md32_common.h", + "crypto/fipsmodule/ec/builtin_curves.h", + "crypto/fipsmodule/ec/internal.h", + "crypto/fipsmodule/ec/p256-nistz-table.h", + "crypto/fipsmodule/ec/p256-nistz.h", + "crypto/fipsmodule/ec/p256_table.h", + "crypto/fipsmodule/ecdsa/internal.h", + "crypto/fipsmodule/md5/internal.h", + "crypto/fipsmodule/modes/internal.h", + "crypto/fipsmodule/rand/fork_detect.h", + "crypto/fipsmodule/rand/getrandom_fillin.h", + "crypto/fipsmodule/rand/internal.h", + "crypto/fipsmodule/rsa/internal.h", + "crypto/fipsmodule/service_indicator/internal.h", + "crypto/fipsmodule/sha/internal.h", + "crypto/fipsmodule/tls/internal.h", + "crypto/hrss/internal.h", + "crypto/internal.h", + "crypto/keccak/internal.h", + "crypto/kyber/internal.h", + "crypto/lhash/internal.h", + "crypto/obj/obj_dat.h", + "crypto/pkcs7/internal.h", + "crypto/pkcs8/internal.h", + "crypto/poly1305/internal.h", + "crypto/pool/internal.h", + "crypto/rsa_extra/internal.h", + "crypto/spx/address.h", + "crypto/spx/fors.h", + "crypto/spx/merkle.h", + "crypto/spx/params.h", + "crypto/spx/spx_util.h", + "crypto/spx/thash.h", + "crypto/spx/wots.h", + "crypto/trust_token/internal.h", + "crypto/x509/ext_dat.h", + "crypto/x509/internal.h", + "third_party/fiat/curve25519_32.h", + "third_party/fiat/curve25519_64.h", + "third_party/fiat/curve25519_64_adx.h", + "third_party/fiat/curve25519_64_msvc.h", + "third_party/fiat/p256_32.h", + "third_party/fiat/p256_64.h", + "third_party/fiat/p256_64_msvc.h" + ], + "asm": [ + "crypto/curve25519/asm/x25519-asm-arm.S", + "crypto/hrss/asm/poly_rq_mul.S", + "crypto/poly1305/poly1305_arm_asm.S", + "gen/crypto/aes128gcmsiv-x86_64-apple.S", + "gen/crypto/aes128gcmsiv-x86_64-linux.S", + "gen/crypto/chacha-armv4-linux.S", + "gen/crypto/chacha-armv8-apple.S", + "gen/crypto/chacha-armv8-linux.S", + "gen/crypto/chacha-armv8-win.S", + "gen/crypto/chacha-x86-apple.S", + "gen/crypto/chacha-x86-linux.S", + "gen/crypto/chacha-x86_64-apple.S", + "gen/crypto/chacha-x86_64-linux.S", + "gen/crypto/chacha20_poly1305_armv8-apple.S", + "gen/crypto/chacha20_poly1305_armv8-linux.S", + "gen/crypto/chacha20_poly1305_armv8-win.S", + "gen/crypto/chacha20_poly1305_x86_64-apple.S", + "gen/crypto/chacha20_poly1305_x86_64-linux.S", + "third_party/fiat/asm/fiat_curve25519_adx_mul.S", + "third_party/fiat/asm/fiat_curve25519_adx_square.S", + "third_party/fiat/asm/fiat_p256_adx_mul.S", + "third_party/fiat/asm/fiat_p256_adx_sqr.S" + ], + "nasm": [ + "gen/crypto/aes128gcmsiv-x86_64-win.asm", + "gen/crypto/chacha-x86-win.asm", + "gen/crypto/chacha-x86_64-win.asm", + "gen/crypto/chacha20_poly1305_x86_64-win.asm" + ] + }, + "crypto_test": { + "srcs": [ + "crypto/abi_self_test.cc", + "crypto/asn1/asn1_test.cc", + "crypto/base64/base64_test.cc", + "crypto/bio/bio_test.cc", + "crypto/blake2/blake2_test.cc", + "crypto/buf/buf_test.cc", + "crypto/bytestring/bytestring_test.cc", + "crypto/chacha/chacha_test.cc", + "crypto/cipher_extra/aead_test.cc", + "crypto/cipher_extra/cipher_test.cc", + "crypto/compiler_test.cc", + "crypto/conf/conf_test.cc", + "crypto/constant_time_test.cc", + "crypto/cpu_arm_linux_test.cc", + "crypto/crypto_test.cc", + "crypto/curve25519/ed25519_test.cc", + "crypto/curve25519/spake25519_test.cc", + "crypto/curve25519/x25519_test.cc", + "crypto/dh_extra/dh_test.cc", + "crypto/digest_extra/digest_test.cc", + "crypto/dsa/dsa_test.cc", + "crypto/ecdh_extra/ecdh_test.cc", + "crypto/err/err_test.cc", + "crypto/evp/evp_extra_test.cc", + "crypto/evp/evp_test.cc", + "crypto/evp/pbkdf_test.cc", + "crypto/evp/scrypt_test.cc", + "crypto/fipsmodule/aes/aes_test.cc", + "crypto/fipsmodule/bn/bn_test.cc", + "crypto/fipsmodule/cmac/cmac_test.cc", + "crypto/fipsmodule/ec/ec_test.cc", + "crypto/fipsmodule/ec/p256-nistz_test.cc", + "crypto/fipsmodule/ec/p256_test.cc", + "crypto/fipsmodule/ecdsa/ecdsa_test.cc", + "crypto/fipsmodule/hkdf/hkdf_test.cc", + "crypto/fipsmodule/md5/md5_test.cc", + "crypto/fipsmodule/modes/gcm_test.cc", + "crypto/fipsmodule/rand/ctrdrbg_test.cc", + "crypto/fipsmodule/rand/fork_detect_test.cc", + "crypto/fipsmodule/service_indicator/service_indicator_test.cc", + "crypto/fipsmodule/sha/sha_test.cc", + "crypto/hmac_extra/hmac_test.cc", + "crypto/hpke/hpke_test.cc", + "crypto/hrss/hrss_test.cc", + "crypto/impl_dispatch_test.cc", + "crypto/keccak/keccak_test.cc", + "crypto/kyber/kyber_test.cc", + "crypto/lhash/lhash_test.cc", + "crypto/obj/obj_test.cc", + "crypto/pem/pem_test.cc", + "crypto/pkcs7/pkcs7_test.cc", + "crypto/pkcs8/pkcs12_test.cc", + "crypto/pkcs8/pkcs8_test.cc", + "crypto/poly1305/poly1305_test.cc", + "crypto/pool/pool_test.cc", + "crypto/rand_extra/getentropy_test.cc", + "crypto/rand_extra/rand_test.cc", + "crypto/refcount_test.cc", + "crypto/rsa_extra/rsa_test.cc", + "crypto/self_test.cc", + "crypto/siphash/siphash_test.cc", + "crypto/spx/spx_test.cc", + "crypto/stack/stack_test.cc", + "crypto/test/gtest_main.cc", + "crypto/thread_test.cc", + "crypto/trust_token/trust_token_test.cc", + "crypto/x509/tab_test.cc", + "crypto/x509/x509_test.cc", + "crypto/x509/x509_time_test.cc" + ], + "data": [ + "crypto/blake2/blake2b256_tests.txt", + "crypto/cipher_extra/test/aes_128_cbc_sha1_tls_implicit_iv_tests.txt", + "crypto/cipher_extra/test/aes_128_cbc_sha1_tls_tests.txt", + "crypto/cipher_extra/test/aes_128_ccm_bluetooth_8_tests.txt", + "crypto/cipher_extra/test/aes_128_ccm_bluetooth_tests.txt", + "crypto/cipher_extra/test/aes_128_ccm_matter_tests.txt", + "crypto/cipher_extra/test/aes_128_ctr_hmac_sha256.txt", + "crypto/cipher_extra/test/aes_128_gcm_randnonce_tests.txt", + "crypto/cipher_extra/test/aes_128_gcm_siv_tests.txt", + "crypto/cipher_extra/test/aes_128_gcm_tests.txt", + "crypto/cipher_extra/test/aes_192_gcm_tests.txt", + "crypto/cipher_extra/test/aes_256_cbc_sha1_tls_implicit_iv_tests.txt", + "crypto/cipher_extra/test/aes_256_cbc_sha1_tls_tests.txt", + "crypto/cipher_extra/test/aes_256_ctr_hmac_sha256.txt", + "crypto/cipher_extra/test/aes_256_gcm_randnonce_tests.txt", + "crypto/cipher_extra/test/aes_256_gcm_siv_tests.txt", + "crypto/cipher_extra/test/aes_256_gcm_tests.txt", + "crypto/cipher_extra/test/chacha20_poly1305_tests.txt", + "crypto/cipher_extra/test/cipher_tests.txt", + "crypto/cipher_extra/test/des_ede3_cbc_sha1_tls_implicit_iv_tests.txt", + "crypto/cipher_extra/test/des_ede3_cbc_sha1_tls_tests.txt", + "crypto/cipher_extra/test/nist_cavp/aes_128_cbc.txt", + "crypto/cipher_extra/test/nist_cavp/aes_128_ctr.txt", + "crypto/cipher_extra/test/nist_cavp/aes_128_gcm.txt", + "crypto/cipher_extra/test/nist_cavp/aes_192_cbc.txt", + "crypto/cipher_extra/test/nist_cavp/aes_192_ctr.txt", + "crypto/cipher_extra/test/nist_cavp/aes_256_cbc.txt", + "crypto/cipher_extra/test/nist_cavp/aes_256_ctr.txt", + "crypto/cipher_extra/test/nist_cavp/aes_256_gcm.txt", + "crypto/cipher_extra/test/nist_cavp/tdes_cbc.txt", + "crypto/cipher_extra/test/nist_cavp/tdes_ecb.txt", + "crypto/cipher_extra/test/xchacha20_poly1305_tests.txt", + "crypto/curve25519/ed25519_tests.txt", + "crypto/ecdh_extra/ecdh_tests.txt", + "crypto/evp/evp_tests.txt", + "crypto/evp/scrypt_tests.txt", + "crypto/fipsmodule/aes/aes_tests.txt", + "crypto/fipsmodule/bn/test/exp_tests.txt", + "crypto/fipsmodule/bn/test/gcd_tests.txt", + "crypto/fipsmodule/bn/test/miller_rabin_tests.txt", + "crypto/fipsmodule/bn/test/mod_exp_tests.txt", + "crypto/fipsmodule/bn/test/mod_inv_tests.txt", + "crypto/fipsmodule/bn/test/mod_mul_tests.txt", + "crypto/fipsmodule/bn/test/mod_sqrt_tests.txt", + "crypto/fipsmodule/bn/test/product_tests.txt", + "crypto/fipsmodule/bn/test/quotient_tests.txt", + "crypto/fipsmodule/bn/test/shift_tests.txt", + "crypto/fipsmodule/bn/test/sum_tests.txt", + "crypto/fipsmodule/cmac/cavp_3des_cmac_tests.txt", + "crypto/fipsmodule/cmac/cavp_aes128_cmac_tests.txt", + "crypto/fipsmodule/cmac/cavp_aes192_cmac_tests.txt", + "crypto/fipsmodule/cmac/cavp_aes256_cmac_tests.txt", + "crypto/fipsmodule/ec/ec_scalar_base_mult_tests.txt", + "crypto/fipsmodule/ec/p256-nistz_tests.txt", + "crypto/fipsmodule/ecdsa/ecdsa_sign_tests.txt", + "crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt", + "crypto/fipsmodule/modes/gcm_tests.txt", + "crypto/fipsmodule/rand/ctrdrbg_vectors.txt", + "crypto/hmac_extra/hmac_tests.txt", + "crypto/hpke/hpke_test_vectors.txt", + "crypto/keccak/keccak_tests.txt", + "crypto/kyber/kyber_tests.txt", + "crypto/pkcs8/test/bad1.p12", + "crypto/pkcs8/test/bad2.p12", + "crypto/pkcs8/test/bad3.p12", + "crypto/pkcs8/test/empty_password.p12", + "crypto/pkcs8/test/empty_password_ber.p12", + "crypto/pkcs8/test/empty_password_ber_nested.p12", + "crypto/pkcs8/test/no_encryption.p12", + "crypto/pkcs8/test/nss.p12", + "crypto/pkcs8/test/null_password.p12", + "crypto/pkcs8/test/openssl.p12", + "crypto/pkcs8/test/pbes2_sha1.p12", + "crypto/pkcs8/test/pbes2_sha256.p12", + "crypto/pkcs8/test/unicode_password.p12", + "crypto/pkcs8/test/windows.p12", + "crypto/poly1305/poly1305_tests.txt", + "crypto/siphash/siphash_tests.txt", + "crypto/spx/spx_tests.txt", + "crypto/spx/spx_tests_deterministic.txt", + "crypto/x509/test/basic_constraints_ca.pem", + "crypto/x509/test/basic_constraints_ca_pathlen_0.pem", + "crypto/x509/test/basic_constraints_ca_pathlen_1.pem", + "crypto/x509/test/basic_constraints_ca_pathlen_10.pem", + "crypto/x509/test/basic_constraints_leaf.pem", + "crypto/x509/test/basic_constraints_none.pem", + "crypto/x509/test/invalid_extension_intermediate.pem", + "crypto/x509/test/invalid_extension_intermediate_authority_key_identifier.pem", + "crypto/x509/test/invalid_extension_intermediate_basic_constraints.pem", + "crypto/x509/test/invalid_extension_intermediate_ext_key_usage.pem", + "crypto/x509/test/invalid_extension_intermediate_key_usage.pem", + "crypto/x509/test/invalid_extension_intermediate_name_constraints.pem", + "crypto/x509/test/invalid_extension_intermediate_subject_alt_name.pem", + "crypto/x509/test/invalid_extension_intermediate_subject_key_identifier.pem", + "crypto/x509/test/invalid_extension_leaf.pem", + "crypto/x509/test/invalid_extension_leaf_authority_key_identifier.pem", + "crypto/x509/test/invalid_extension_leaf_basic_constraints.pem", + "crypto/x509/test/invalid_extension_leaf_ext_key_usage.pem", + "crypto/x509/test/invalid_extension_leaf_key_usage.pem", + "crypto/x509/test/invalid_extension_leaf_name_constraints.pem", + "crypto/x509/test/invalid_extension_leaf_subject_alt_name.pem", + "crypto/x509/test/invalid_extension_leaf_subject_key_identifier.pem", + "crypto/x509/test/invalid_extension_root.pem", + "crypto/x509/test/invalid_extension_root_authority_key_identifier.pem", + "crypto/x509/test/invalid_extension_root_basic_constraints.pem", + "crypto/x509/test/invalid_extension_root_ext_key_usage.pem", + "crypto/x509/test/invalid_extension_root_key_usage.pem", + "crypto/x509/test/invalid_extension_root_name_constraints.pem", + "crypto/x509/test/invalid_extension_root_subject_alt_name.pem", + "crypto/x509/test/invalid_extension_root_subject_key_identifier.pem", + "crypto/x509/test/many_constraints.pem", + "crypto/x509/test/many_names1.pem", + "crypto/x509/test/many_names2.pem", + "crypto/x509/test/many_names3.pem", + "crypto/x509/test/policy_intermediate.pem", + "crypto/x509/test/policy_intermediate_any.pem", + "crypto/x509/test/policy_intermediate_duplicate.pem", + "crypto/x509/test/policy_intermediate_invalid.pem", + "crypto/x509/test/policy_intermediate_mapped.pem", + "crypto/x509/test/policy_intermediate_mapped_any.pem", + "crypto/x509/test/policy_intermediate_mapped_oid3.pem", + "crypto/x509/test/policy_intermediate_require.pem", + "crypto/x509/test/policy_intermediate_require1.pem", + "crypto/x509/test/policy_intermediate_require2.pem", + "crypto/x509/test/policy_intermediate_require_duplicate.pem", + "crypto/x509/test/policy_intermediate_require_no_policies.pem", + "crypto/x509/test/policy_leaf.pem", + "crypto/x509/test/policy_leaf_any.pem", + "crypto/x509/test/policy_leaf_duplicate.pem", + "crypto/x509/test/policy_leaf_invalid.pem", + "crypto/x509/test/policy_leaf_none.pem", + "crypto/x509/test/policy_leaf_oid1.pem", + "crypto/x509/test/policy_leaf_oid2.pem", + "crypto/x509/test/policy_leaf_oid3.pem", + "crypto/x509/test/policy_leaf_oid4.pem", + "crypto/x509/test/policy_leaf_oid5.pem", + "crypto/x509/test/policy_leaf_require.pem", + "crypto/x509/test/policy_leaf_require1.pem", + "crypto/x509/test/policy_root.pem", + "crypto/x509/test/policy_root2.pem", + "crypto/x509/test/policy_root_cross_inhibit_mapping.pem", + "crypto/x509/test/pss_sha1.pem", + "crypto/x509/test/pss_sha1_explicit.pem", + "crypto/x509/test/pss_sha1_mgf1_syntax_error.pem", + "crypto/x509/test/pss_sha224.pem", + "crypto/x509/test/pss_sha256.pem", + "crypto/x509/test/pss_sha256_explicit_trailer.pem", + "crypto/x509/test/pss_sha256_mgf1_sha384.pem", + "crypto/x509/test/pss_sha256_mgf1_syntax_error.pem", + "crypto/x509/test/pss_sha256_omit_nulls.pem", + "crypto/x509/test/pss_sha256_salt31.pem", + "crypto/x509/test/pss_sha256_salt_overflow.pem", + "crypto/x509/test/pss_sha256_unknown_mgf.pem", + "crypto/x509/test/pss_sha256_wrong_trailer.pem", + "crypto/x509/test/pss_sha384.pem", + "crypto/x509/test/pss_sha512.pem", + "crypto/x509/test/some_names1.pem", + "crypto/x509/test/some_names2.pem", + "crypto/x509/test/some_names3.pem", + "crypto/x509/test/trailing_data_leaf_authority_key_identifier.pem", + "crypto/x509/test/trailing_data_leaf_basic_constraints.pem", + "crypto/x509/test/trailing_data_leaf_ext_key_usage.pem", + "crypto/x509/test/trailing_data_leaf_key_usage.pem", + "crypto/x509/test/trailing_data_leaf_name_constraints.pem", + "crypto/x509/test/trailing_data_leaf_subject_alt_name.pem", + "crypto/x509/test/trailing_data_leaf_subject_key_identifier.pem", + "third_party/wycheproof_testvectors/aes_cbc_pkcs5_test.txt", + "third_party/wycheproof_testvectors/aes_cmac_test.txt", + "third_party/wycheproof_testvectors/aes_gcm_siv_test.txt", + "third_party/wycheproof_testvectors/aes_gcm_test.txt", + "third_party/wycheproof_testvectors/chacha20_poly1305_test.txt", + "third_party/wycheproof_testvectors/dsa_test.txt", + "third_party/wycheproof_testvectors/ecdh_secp224r1_test.txt", + "third_party/wycheproof_testvectors/ecdh_secp256r1_test.txt", + "third_party/wycheproof_testvectors/ecdh_secp384r1_test.txt", + "third_party/wycheproof_testvectors/ecdh_secp521r1_test.txt", + "third_party/wycheproof_testvectors/ecdsa_secp224r1_sha224_test.txt", + "third_party/wycheproof_testvectors/ecdsa_secp224r1_sha256_test.txt", + "third_party/wycheproof_testvectors/ecdsa_secp224r1_sha512_test.txt", + "third_party/wycheproof_testvectors/ecdsa_secp256r1_sha256_test.txt", + "third_party/wycheproof_testvectors/ecdsa_secp256r1_sha512_test.txt", + "third_party/wycheproof_testvectors/ecdsa_secp384r1_sha384_test.txt", + "third_party/wycheproof_testvectors/ecdsa_secp384r1_sha512_test.txt", + "third_party/wycheproof_testvectors/ecdsa_secp521r1_sha512_test.txt", + "third_party/wycheproof_testvectors/eddsa_test.txt", + "third_party/wycheproof_testvectors/hkdf_sha1_test.txt", + "third_party/wycheproof_testvectors/hkdf_sha256_test.txt", + "third_party/wycheproof_testvectors/hkdf_sha384_test.txt", + "third_party/wycheproof_testvectors/hkdf_sha512_test.txt", + "third_party/wycheproof_testvectors/hmac_sha1_test.txt", + "third_party/wycheproof_testvectors/hmac_sha224_test.txt", + "third_party/wycheproof_testvectors/hmac_sha256_test.txt", + "third_party/wycheproof_testvectors/hmac_sha384_test.txt", + "third_party/wycheproof_testvectors/hmac_sha512_test.txt", + "third_party/wycheproof_testvectors/kw_test.txt", + "third_party/wycheproof_testvectors/kwp_test.txt", + "third_party/wycheproof_testvectors/primality_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha1_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha224_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha224_mgf1sha224_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha256_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha256_mgf1sha256_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha384_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha384_mgf1sha384_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha512_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_2048_sha512_mgf1sha512_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_3072_sha256_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_3072_sha256_mgf1sha256_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_3072_sha512_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_3072_sha512_mgf1sha512_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_4096_sha256_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_4096_sha256_mgf1sha256_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_4096_sha512_mgf1sha1_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_4096_sha512_mgf1sha512_test.txt", + "third_party/wycheproof_testvectors/rsa_oaep_misc_test.txt", + "third_party/wycheproof_testvectors/rsa_pkcs1_2048_test.txt", + "third_party/wycheproof_testvectors/rsa_pkcs1_3072_test.txt", + "third_party/wycheproof_testvectors/rsa_pkcs1_4096_test.txt", + "third_party/wycheproof_testvectors/rsa_pss_2048_sha1_mgf1_20_test.txt", + "third_party/wycheproof_testvectors/rsa_pss_2048_sha256_mgf1_0_test.txt", + "third_party/wycheproof_testvectors/rsa_pss_2048_sha256_mgf1_32_test.txt", + "third_party/wycheproof_testvectors/rsa_pss_3072_sha256_mgf1_32_test.txt", + "third_party/wycheproof_testvectors/rsa_pss_4096_sha256_mgf1_32_test.txt", + "third_party/wycheproof_testvectors/rsa_pss_4096_sha512_mgf1_32_test.txt", + "third_party/wycheproof_testvectors/rsa_pss_misc_test.txt", + "third_party/wycheproof_testvectors/rsa_sig_gen_misc_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_2048_sha224_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_2048_sha256_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_2048_sha384_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_2048_sha512_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_3072_sha256_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_3072_sha384_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_3072_sha512_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_4096_sha384_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_4096_sha512_test.txt", + "third_party/wycheproof_testvectors/rsa_signature_test.txt", + "third_party/wycheproof_testvectors/x25519_test.txt", + "third_party/wycheproof_testvectors/xchacha20_poly1305_test.txt" + ] + }, + "decrepit": { + "srcs": [ + "decrepit/bio/base64_bio.c", + "decrepit/blowfish/blowfish.c", + "decrepit/cast/cast.c", + "decrepit/cast/cast_tables.c", + "decrepit/cfb/cfb.c", + "decrepit/des/cfb64ede.c", + "decrepit/dh/dh_decrepit.c", + "decrepit/dsa/dsa_decrepit.c", + "decrepit/evp/dss1.c", + "decrepit/evp/evp_do_all.c", + "decrepit/obj/obj_decrepit.c", + "decrepit/rc4/rc4_decrepit.c", + "decrepit/ripemd/ripemd.c", + "decrepit/rsa/rsa_decrepit.c", + "decrepit/ssl/ssl_decrepit.c", + "decrepit/x509/x509_decrepit.c", + "decrepit/xts/xts.c" + ] + }, + "decrepit_test": { + "srcs": [ + "crypto/test/gtest_main.cc", + "decrepit/blowfish/blowfish_test.cc", + "decrepit/cast/cast_test.cc", + "decrepit/cfb/cfb_test.cc", + "decrepit/des/des_test.cc", + "decrepit/evp/evp_test.cc", + "decrepit/ripemd/ripemd_test.cc", + "decrepit/xts/xts_test.cc" + ] + }, + "pki": { + "srcs": [ + "pki/cert_error_id.cc", + "pki/cert_error_params.cc", + "pki/cert_errors.cc", + "pki/cert_issuer_source_static.cc", + "pki/certificate.cc", + "pki/certificate_policies.cc", + "pki/common_cert_errors.cc", + "pki/crl.cc", + "pki/encode_values.cc", + "pki/extended_key_usage.cc", + "pki/general_names.cc", + "pki/input.cc", + "pki/ip_util.cc", + "pki/name_constraints.cc", + "pki/ocsp.cc", + "pki/ocsp_verify_result.cc", + "pki/parse_certificate.cc", + "pki/parse_name.cc", + "pki/parse_values.cc", + "pki/parsed_certificate.cc", + "pki/parser.cc", + "pki/path_builder.cc", + "pki/pem.cc", + "pki/revocation_util.cc", + "pki/signature_algorithm.cc", + "pki/simple_path_builder_delegate.cc", + "pki/string_util.cc", + "pki/trust_store.cc", + "pki/trust_store_collection.cc", + "pki/trust_store_in_memory.cc", + "pki/verify_certificate_chain.cc", + "pki/verify_error.cc", + "pki/verify_name_match.cc", + "pki/verify_signed_data.cc" + ], + "hdrs": [ + "include/openssl/pki/certificate.h", + "include/openssl/pki/signature_verify_cache.h", + "include/openssl/pki/verify_error.h" + ], + "internal_hdrs": [ + "pki/cert_error_id.h", + "pki/cert_error_params.h", + "pki/cert_errors.h", + "pki/cert_issuer_source.h", + "pki/cert_issuer_source_static.h", + "pki/cert_issuer_source_sync_unittest.h", + "pki/certificate_policies.h", + "pki/common_cert_errors.h", + "pki/crl.h", + "pki/encode_values.h", + "pki/extended_key_usage.h", + "pki/general_names.h", + "pki/input.h", + "pki/ip_util.h", + "pki/mock_signature_verify_cache.h", + "pki/name_constraints.h", + "pki/nist_pkits_unittest.h", + "pki/ocsp.h", + "pki/ocsp_revocation_status.h", + "pki/ocsp_verify_result.h", + "pki/parse_certificate.h", + "pki/parse_name.h", + "pki/parse_values.h", + "pki/parsed_certificate.h", + "pki/parser.h", + "pki/path_builder.h", + "pki/pem.h", + "pki/revocation_util.h", + "pki/signature_algorithm.h", + "pki/simple_path_builder_delegate.h", + "pki/string_util.h", + "pki/test_helpers.h", + "pki/testdata/nist-pkits/pkits_testcases-inl.h", + "pki/trust_store.h", + "pki/trust_store_collection.h", + "pki/trust_store_in_memory.h", + "pki/verify_certificate_chain.h", + "pki/verify_certificate_chain_typed_unittest.h", + "pki/verify_name_match.h", + "pki/verify_signed_data.h" + ] + }, + "pki_test": { + "srcs": [ + "crypto/test/gtest_main.cc", + "pki/cert_issuer_source_static_unittest.cc", + "pki/certificate_policies_unittest.cc", + "pki/certificate_unittest.cc", + "pki/crl_unittest.cc", + "pki/encode_values_unittest.cc", + "pki/extended_key_usage_unittest.cc", + "pki/general_names_unittest.cc", + "pki/input_unittest.cc", + "pki/ip_util_unittest.cc", + "pki/mock_signature_verify_cache.cc", + "pki/name_constraints_unittest.cc", + "pki/nist_pkits_unittest.cc", + "pki/ocsp_unittest.cc", + "pki/parse_certificate_unittest.cc", + "pki/parse_name_unittest.cc", + "pki/parse_values_unittest.cc", + "pki/parsed_certificate_unittest.cc", + "pki/parser_unittest.cc", + "pki/path_builder_pkits_unittest.cc", + "pki/path_builder_unittest.cc", + "pki/path_builder_verify_certificate_chain_unittest.cc", + "pki/pem_unittest.cc", + "pki/signature_algorithm_unittest.cc", + "pki/simple_path_builder_delegate_unittest.cc", + "pki/string_util_unittest.cc", + "pki/test_helpers.cc", + "pki/trust_store_collection_unittest.cc", + "pki/trust_store_in_memory_unittest.cc", + "pki/verify_certificate_chain_pkits_unittest.cc", + "pki/verify_certificate_chain_unittest.cc", + "pki/verify_name_match_unittest.cc", + "pki/verify_signed_data_unittest.cc" + ], + "data": [ + "pki/testdata/cert_issuer_source_static_unittest/c1.pem", + "pki/testdata/cert_issuer_source_static_unittest/c2.pem", + "pki/testdata/cert_issuer_source_static_unittest/d.pem", + "pki/testdata/cert_issuer_source_static_unittest/e1.pem", + "pki/testdata/cert_issuer_source_static_unittest/e2.pem", + "pki/testdata/cert_issuer_source_static_unittest/i1_1.pem", + "pki/testdata/cert_issuer_source_static_unittest/i1_2.pem", + "pki/testdata/cert_issuer_source_static_unittest/i2.pem", + "pki/testdata/cert_issuer_source_static_unittest/i3_1.pem", + "pki/testdata/cert_issuer_source_static_unittest/i3_2.pem", + "pki/testdata/cert_issuer_source_static_unittest/root.pem", + "pki/testdata/certificate_policies_unittest/anypolicy.pem", + "pki/testdata/certificate_policies_unittest/anypolicy_with_qualifier.pem", + "pki/testdata/certificate_policies_unittest/invalid-anypolicy_with_custom_qualifier.pem", + "pki/testdata/certificate_policies_unittest/invalid-empty.pem", + "pki/testdata/certificate_policies_unittest/invalid-policy_1_2_3_dupe.pem", + "pki/testdata/certificate_policies_unittest/invalid-policy_1_2_3_policyinformation_unconsumed_data.pem", + "pki/testdata/certificate_policies_unittest/invalid-policy_1_2_3_policyqualifierinfo_unconsumed_data.pem", + "pki/testdata/certificate_policies_unittest/invalid-policy_1_2_3_with_empty_qualifiers_sequence.pem", + "pki/testdata/certificate_policies_unittest/invalid-policy_identifier_not_oid.pem", + "pki/testdata/certificate_policies_unittest/policy_1_2_3.pem", + "pki/testdata/certificate_policies_unittest/policy_1_2_3_and_1_2_4.pem", + "pki/testdata/certificate_policies_unittest/policy_1_2_3_and_1_2_4_with_qualifiers.pem", + "pki/testdata/certificate_policies_unittest/policy_1_2_3_with_custom_qualifier.pem", + "pki/testdata/certificate_policies_unittest/policy_1_2_3_with_qualifier.pem", + "pki/testdata/crl_unittest/bad_crldp_has_crlissuer.pem", + "pki/testdata/crl_unittest/bad_fake_critical_crlentryextension.pem", + "pki/testdata/crl_unittest/bad_fake_critical_extension.pem", + "pki/testdata/crl_unittest/bad_idp_contains_wrong_uri.pem", + "pki/testdata/crl_unittest/bad_idp_indirectcrl.pem", + "pki/testdata/crl_unittest/bad_idp_onlycontainscacerts.pem", + "pki/testdata/crl_unittest/bad_idp_onlycontainscacerts_no_basic_constraints.pem", + "pki/testdata/crl_unittest/bad_idp_onlycontainsusercerts.pem", + "pki/testdata/crl_unittest/bad_idp_uri_and_onlycontainscacerts.pem", + "pki/testdata/crl_unittest/bad_idp_uri_and_onlycontainsusercerts.pem", + "pki/testdata/crl_unittest/bad_key_rollover_signature.pem", + "pki/testdata/crl_unittest/bad_nextupdate_too_old.pem", + "pki/testdata/crl_unittest/bad_signature.pem", + "pki/testdata/crl_unittest/bad_thisupdate_in_future.pem", + "pki/testdata/crl_unittest/bad_thisupdate_too_old.pem", + "pki/testdata/crl_unittest/bad_wrong_issuer.pem", + "pki/testdata/crl_unittest/good.pem", + "pki/testdata/crl_unittest/good_fake_extension.pem", + "pki/testdata/crl_unittest/good_fake_extension_no_nextupdate.pem", + "pki/testdata/crl_unittest/good_generalizedtime.pem", + "pki/testdata/crl_unittest/good_idp_contains_uri.pem", + "pki/testdata/crl_unittest/good_idp_onlycontainscacerts.pem", + "pki/testdata/crl_unittest/good_idp_onlycontainsusercerts.pem", + "pki/testdata/crl_unittest/good_idp_onlycontainsusercerts_no_basic_constraints.pem", + "pki/testdata/crl_unittest/good_idp_uri_and_onlycontainscacerts.pem", + "pki/testdata/crl_unittest/good_idp_uri_and_onlycontainsusercerts.pem", + "pki/testdata/crl_unittest/good_issuer_name_normalization.pem", + "pki/testdata/crl_unittest/good_issuer_no_keyusage.pem", + "pki/testdata/crl_unittest/good_key_rollover.pem", + "pki/testdata/crl_unittest/good_no_crldp.pem", + "pki/testdata/crl_unittest/good_no_nextupdate.pem", + "pki/testdata/crl_unittest/good_no_version.pem", + "pki/testdata/crl_unittest/invalid_garbage_after_crlentryextensions.pem", + "pki/testdata/crl_unittest/invalid_garbage_after_extensions.pem", + "pki/testdata/crl_unittest/invalid_garbage_after_nextupdate.pem", + "pki/testdata/crl_unittest/invalid_garbage_after_revocationdate.pem", + "pki/testdata/crl_unittest/invalid_garbage_after_revokedcerts.pem", + "pki/testdata/crl_unittest/invalid_garbage_after_signaturevalue.pem", + "pki/testdata/crl_unittest/invalid_garbage_after_thisupdate.pem", + "pki/testdata/crl_unittest/invalid_garbage_crlentry.pem", + "pki/testdata/crl_unittest/invalid_garbage_issuer_name.pem", + "pki/testdata/crl_unittest/invalid_garbage_revocationdate.pem", + "pki/testdata/crl_unittest/invalid_garbage_revoked_serial_number.pem", + "pki/testdata/crl_unittest/invalid_garbage_signaturealgorithm.pem", + "pki/testdata/crl_unittest/invalid_garbage_signaturevalue.pem", + "pki/testdata/crl_unittest/invalid_garbage_tbs_signature_algorithm.pem", + "pki/testdata/crl_unittest/invalid_garbage_tbscertlist.pem", + "pki/testdata/crl_unittest/invalid_garbage_thisupdate.pem", + "pki/testdata/crl_unittest/invalid_garbage_version.pem", + "pki/testdata/crl_unittest/invalid_idp_dpname_choice_extra_data.pem", + "pki/testdata/crl_unittest/invalid_idp_empty_sequence.pem", + "pki/testdata/crl_unittest/invalid_idp_onlycontains_user_and_ca_certs.pem", + "pki/testdata/crl_unittest/invalid_idp_onlycontainsusercerts_v1_leaf.pem", + "pki/testdata/crl_unittest/invalid_issuer_keyusage_no_crlsign.pem", + "pki/testdata/crl_unittest/invalid_key_rollover_issuer_keyusage_no_crlsign.pem", + "pki/testdata/crl_unittest/invalid_mismatched_signature_algorithm.pem", + "pki/testdata/crl_unittest/invalid_revoked_empty_sequence.pem", + "pki/testdata/crl_unittest/invalid_v1_explicit.pem", + "pki/testdata/crl_unittest/invalid_v1_with_crlentryextension.pem", + "pki/testdata/crl_unittest/invalid_v1_with_extension.pem", + "pki/testdata/crl_unittest/invalid_v3.pem", + "pki/testdata/crl_unittest/revoked.pem", + "pki/testdata/crl_unittest/revoked_fake_crlentryextension.pem", + "pki/testdata/crl_unittest/revoked_generalized_revocationdate.pem", + "pki/testdata/crl_unittest/revoked_key_rollover.pem", + "pki/testdata/crl_unittest/revoked_no_nextupdate.pem", + "pki/testdata/name_constraints_unittest/directoryname-excludeall.pem", + "pki/testdata/name_constraints_unittest/directoryname-excluded.pem", + "pki/testdata/name_constraints_unittest/directoryname.pem", + "pki/testdata/name_constraints_unittest/directoryname_and_dnsname.pem", + "pki/testdata/name_constraints_unittest/directoryname_and_dnsname_and_ipaddress.pem", + "pki/testdata/name_constraints_unittest/dnsname-exclude_dot.pem", + "pki/testdata/name_constraints_unittest/dnsname-excludeall.pem", + "pki/testdata/name_constraints_unittest/dnsname-excluded.pem", + "pki/testdata/name_constraints_unittest/dnsname-excluded_with_leading_dot.pem", + "pki/testdata/name_constraints_unittest/dnsname-permitted_two_dot.pem", + "pki/testdata/name_constraints_unittest/dnsname-permitted_with_leading_dot.pem", + "pki/testdata/name_constraints_unittest/dnsname-with_max.pem", + "pki/testdata/name_constraints_unittest/dnsname-with_min_0.pem", + "pki/testdata/name_constraints_unittest/dnsname-with_min_0_and_max.pem", + "pki/testdata/name_constraints_unittest/dnsname-with_min_1.pem", + "pki/testdata/name_constraints_unittest/dnsname-with_min_1_and_max.pem", + "pki/testdata/name_constraints_unittest/dnsname.pem", + "pki/testdata/name_constraints_unittest/dnsname2.pem", + "pki/testdata/name_constraints_unittest/edipartyname-excluded.pem", + "pki/testdata/name_constraints_unittest/edipartyname-permitted.pem", + "pki/testdata/name_constraints_unittest/invalid-empty_excluded_subtree.pem", + "pki/testdata/name_constraints_unittest/invalid-empty_permitted_subtree.pem", + "pki/testdata/name_constraints_unittest/invalid-no_subtrees.pem", + "pki/testdata/name_constraints_unittest/ipaddress-excludeall.pem", + "pki/testdata/name_constraints_unittest/ipaddress-excluded.pem", + "pki/testdata/name_constraints_unittest/ipaddress-invalid_addr.pem", + "pki/testdata/name_constraints_unittest/ipaddress-invalid_mask_not_contiguous_1.pem", + "pki/testdata/name_constraints_unittest/ipaddress-invalid_mask_not_contiguous_2.pem", + "pki/testdata/name_constraints_unittest/ipaddress-invalid_mask_not_contiguous_3.pem", + "pki/testdata/name_constraints_unittest/ipaddress-invalid_mask_not_contiguous_4.pem", + "pki/testdata/name_constraints_unittest/ipaddress-mapped_addrs.pem", + "pki/testdata/name_constraints_unittest/ipaddress-permit_all.pem", + "pki/testdata/name_constraints_unittest/ipaddress-permit_prefix1.pem", + "pki/testdata/name_constraints_unittest/ipaddress-permit_prefix31.pem", + "pki/testdata/name_constraints_unittest/ipaddress-permit_singlehost.pem", + "pki/testdata/name_constraints_unittest/ipaddress.pem", + "pki/testdata/name_constraints_unittest/name-ca.pem", + "pki/testdata/name_constraints_unittest/name-de.pem", + "pki/testdata/name_constraints_unittest/name-empty.pem", + "pki/testdata/name_constraints_unittest/name-jp-tokyo.pem", + "pki/testdata/name_constraints_unittest/name-jp.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-1.1.1.1.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-192.168.1.1.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-email-invalidstring.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-email-localpartcase.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-email-multiple.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-email.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-foo.com.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-ipv6.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona-permitted.example.com.pem", + "pki/testdata/name_constraints_unittest/name-us-arizona.pem", + "pki/testdata/name_constraints_unittest/name-us-california-192.168.1.1.pem", + "pki/testdata/name_constraints_unittest/name-us-california-mountain_view.pem", + "pki/testdata/name_constraints_unittest/name-us-california-permitted.example.com.pem", + "pki/testdata/name_constraints_unittest/name-us-california.pem", + "pki/testdata/name_constraints_unittest/name-us.pem", + "pki/testdata/name_constraints_unittest/othername-excluded.pem", + "pki/testdata/name_constraints_unittest/othername-permitted.pem", + "pki/testdata/name_constraints_unittest/registeredid-excluded.pem", + "pki/testdata/name_constraints_unittest/registeredid-permitted.pem", + "pki/testdata/name_constraints_unittest/rfc822name-excluded-empty.pem", + "pki/testdata/name_constraints_unittest/rfc822name-excluded-hostname.pem", + "pki/testdata/name_constraints_unittest/rfc822name-excluded-hostnamewithat.pem", + "pki/testdata/name_constraints_unittest/rfc822name-excluded-ipv4.pem", + "pki/testdata/name_constraints_unittest/rfc822name-excluded-quoted.pem", + "pki/testdata/name_constraints_unittest/rfc822name-excluded-subdomains.pem", + "pki/testdata/name_constraints_unittest/rfc822name-excluded.pem", + "pki/testdata/name_constraints_unittest/rfc822name-permitted-empty.pem", + "pki/testdata/name_constraints_unittest/rfc822name-permitted-hostname.pem", + "pki/testdata/name_constraints_unittest/rfc822name-permitted-hostnamewithat.pem", + "pki/testdata/name_constraints_unittest/rfc822name-permitted-ipv4.pem", + "pki/testdata/name_constraints_unittest/rfc822name-permitted-quoted.pem", + "pki/testdata/name_constraints_unittest/rfc822name-permitted-subdomains.pem", + "pki/testdata/name_constraints_unittest/rfc822name-permitted.pem", + "pki/testdata/name_constraints_unittest/san-directoryname.pem", + "pki/testdata/name_constraints_unittest/san-dnsname.pem", + "pki/testdata/name_constraints_unittest/san-edipartyname.pem", + "pki/testdata/name_constraints_unittest/san-excluded-directoryname.pem", + "pki/testdata/name_constraints_unittest/san-excluded-dnsname.pem", + "pki/testdata/name_constraints_unittest/san-excluded-ipaddress.pem", + "pki/testdata/name_constraints_unittest/san-invalid-empty.pem", + "pki/testdata/name_constraints_unittest/san-invalid-ipaddress.pem", + "pki/testdata/name_constraints_unittest/san-ipaddress4.pem", + "pki/testdata/name_constraints_unittest/san-ipaddress6.pem", + "pki/testdata/name_constraints_unittest/san-othername.pem", + "pki/testdata/name_constraints_unittest/san-permitted.pem", + "pki/testdata/name_constraints_unittest/san-registeredid.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-domaincase.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-empty-localpart.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-empty.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-ipv4.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-localpartcase.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-multiple.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-no-at.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-quoted.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-subdomain-no-at.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-subdomain-two-ats.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-subdomain.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-subdomaincase.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name-two-ats.pem", + "pki/testdata/name_constraints_unittest/san-rfc822name.pem", + "pki/testdata/name_constraints_unittest/san-uri.pem", + "pki/testdata/name_constraints_unittest/san-x400address.pem", + "pki/testdata/name_constraints_unittest/uri-excluded.pem", + "pki/testdata/name_constraints_unittest/uri-permitted.pem", + "pki/testdata/name_constraints_unittest/x400address-excluded.pem", + "pki/testdata/name_constraints_unittest/x400address-permitted.pem", + "pki/testdata/nist-pkits/certs/AllCertificatesNoPoliciesTest2EE.crt", + "pki/testdata/nist-pkits/certs/AllCertificatesSamePoliciesTest10EE.crt", + "pki/testdata/nist-pkits/certs/AllCertificatesSamePoliciesTest13EE.crt", + "pki/testdata/nist-pkits/certs/AllCertificatesanyPolicyTest11EE.crt", + "pki/testdata/nist-pkits/certs/AnyPolicyTest14EE.crt", + "pki/testdata/nist-pkits/certs/BadCRLIssuerNameCACert.crt", + "pki/testdata/nist-pkits/certs/BadCRLSignatureCACert.crt", + "pki/testdata/nist-pkits/certs/BadSignedCACert.crt", + "pki/testdata/nist-pkits/certs/BadnotAfterDateCACert.crt", + "pki/testdata/nist-pkits/certs/BadnotBeforeDateCACert.crt", + "pki/testdata/nist-pkits/certs/BasicSelfIssuedCRLSigningKeyCACert.crt", + "pki/testdata/nist-pkits/certs/BasicSelfIssuedCRLSigningKeyCRLCert.crt", + "pki/testdata/nist-pkits/certs/BasicSelfIssuedNewKeyCACert.crt", + "pki/testdata/nist-pkits/certs/BasicSelfIssuedNewKeyOldWithNewCACert.crt", + "pki/testdata/nist-pkits/certs/BasicSelfIssuedOldKeyCACert.crt", + "pki/testdata/nist-pkits/certs/BasicSelfIssuedOldKeyNewWithOldCACert.crt", + "pki/testdata/nist-pkits/certs/CPSPointerQualifierTest20EE.crt", + "pki/testdata/nist-pkits/certs/DSACACert.crt", + "pki/testdata/nist-pkits/certs/DSAParametersInheritedCACert.crt", + "pki/testdata/nist-pkits/certs/DifferentPoliciesTest12EE.crt", + "pki/testdata/nist-pkits/certs/DifferentPoliciesTest3EE.crt", + "pki/testdata/nist-pkits/certs/DifferentPoliciesTest4EE.crt", + "pki/testdata/nist-pkits/certs/DifferentPoliciesTest5EE.crt", + "pki/testdata/nist-pkits/certs/DifferentPoliciesTest7EE.crt", + "pki/testdata/nist-pkits/certs/DifferentPoliciesTest8EE.crt", + "pki/testdata/nist-pkits/certs/DifferentPoliciesTest9EE.crt", + "pki/testdata/nist-pkits/certs/GeneralizedTimeCRLnextUpdateCACert.crt", + "pki/testdata/nist-pkits/certs/GoodCACert.crt", + "pki/testdata/nist-pkits/certs/GoodsubCACert.crt", + "pki/testdata/nist-pkits/certs/GoodsubCAPanyPolicyMapping1to2CACert.crt", + "pki/testdata/nist-pkits/certs/InvalidBadCRLIssuerNameTest5EE.crt", + "pki/testdata/nist-pkits/certs/InvalidBadCRLSignatureTest4EE.crt", + "pki/testdata/nist-pkits/certs/InvalidBasicSelfIssuedCRLSigningKeyTest7EE.crt", + "pki/testdata/nist-pkits/certs/InvalidBasicSelfIssuedCRLSigningKeyTest8EE.crt", + "pki/testdata/nist-pkits/certs/InvalidBasicSelfIssuedNewWithOldTest5EE.crt", + "pki/testdata/nist-pkits/certs/InvalidBasicSelfIssuedOldWithNewTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidCASignatureTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidCAnotAfterDateTest5EE.crt", + "pki/testdata/nist-pkits/certs/InvalidCAnotBeforeDateTest1EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNSnameConstraintsTest31EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNSnameConstraintsTest33EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNSnameConstraintsTest38EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNandRFC822nameConstraintsTest28EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNandRFC822nameConstraintsTest29EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest10EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest12EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest13EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest15EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest16EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest17EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest20EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest3EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest7EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest8EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDNnameConstraintsTest9EE.crt", + "pki/testdata/nist-pkits/certs/InvalidDSASignatureTest6EE.crt", + "pki/testdata/nist-pkits/certs/InvalidEESignatureTest3EE.crt", + "pki/testdata/nist-pkits/certs/InvalidEEnotAfterDateTest6EE.crt", + "pki/testdata/nist-pkits/certs/InvalidEEnotBeforeDateTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidIDPwithindirectCRLTest23EE.crt", + "pki/testdata/nist-pkits/certs/InvalidIDPwithindirectCRLTest26EE.crt", + "pki/testdata/nist-pkits/certs/InvalidLongSerialNumberTest18EE.crt", + "pki/testdata/nist-pkits/certs/InvalidMappingFromanyPolicyTest7EE.crt", + "pki/testdata/nist-pkits/certs/InvalidMappingToanyPolicyTest8EE.crt", + "pki/testdata/nist-pkits/certs/InvalidMissingCRLTest1EE.crt", + "pki/testdata/nist-pkits/certs/InvalidMissingbasicConstraintsTest1EE.crt", + "pki/testdata/nist-pkits/certs/InvalidNameChainingOrderTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidNameChainingTest1EE.crt", + "pki/testdata/nist-pkits/certs/InvalidNegativeSerialNumberTest15EE.crt", + "pki/testdata/nist-pkits/certs/InvalidOldCRLnextUpdateTest11EE.crt", + "pki/testdata/nist-pkits/certs/InvalidPolicyMappingTest10EE.crt", + "pki/testdata/nist-pkits/certs/InvalidPolicyMappingTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidPolicyMappingTest4EE.crt", + "pki/testdata/nist-pkits/certs/InvalidRFC822nameConstraintsTest22EE.crt", + "pki/testdata/nist-pkits/certs/InvalidRFC822nameConstraintsTest24EE.crt", + "pki/testdata/nist-pkits/certs/InvalidRFC822nameConstraintsTest26EE.crt", + "pki/testdata/nist-pkits/certs/InvalidRevokedCATest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidRevokedEETest3EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedinhibitAnyPolicyTest10EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedinhibitAnyPolicyTest8EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedinhibitPolicyMappingTest10EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedinhibitPolicyMappingTest11EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedinhibitPolicyMappingTest8EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedinhibitPolicyMappingTest9EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedpathLenConstraintTest16EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedrequireExplicitPolicyTest7EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSelfIssuedrequireExplicitPolicyTest8EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSeparateCertificateandCRLKeysTest20EE.crt", + "pki/testdata/nist-pkits/certs/InvalidSeparateCertificateandCRLKeysTest21EE.crt", + "pki/testdata/nist-pkits/certs/InvalidURInameConstraintsTest35EE.crt", + "pki/testdata/nist-pkits/certs/InvalidURInameConstraintsTest37EE.crt", + "pki/testdata/nist-pkits/certs/InvalidUnknownCRLEntryExtensionTest8EE.crt", + "pki/testdata/nist-pkits/certs/InvalidUnknownCRLExtensionTest10EE.crt", + "pki/testdata/nist-pkits/certs/InvalidUnknownCRLExtensionTest9EE.crt", + "pki/testdata/nist-pkits/certs/InvalidUnknownCriticalCertificateExtensionTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidWrongCRLTest6EE.crt", + "pki/testdata/nist-pkits/certs/InvalidcAFalseTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidcAFalseTest3EE.crt", + "pki/testdata/nist-pkits/certs/InvalidcRLIssuerTest27EE.crt", + "pki/testdata/nist-pkits/certs/InvalidcRLIssuerTest31EE.crt", + "pki/testdata/nist-pkits/certs/InvalidcRLIssuerTest32EE.crt", + "pki/testdata/nist-pkits/certs/InvalidcRLIssuerTest34EE.crt", + "pki/testdata/nist-pkits/certs/InvalidcRLIssuerTest35EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddeltaCRLIndicatorNoBaseTest1EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddeltaCRLTest10EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddeltaCRLTest3EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddeltaCRLTest4EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddeltaCRLTest6EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddeltaCRLTest9EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddistributionPointTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddistributionPointTest3EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddistributionPointTest6EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddistributionPointTest8EE.crt", + "pki/testdata/nist-pkits/certs/InvaliddistributionPointTest9EE.crt", + "pki/testdata/nist-pkits/certs/InvalidinhibitAnyPolicyTest1EE.crt", + "pki/testdata/nist-pkits/certs/InvalidinhibitAnyPolicyTest4EE.crt", + "pki/testdata/nist-pkits/certs/InvalidinhibitAnyPolicyTest5EE.crt", + "pki/testdata/nist-pkits/certs/InvalidinhibitAnyPolicyTest6EE.crt", + "pki/testdata/nist-pkits/certs/InvalidinhibitPolicyMappingTest1EE.crt", + "pki/testdata/nist-pkits/certs/InvalidinhibitPolicyMappingTest3EE.crt", + "pki/testdata/nist-pkits/certs/InvalidinhibitPolicyMappingTest5EE.crt", + "pki/testdata/nist-pkits/certs/InvalidinhibitPolicyMappingTest6EE.crt", + "pki/testdata/nist-pkits/certs/InvalidkeyUsageCriticalcRLSignFalseTest4EE.crt", + "pki/testdata/nist-pkits/certs/InvalidkeyUsageCriticalkeyCertSignFalseTest1EE.crt", + "pki/testdata/nist-pkits/certs/InvalidkeyUsageNotCriticalcRLSignFalseTest5EE.crt", + "pki/testdata/nist-pkits/certs/InvalidkeyUsageNotCriticalkeyCertSignFalseTest2EE.crt", + "pki/testdata/nist-pkits/certs/InvalidonlyContainsAttributeCertsTest14EE.crt", + "pki/testdata/nist-pkits/certs/InvalidonlyContainsCACertsTest12EE.crt", + "pki/testdata/nist-pkits/certs/InvalidonlyContainsUserCertsTest11EE.crt", + "pki/testdata/nist-pkits/certs/InvalidonlySomeReasonsTest15EE.crt", + "pki/testdata/nist-pkits/certs/InvalidonlySomeReasonsTest16EE.crt", + "pki/testdata/nist-pkits/certs/InvalidonlySomeReasonsTest17EE.crt", + "pki/testdata/nist-pkits/certs/InvalidonlySomeReasonsTest20EE.crt", + "pki/testdata/nist-pkits/certs/InvalidonlySomeReasonsTest21EE.crt", + "pki/testdata/nist-pkits/certs/InvalidpathLenConstraintTest10EE.crt", + "pki/testdata/nist-pkits/certs/InvalidpathLenConstraintTest11EE.crt", + "pki/testdata/nist-pkits/certs/InvalidpathLenConstraintTest12EE.crt", + "pki/testdata/nist-pkits/certs/InvalidpathLenConstraintTest5EE.crt", + "pki/testdata/nist-pkits/certs/InvalidpathLenConstraintTest6EE.crt", + "pki/testdata/nist-pkits/certs/InvalidpathLenConstraintTest9EE.crt", + "pki/testdata/nist-pkits/certs/Invalidpre2000CRLnextUpdateTest12EE.crt", + "pki/testdata/nist-pkits/certs/Invalidpre2000UTCEEnotAfterDateTest7EE.crt", + "pki/testdata/nist-pkits/certs/InvalidrequireExplicitPolicyTest3EE.crt", + "pki/testdata/nist-pkits/certs/InvalidrequireExplicitPolicyTest5EE.crt", + "pki/testdata/nist-pkits/certs/LongSerialNumberCACert.crt", + "pki/testdata/nist-pkits/certs/Mapping1to2CACert.crt", + "pki/testdata/nist-pkits/certs/MappingFromanyPolicyCACert.crt", + "pki/testdata/nist-pkits/certs/MappingToanyPolicyCACert.crt", + "pki/testdata/nist-pkits/certs/MissingbasicConstraintsCACert.crt", + "pki/testdata/nist-pkits/certs/NameOrderingCACert.crt", + "pki/testdata/nist-pkits/certs/NegativeSerialNumberCACert.crt", + "pki/testdata/nist-pkits/certs/NoCRLCACert.crt", + "pki/testdata/nist-pkits/certs/NoPoliciesCACert.crt", + "pki/testdata/nist-pkits/certs/NoissuingDistributionPointCACert.crt", + "pki/testdata/nist-pkits/certs/OldCRLnextUpdateCACert.crt", + "pki/testdata/nist-pkits/certs/OverlappingPoliciesTest6EE.crt", + "pki/testdata/nist-pkits/certs/P12Mapping1to3CACert.crt", + "pki/testdata/nist-pkits/certs/P12Mapping1to3subCACert.crt", + "pki/testdata/nist-pkits/certs/P12Mapping1to3subsubCACert.crt", + "pki/testdata/nist-pkits/certs/P1Mapping1to234CACert.crt", + "pki/testdata/nist-pkits/certs/P1Mapping1to234subCACert.crt", + "pki/testdata/nist-pkits/certs/P1anyPolicyMapping1to2CACert.crt", + "pki/testdata/nist-pkits/certs/PanyPolicyMapping1to2CACert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP1234CACert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP1234subCAP123Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP1234subsubCAP123P12Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP123CACert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP123subCAP12Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP123subsubCAP12P1Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP123subsubCAP12P2Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP123subsubsubCAP12P2P1Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP12CACert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP12subCAP1Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP12subsubCAP1P2Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP2subCA2Cert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP2subCACert.crt", + "pki/testdata/nist-pkits/certs/PoliciesP3CACert.crt", + "pki/testdata/nist-pkits/certs/RFC3280MandatoryAttributeTypesCACert.crt", + "pki/testdata/nist-pkits/certs/RFC3280OptionalAttributeTypesCACert.crt", + "pki/testdata/nist-pkits/certs/RevokedsubCACert.crt", + "pki/testdata/nist-pkits/certs/RolloverfromPrintableStringtoUTF8StringCACert.crt", + "pki/testdata/nist-pkits/certs/SeparateCertificateandCRLKeysCA2CRLSigningCert.crt", + "pki/testdata/nist-pkits/certs/SeparateCertificateandCRLKeysCA2CertificateSigningCACert.crt", + "pki/testdata/nist-pkits/certs/SeparateCertificateandCRLKeysCRLSigningCert.crt", + "pki/testdata/nist-pkits/certs/SeparateCertificateandCRLKeysCertificateSigningCACert.crt", + "pki/testdata/nist-pkits/certs/TrustAnchorRootCertificate.crt", + "pki/testdata/nist-pkits/certs/TwoCRLsCACert.crt", + "pki/testdata/nist-pkits/certs/UIDCACert.crt", + "pki/testdata/nist-pkits/certs/UTF8StringCaseInsensitiveMatchCACert.crt", + "pki/testdata/nist-pkits/certs/UTF8StringEncodedNamesCACert.crt", + "pki/testdata/nist-pkits/certs/UnknownCRLEntryExtensionCACert.crt", + "pki/testdata/nist-pkits/certs/UnknownCRLExtensionCACert.crt", + "pki/testdata/nist-pkits/certs/UserNoticeQualifierTest15EE.crt", + "pki/testdata/nist-pkits/certs/UserNoticeQualifierTest16EE.crt", + "pki/testdata/nist-pkits/certs/UserNoticeQualifierTest17EE.crt", + "pki/testdata/nist-pkits/certs/UserNoticeQualifierTest18EE.crt", + "pki/testdata/nist-pkits/certs/UserNoticeQualifierTest19EE.crt", + "pki/testdata/nist-pkits/certs/ValidBasicSelfIssuedCRLSigningKeyTest6EE.crt", + "pki/testdata/nist-pkits/certs/ValidBasicSelfIssuedNewWithOldTest3EE.crt", + "pki/testdata/nist-pkits/certs/ValidBasicSelfIssuedNewWithOldTest4EE.crt", + "pki/testdata/nist-pkits/certs/ValidBasicSelfIssuedOldWithNewTest1EE.crt", + "pki/testdata/nist-pkits/certs/ValidCertificatePathTest1EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNSnameConstraintsTest30EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNSnameConstraintsTest32EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNandRFC822nameConstraintsTest27EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNnameConstraintsTest11EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNnameConstraintsTest14EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNnameConstraintsTest18EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNnameConstraintsTest19EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNnameConstraintsTest1EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNnameConstraintsTest4EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNnameConstraintsTest5EE.crt", + "pki/testdata/nist-pkits/certs/ValidDNnameConstraintsTest6EE.crt", + "pki/testdata/nist-pkits/certs/ValidDSAParameterInheritanceTest5EE.crt", + "pki/testdata/nist-pkits/certs/ValidDSASignaturesTest4EE.crt", + "pki/testdata/nist-pkits/certs/ValidGeneralizedTimeCRLnextUpdateTest13EE.crt", + "pki/testdata/nist-pkits/certs/ValidGeneralizedTimenotAfterDateTest8EE.crt", + "pki/testdata/nist-pkits/certs/ValidGeneralizedTimenotBeforeDateTest4EE.crt", + "pki/testdata/nist-pkits/certs/ValidIDPwithindirectCRLTest22EE.crt", + "pki/testdata/nist-pkits/certs/ValidIDPwithindirectCRLTest24EE.crt", + "pki/testdata/nist-pkits/certs/ValidIDPwithindirectCRLTest25EE.crt", + "pki/testdata/nist-pkits/certs/ValidLongSerialNumberTest16EE.crt", + "pki/testdata/nist-pkits/certs/ValidLongSerialNumberTest17EE.crt", + "pki/testdata/nist-pkits/certs/ValidNameChainingCapitalizationTest5EE.crt", + "pki/testdata/nist-pkits/certs/ValidNameChainingWhitespaceTest3EE.crt", + "pki/testdata/nist-pkits/certs/ValidNameChainingWhitespaceTest4EE.crt", + "pki/testdata/nist-pkits/certs/ValidNameUIDsTest6EE.crt", + "pki/testdata/nist-pkits/certs/ValidNegativeSerialNumberTest14EE.crt", + "pki/testdata/nist-pkits/certs/ValidNoissuingDistributionPointTest10EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest11EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest12EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest13EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest14EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest1EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest3EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest5EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest6EE.crt", + "pki/testdata/nist-pkits/certs/ValidPolicyMappingTest9EE.crt", + "pki/testdata/nist-pkits/certs/ValidRFC3280MandatoryAttributeTypesTest7EE.crt", + "pki/testdata/nist-pkits/certs/ValidRFC3280OptionalAttributeTypesTest8EE.crt", + "pki/testdata/nist-pkits/certs/ValidRFC822nameConstraintsTest21EE.crt", + "pki/testdata/nist-pkits/certs/ValidRFC822nameConstraintsTest23EE.crt", + "pki/testdata/nist-pkits/certs/ValidRFC822nameConstraintsTest25EE.crt", + "pki/testdata/nist-pkits/certs/ValidRolloverfromPrintableStringtoUTF8StringTest10EE.crt", + "pki/testdata/nist-pkits/certs/ValidSelfIssuedinhibitAnyPolicyTest7EE.crt", + "pki/testdata/nist-pkits/certs/ValidSelfIssuedinhibitAnyPolicyTest9EE.crt", + "pki/testdata/nist-pkits/certs/ValidSelfIssuedinhibitPolicyMappingTest7EE.crt", + "pki/testdata/nist-pkits/certs/ValidSelfIssuedpathLenConstraintTest15EE.crt", + "pki/testdata/nist-pkits/certs/ValidSelfIssuedpathLenConstraintTest17EE.crt", + "pki/testdata/nist-pkits/certs/ValidSelfIssuedrequireExplicitPolicyTest6EE.crt", + "pki/testdata/nist-pkits/certs/ValidSeparateCertificateandCRLKeysTest19EE.crt", + "pki/testdata/nist-pkits/certs/ValidTwoCRLsTest7EE.crt", + "pki/testdata/nist-pkits/certs/ValidURInameConstraintsTest34EE.crt", + "pki/testdata/nist-pkits/certs/ValidURInameConstraintsTest36EE.crt", + "pki/testdata/nist-pkits/certs/ValidUTF8StringCaseInsensitiveMatchTest11EE.crt", + "pki/testdata/nist-pkits/certs/ValidUTF8StringEncodedNamesTest9EE.crt", + "pki/testdata/nist-pkits/certs/ValidUnknownNotCriticalCertificateExtensionTest1EE.crt", + "pki/testdata/nist-pkits/certs/ValidbasicConstraintsNotCriticalTest4EE.crt", + "pki/testdata/nist-pkits/certs/ValidcRLIssuerTest28EE.crt", + "pki/testdata/nist-pkits/certs/ValidcRLIssuerTest29EE.crt", + "pki/testdata/nist-pkits/certs/ValidcRLIssuerTest30EE.crt", + "pki/testdata/nist-pkits/certs/ValidcRLIssuerTest33EE.crt", + "pki/testdata/nist-pkits/certs/ValiddeltaCRLTest2EE.crt", + "pki/testdata/nist-pkits/certs/ValiddeltaCRLTest5EE.crt", + "pki/testdata/nist-pkits/certs/ValiddeltaCRLTest7EE.crt", + "pki/testdata/nist-pkits/certs/ValiddeltaCRLTest8EE.crt", + "pki/testdata/nist-pkits/certs/ValiddistributionPointTest1EE.crt", + "pki/testdata/nist-pkits/certs/ValiddistributionPointTest4EE.crt", + "pki/testdata/nist-pkits/certs/ValiddistributionPointTest5EE.crt", + "pki/testdata/nist-pkits/certs/ValiddistributionPointTest7EE.crt", + "pki/testdata/nist-pkits/certs/ValidinhibitAnyPolicyTest2EE.crt", + "pki/testdata/nist-pkits/certs/ValidinhibitPolicyMappingTest2EE.crt", + "pki/testdata/nist-pkits/certs/ValidinhibitPolicyMappingTest4EE.crt", + "pki/testdata/nist-pkits/certs/ValidkeyUsageNotCriticalTest3EE.crt", + "pki/testdata/nist-pkits/certs/ValidonlyContainsCACertsTest13EE.crt", + "pki/testdata/nist-pkits/certs/ValidonlySomeReasonsTest18EE.crt", + "pki/testdata/nist-pkits/certs/ValidonlySomeReasonsTest19EE.crt", + "pki/testdata/nist-pkits/certs/ValidpathLenConstraintTest13EE.crt", + "pki/testdata/nist-pkits/certs/ValidpathLenConstraintTest14EE.crt", + "pki/testdata/nist-pkits/certs/ValidpathLenConstraintTest7EE.crt", + "pki/testdata/nist-pkits/certs/ValidpathLenConstraintTest8EE.crt", + "pki/testdata/nist-pkits/certs/Validpre2000UTCnotBeforeDateTest3EE.crt", + "pki/testdata/nist-pkits/certs/ValidrequireExplicitPolicyTest1EE.crt", + "pki/testdata/nist-pkits/certs/ValidrequireExplicitPolicyTest2EE.crt", + "pki/testdata/nist-pkits/certs/ValidrequireExplicitPolicyTest4EE.crt", + "pki/testdata/nist-pkits/certs/WrongCRLCACert.crt", + "pki/testdata/nist-pkits/certs/anyPolicyCACert.crt", + "pki/testdata/nist-pkits/certs/basicConstraintsCriticalcAFalseCACert.crt", + "pki/testdata/nist-pkits/certs/basicConstraintsNotCriticalCACert.crt", + "pki/testdata/nist-pkits/certs/basicConstraintsNotCriticalcAFalseCACert.crt", + "pki/testdata/nist-pkits/certs/deltaCRLCA1Cert.crt", + "pki/testdata/nist-pkits/certs/deltaCRLCA2Cert.crt", + "pki/testdata/nist-pkits/certs/deltaCRLCA3Cert.crt", + "pki/testdata/nist-pkits/certs/deltaCRLIndicatorNoBaseCACert.crt", + "pki/testdata/nist-pkits/certs/distributionPoint1CACert.crt", + "pki/testdata/nist-pkits/certs/distributionPoint2CACert.crt", + "pki/testdata/nist-pkits/certs/indirectCRLCA1Cert.crt", + "pki/testdata/nist-pkits/certs/indirectCRLCA2Cert.crt", + "pki/testdata/nist-pkits/certs/indirectCRLCA3Cert.crt", + "pki/testdata/nist-pkits/certs/indirectCRLCA3cRLIssuerCert.crt", + "pki/testdata/nist-pkits/certs/indirectCRLCA4Cert.crt", + "pki/testdata/nist-pkits/certs/indirectCRLCA4cRLIssuerCert.crt", + "pki/testdata/nist-pkits/certs/indirectCRLCA5Cert.crt", + "pki/testdata/nist-pkits/certs/indirectCRLCA6Cert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy0CACert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy1CACert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy1SelfIssuedCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy1SelfIssuedsubCA2Cert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy1subCA1Cert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy1subCA2Cert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy1subCAIAP5Cert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy1subsubCA2Cert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy5CACert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy5subCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicy5subsubCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitAnyPolicyTest3EE.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping0CACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping0subCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P12CACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P12subCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P12subCAIPM5Cert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P12subsubCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P12subsubCAIPM5Cert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P1CACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P1SelfIssuedCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P1SelfIssuedsubCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P1subCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping1P1subsubCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping5CACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping5subCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping5subsubCACert.crt", + "pki/testdata/nist-pkits/certs/inhibitPolicyMapping5subsubsubCACert.crt", + "pki/testdata/nist-pkits/certs/keyUsageCriticalcRLSignFalseCACert.crt", + "pki/testdata/nist-pkits/certs/keyUsageCriticalkeyCertSignFalseCACert.crt", + "pki/testdata/nist-pkits/certs/keyUsageNotCriticalCACert.crt", + "pki/testdata/nist-pkits/certs/keyUsageNotCriticalcRLSignFalseCACert.crt", + "pki/testdata/nist-pkits/certs/keyUsageNotCriticalkeyCertSignFalseCACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN1CACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN1SelfIssuedCACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN1subCA1Cert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN1subCA2Cert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN1subCA3Cert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN2CACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN3CACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN3subCA1Cert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN3subCA2Cert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN4CACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDN5CACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDNS1CACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsDNS2CACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsRFC822CA1Cert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsRFC822CA2Cert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsRFC822CA3Cert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsURI1CACert.crt", + "pki/testdata/nist-pkits/certs/nameConstraintsURI2CACert.crt", + "pki/testdata/nist-pkits/certs/onlyContainsAttributeCertsCACert.crt", + "pki/testdata/nist-pkits/certs/onlyContainsCACertsCACert.crt", + "pki/testdata/nist-pkits/certs/onlyContainsUserCertsCACert.crt", + "pki/testdata/nist-pkits/certs/onlySomeReasonsCA1Cert.crt", + "pki/testdata/nist-pkits/certs/onlySomeReasonsCA2Cert.crt", + "pki/testdata/nist-pkits/certs/onlySomeReasonsCA3Cert.crt", + "pki/testdata/nist-pkits/certs/onlySomeReasonsCA4Cert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint0CACert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint0SelfIssuedCACert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint0subCA2Cert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint0subCACert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint1CACert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint1SelfIssuedCACert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint1SelfIssuedsubCACert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint1subCACert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6CACert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6subCA0Cert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6subCA1Cert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6subCA4Cert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6subsubCA00Cert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6subsubCA11Cert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6subsubCA41Cert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6subsubsubCA11XCert.crt", + "pki/testdata/nist-pkits/certs/pathLenConstraint6subsubsubCA41XCert.crt", + "pki/testdata/nist-pkits/certs/pre2000CRLnextUpdateCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy0CACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy0subCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy0subsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy0subsubsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy10CACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy10subCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy10subsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy10subsubsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy2CACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy2SelfIssuedCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy2SelfIssuedsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy2subCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy4CACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy4subCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy4subsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy4subsubsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy5CACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy5subCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy5subsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy5subsubsubCACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy7CACert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy7subCARE2Cert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy7subsubCARE2RE4Cert.crt", + "pki/testdata/nist-pkits/certs/requireExplicitPolicy7subsubsubCARE2RE4Cert.crt", + "pki/testdata/nist-pkits/crls/BadCRLIssuerNameCACRL.crl", + "pki/testdata/nist-pkits/crls/BadCRLSignatureCACRL.crl", + "pki/testdata/nist-pkits/crls/BadSignedCACRL.crl", + "pki/testdata/nist-pkits/crls/BadnotAfterDateCACRL.crl", + "pki/testdata/nist-pkits/crls/BadnotBeforeDateCACRL.crl", + "pki/testdata/nist-pkits/crls/BasicSelfIssuedCRLSigningKeyCACRL.crl", + "pki/testdata/nist-pkits/crls/BasicSelfIssuedCRLSigningKeyCRLCertCRL.crl", + "pki/testdata/nist-pkits/crls/BasicSelfIssuedNewKeyCACRL.crl", + "pki/testdata/nist-pkits/crls/BasicSelfIssuedOldKeyCACRL.crl", + "pki/testdata/nist-pkits/crls/BasicSelfIssuedOldKeySelfIssuedCertCRL.crl", + "pki/testdata/nist-pkits/crls/DSACACRL.crl", + "pki/testdata/nist-pkits/crls/DSAParametersInheritedCACRL.crl", + "pki/testdata/nist-pkits/crls/GeneralizedTimeCRLnextUpdateCACRL.crl", + "pki/testdata/nist-pkits/crls/GoodCACRL.crl", + "pki/testdata/nist-pkits/crls/GoodsubCACRL.crl", + "pki/testdata/nist-pkits/crls/GoodsubCAPanyPolicyMapping1to2CACRL.crl", + "pki/testdata/nist-pkits/crls/LongSerialNumberCACRL.crl", + "pki/testdata/nist-pkits/crls/Mapping1to2CACRL.crl", + "pki/testdata/nist-pkits/crls/MappingFromanyPolicyCACRL.crl", + "pki/testdata/nist-pkits/crls/MappingToanyPolicyCACRL.crl", + "pki/testdata/nist-pkits/crls/MissingbasicConstraintsCACRL.crl", + "pki/testdata/nist-pkits/crls/NameOrderCACRL.crl", + "pki/testdata/nist-pkits/crls/NegativeSerialNumberCACRL.crl", + "pki/testdata/nist-pkits/crls/NoPoliciesCACRL.crl", + "pki/testdata/nist-pkits/crls/NoissuingDistributionPointCACRL.crl", + "pki/testdata/nist-pkits/crls/OldCRLnextUpdateCACRL.crl", + "pki/testdata/nist-pkits/crls/P12Mapping1to3CACRL.crl", + "pki/testdata/nist-pkits/crls/P12Mapping1to3subCACRL.crl", + "pki/testdata/nist-pkits/crls/P12Mapping1to3subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/P1Mapping1to234CACRL.crl", + "pki/testdata/nist-pkits/crls/P1Mapping1to234subCACRL.crl", + "pki/testdata/nist-pkits/crls/P1anyPolicyMapping1to2CACRL.crl", + "pki/testdata/nist-pkits/crls/PanyPolicyMapping1to2CACRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP1234CACRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP1234subCAP123CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP1234subsubCAP123P12CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP123CACRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP123subCAP12CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP123subsubCAP12P1CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP123subsubCAP2P2CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP123subsubsubCAP12P2P1CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP12CACRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP12subCAP1CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP12subsubCAP1P2CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP2subCA2CRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP2subCACRL.crl", + "pki/testdata/nist-pkits/crls/PoliciesP3CACRL.crl", + "pki/testdata/nist-pkits/crls/RFC3280MandatoryAttributeTypesCACRL.crl", + "pki/testdata/nist-pkits/crls/RFC3280OptionalAttributeTypesCACRL.crl", + "pki/testdata/nist-pkits/crls/RevokedsubCACRL.crl", + "pki/testdata/nist-pkits/crls/RolloverfromPrintableStringtoUTF8StringCACRL.crl", + "pki/testdata/nist-pkits/crls/SeparateCertificateandCRLKeysCA2CRL.crl", + "pki/testdata/nist-pkits/crls/SeparateCertificateandCRLKeysCRL.crl", + "pki/testdata/nist-pkits/crls/TrustAnchorRootCRL.crl", + "pki/testdata/nist-pkits/crls/TwoCRLsCABadCRL.crl", + "pki/testdata/nist-pkits/crls/TwoCRLsCAGoodCRL.crl", + "pki/testdata/nist-pkits/crls/UIDCACRL.crl", + "pki/testdata/nist-pkits/crls/UTF8StringCaseInsensitiveMatchCACRL.crl", + "pki/testdata/nist-pkits/crls/UTF8StringEncodedNamesCACRL.crl", + "pki/testdata/nist-pkits/crls/UnknownCRLEntryExtensionCACRL.crl", + "pki/testdata/nist-pkits/crls/UnknownCRLExtensionCACRL.crl", + "pki/testdata/nist-pkits/crls/WrongCRLCACRL.crl", + "pki/testdata/nist-pkits/crls/anyPolicyCACRL.crl", + "pki/testdata/nist-pkits/crls/basicConstraintsCriticalcAFalseCACRL.crl", + "pki/testdata/nist-pkits/crls/basicConstraintsNotCriticalCACRL.crl", + "pki/testdata/nist-pkits/crls/basicConstraintsNotCriticalcAFalseCACRL.crl", + "pki/testdata/nist-pkits/crls/deltaCRLCA1CRL.crl", + "pki/testdata/nist-pkits/crls/deltaCRLCA1deltaCRL.crl", + "pki/testdata/nist-pkits/crls/deltaCRLCA2CRL.crl", + "pki/testdata/nist-pkits/crls/deltaCRLCA2deltaCRL.crl", + "pki/testdata/nist-pkits/crls/deltaCRLCA3CRL.crl", + "pki/testdata/nist-pkits/crls/deltaCRLCA3deltaCRL.crl", + "pki/testdata/nist-pkits/crls/deltaCRLIndicatorNoBaseCACRL.crl", + "pki/testdata/nist-pkits/crls/distributionPoint1CACRL.crl", + "pki/testdata/nist-pkits/crls/distributionPoint2CACRL.crl", + "pki/testdata/nist-pkits/crls/indirectCRLCA1CRL.crl", + "pki/testdata/nist-pkits/crls/indirectCRLCA3CRL.crl", + "pki/testdata/nist-pkits/crls/indirectCRLCA3cRLIssuerCRL.crl", + "pki/testdata/nist-pkits/crls/indirectCRLCA4cRLIssuerCRL.crl", + "pki/testdata/nist-pkits/crls/indirectCRLCA5CRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy0CACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy1CACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy1subCA1CRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy1subCA2CRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy1subCAIAP5CRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy1subsubCA2CRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy5CACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy5subCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitAnyPolicy5subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping0CACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping0subCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping1P12CACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping1P12subCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping1P12subCAIPM5CRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping1P12subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping1P12subsubCAIPM5CRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping1P1CACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping1P1subCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping1P1subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping5CACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping5subCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping5subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/inhibitPolicyMapping5subsubsubCACRL.crl", + "pki/testdata/nist-pkits/crls/keyUsageCriticalcRLSignFalseCACRL.crl", + "pki/testdata/nist-pkits/crls/keyUsageCriticalkeyCertSignFalseCACRL.crl", + "pki/testdata/nist-pkits/crls/keyUsageNotCriticalCACRL.crl", + "pki/testdata/nist-pkits/crls/keyUsageNotCriticalcRLSignFalseCACRL.crl", + "pki/testdata/nist-pkits/crls/keyUsageNotCriticalkeyCertSignFalseCACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN1CACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN1subCA1CRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN1subCA2CRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN1subCA3CRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN2CACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN3CACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN3subCA1CRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN3subCA2CRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN4CACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDN5CACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDNS1CACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsDNS2CACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsRFC822CA1CRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsRFC822CA2CRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsRFC822CA3CRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsURI1CACRL.crl", + "pki/testdata/nist-pkits/crls/nameConstraintsURI2CACRL.crl", + "pki/testdata/nist-pkits/crls/onlyContainsAttributeCertsCACRL.crl", + "pki/testdata/nist-pkits/crls/onlyContainsCACertsCACRL.crl", + "pki/testdata/nist-pkits/crls/onlyContainsUserCertsCACRL.crl", + "pki/testdata/nist-pkits/crls/onlySomeReasonsCA1compromiseCRL.crl", + "pki/testdata/nist-pkits/crls/onlySomeReasonsCA1otherreasonsCRL.crl", + "pki/testdata/nist-pkits/crls/onlySomeReasonsCA2CRL1.crl", + "pki/testdata/nist-pkits/crls/onlySomeReasonsCA2CRL2.crl", + "pki/testdata/nist-pkits/crls/onlySomeReasonsCA3compromiseCRL.crl", + "pki/testdata/nist-pkits/crls/onlySomeReasonsCA3otherreasonsCRL.crl", + "pki/testdata/nist-pkits/crls/onlySomeReasonsCA4compromiseCRL.crl", + "pki/testdata/nist-pkits/crls/onlySomeReasonsCA4otherreasonsCRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint0CACRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint0subCA2CRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint0subCACRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint1CACRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint1subCACRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6CACRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6subCA0CRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6subCA1CRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6subCA4CRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6subsubCA00CRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6subsubCA11CRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6subsubCA41CRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6subsubsubCA11XCRL.crl", + "pki/testdata/nist-pkits/crls/pathLenConstraint6subsubsubCA41XCRL.crl", + "pki/testdata/nist-pkits/crls/pre2000CRLnextUpdateCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy0CACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy0subCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy0subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy0subsubsubCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy10CACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy10subCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy10subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy10subsubsubCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy2CACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy2subCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy4CACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy4subCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy4subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy4subsubsubCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy5CACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy5subCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy5subsubCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy5subsubsubCACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy7CACRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy7subCARE2CRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy7subsubCARE2RE4CRL.crl", + "pki/testdata/nist-pkits/crls/requireExplicitPolicy7subsubsubCARE2RE4CRL.crl", + "pki/testdata/ocsp_unittest/bad_ocsp_type.pem", + "pki/testdata/ocsp_unittest/bad_signature.pem", + "pki/testdata/ocsp_unittest/bad_status.pem", + "pki/testdata/ocsp_unittest/good_response.pem", + "pki/testdata/ocsp_unittest/good_response_next_update.pem", + "pki/testdata/ocsp_unittest/good_response_sha256.pem", + "pki/testdata/ocsp_unittest/has_critical_ct_extension.pem", + "pki/testdata/ocsp_unittest/has_critical_response_extension.pem", + "pki/testdata/ocsp_unittest/has_critical_single_extension.pem", + "pki/testdata/ocsp_unittest/has_extension.pem", + "pki/testdata/ocsp_unittest/has_single_extension.pem", + "pki/testdata/ocsp_unittest/has_version.pem", + "pki/testdata/ocsp_unittest/malformed_request.pem", + "pki/testdata/ocsp_unittest/missing_response.pem", + "pki/testdata/ocsp_unittest/multiple_response.pem", + "pki/testdata/ocsp_unittest/no_response.pem", + "pki/testdata/ocsp_unittest/ocsp_extra_certs.pem", + "pki/testdata/ocsp_unittest/ocsp_sign_bad_indirect.pem", + "pki/testdata/ocsp_unittest/ocsp_sign_direct.pem", + "pki/testdata/ocsp_unittest/ocsp_sign_indirect.pem", + "pki/testdata/ocsp_unittest/ocsp_sign_indirect_missing.pem", + "pki/testdata/ocsp_unittest/other_response.pem", + "pki/testdata/ocsp_unittest/responder_id.pem", + "pki/testdata/ocsp_unittest/responder_name.pem", + "pki/testdata/ocsp_unittest/revoke_response.pem", + "pki/testdata/ocsp_unittest/revoke_response_reason.pem", + "pki/testdata/ocsp_unittest/unknown_response.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/empty_sequence.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/extra_contents_after_extension_sequence.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/extra_contents_after_issuer_and_serial.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/invalid_contents.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/invalid_issuer.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/invalid_key_identifier.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/invalid_serial.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/issuer_and_serial.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/issuer_only.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/key_identifier.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/key_identifier_and_issuer_and_serial.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/serial_only.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier/url_issuer_and_serial.pem", + "pki/testdata/parse_certificate_unittest/authority_key_identifier_not_sequence.pem", + "pki/testdata/parse_certificate_unittest/bad_key_usage.pem", + "pki/testdata/parse_certificate_unittest/bad_policy_qualifiers.pem", + "pki/testdata/parse_certificate_unittest/bad_signature_algorithm_oid.pem", + "pki/testdata/parse_certificate_unittest/bad_validity.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_ca_false.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_ca_no_path.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_ca_path_9.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_negative_path.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_not_ca.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_path_too_large.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_pathlen_255.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_pathlen_256.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_pathlen_not_ca.pem", + "pki/testdata/parse_certificate_unittest/basic_constraints_unconsumed_data.pem", + "pki/testdata/parse_certificate_unittest/cert_algorithm_not_sequence.pem", + "pki/testdata/parse_certificate_unittest/cert_data_after_signature.pem", + "pki/testdata/parse_certificate_unittest/cert_empty_sequence.pem", + "pki/testdata/parse_certificate_unittest/cert_missing_signature.pem", + "pki/testdata/parse_certificate_unittest/cert_not_sequence.pem", + "pki/testdata/parse_certificate_unittest/cert_signature_not_bit_string.pem", + "pki/testdata/parse_certificate_unittest/cert_skeleton.pem", + "pki/testdata/parse_certificate_unittest/cert_version3.pem", + "pki/testdata/parse_certificate_unittest/crldp_1uri_noissuer.pem", + "pki/testdata/parse_certificate_unittest/crldp_3uri_noissuer.pem", + "pki/testdata/parse_certificate_unittest/crldp_full_name_as_dirname.pem", + "pki/testdata/parse_certificate_unittest/crldp_issuer_as_dirname.pem", + "pki/testdata/parse_certificate_unittest/extended_key_usage.pem", + "pki/testdata/parse_certificate_unittest/extension_critical.pem", + "pki/testdata/parse_certificate_unittest/extension_critical_0.pem", + "pki/testdata/parse_certificate_unittest/extension_critical_3.pem", + "pki/testdata/parse_certificate_unittest/extension_not_critical.pem", + "pki/testdata/parse_certificate_unittest/extensions_data_after_sequence.pem", + "pki/testdata/parse_certificate_unittest/extensions_duplicate_key_usage.pem", + "pki/testdata/parse_certificate_unittest/extensions_empty_sequence.pem", + "pki/testdata/parse_certificate_unittest/extensions_not_sequence.pem", + "pki/testdata/parse_certificate_unittest/extensions_real.pem", + "pki/testdata/parse_certificate_unittest/failed_signature_algorithm.pem", + "pki/testdata/parse_certificate_unittest/inhibit_any_policy.pem", + "pki/testdata/parse_certificate_unittest/issuer_bad_printable_string.pem", + "pki/testdata/parse_certificate_unittest/key_usage.pem", + "pki/testdata/parse_certificate_unittest/name_constraints_bad_ip.pem", + "pki/testdata/parse_certificate_unittest/policies.pem", + "pki/testdata/parse_certificate_unittest/policy_constraints_empty.pem", + "pki/testdata/parse_certificate_unittest/policy_constraints_inhibit.pem", + "pki/testdata/parse_certificate_unittest/policy_constraints_inhibit_require.pem", + "pki/testdata/parse_certificate_unittest/policy_constraints_require.pem", + "pki/testdata/parse_certificate_unittest/policy_qualifiers_empty_sequence.pem", + "pki/testdata/parse_certificate_unittest/serial_37_bytes.pem", + "pki/testdata/parse_certificate_unittest/serial_negative.pem", + "pki/testdata/parse_certificate_unittest/serial_not_minimal.pem", + "pki/testdata/parse_certificate_unittest/serial_not_number.pem", + "pki/testdata/parse_certificate_unittest/serial_zero.pem", + "pki/testdata/parse_certificate_unittest/serial_zero_padded.pem", + "pki/testdata/parse_certificate_unittest/serial_zero_padded_21_bytes.pem", + "pki/testdata/parse_certificate_unittest/signature_algorithm_null.pem", + "pki/testdata/parse_certificate_unittest/subject_alt_name.pem", + "pki/testdata/parse_certificate_unittest/subject_blank_subjectaltname_not_critical.pem", + "pki/testdata/parse_certificate_unittest/subject_key_identifier_not_octet_string.pem", + "pki/testdata/parse_certificate_unittest/subject_not_ascii.pem", + "pki/testdata/parse_certificate_unittest/subject_not_printable_string.pem", + "pki/testdata/parse_certificate_unittest/subject_printable_string_containing_utf8_client_cert.pem", + "pki/testdata/parse_certificate_unittest/subject_t61string.pem", + "pki/testdata/parse_certificate_unittest/subject_t61string_1-32.pem", + "pki/testdata/parse_certificate_unittest/subject_t61string_126-160.pem", + "pki/testdata/parse_certificate_unittest/subject_t61string_actual.pem", + "pki/testdata/parse_certificate_unittest/subjectaltname_bad_ip.pem", + "pki/testdata/parse_certificate_unittest/subjectaltname_dns_not_ascii.pem", + "pki/testdata/parse_certificate_unittest/subjectaltname_general_names_empty_sequence.pem", + "pki/testdata/parse_certificate_unittest/subjectaltname_trailing_data.pem", + "pki/testdata/parse_certificate_unittest/tbs_explicit_v1.pem", + "pki/testdata/parse_certificate_unittest/tbs_v1.pem", + "pki/testdata/parse_certificate_unittest/tbs_v1_extensions.pem", + "pki/testdata/parse_certificate_unittest/tbs_v2_extensions.pem", + "pki/testdata/parse_certificate_unittest/tbs_v2_issuer_and_subject_unique_id.pem", + "pki/testdata/parse_certificate_unittest/tbs_v2_issuer_unique_id.pem", + "pki/testdata/parse_certificate_unittest/tbs_v2_no_optionals.pem", + "pki/testdata/parse_certificate_unittest/tbs_v3_all_optionals.pem", + "pki/testdata/parse_certificate_unittest/tbs_v3_data_after_extensions.pem", + "pki/testdata/parse_certificate_unittest/tbs_v3_extensions.pem", + "pki/testdata/parse_certificate_unittest/tbs_v3_extensions_not_sequence.pem", + "pki/testdata/parse_certificate_unittest/tbs_v3_no_optionals.pem", + "pki/testdata/parse_certificate_unittest/tbs_v3_real.pem", + "pki/testdata/parse_certificate_unittest/tbs_v4.pem", + "pki/testdata/parse_certificate_unittest/tbs_validity_both_generalized_time.pem", + "pki/testdata/parse_certificate_unittest/tbs_validity_both_utc_time.pem", + "pki/testdata/parse_certificate_unittest/tbs_validity_generalized_time_and_utc_time.pem", + "pki/testdata/parse_certificate_unittest/tbs_validity_relaxed.pem", + "pki/testdata/parse_certificate_unittest/tbs_validity_utc_time_and_generalized_time.pem", + "pki/testdata/parse_certificate_unittest/v1_explicit_version.pem", + "pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/int_match_name_only.pem", + "pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/int_matching.pem", + "pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/int_mismatch.pem", + "pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/root.pem", + "pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/root2.pem", + "pki/testdata/path_builder_unittest/key_id_name_and_serial_prioritization/target.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_different_ski_a.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_different_ski_b.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_different_ski_c.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_matching_ski_a.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_matching_ski_b.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_matching_ski_c.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_no_ski_a.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_no_ski_b.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/int_no_ski_c.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/root.pem", + "pki/testdata/path_builder_unittest/key_id_prioritization/target.pem", + "pki/testdata/path_builder_unittest/multi-root-A-by-B.pem", + "pki/testdata/path_builder_unittest/multi-root-B-by-C.pem", + "pki/testdata/path_builder_unittest/multi-root-B-by-F.pem", + "pki/testdata/path_builder_unittest/multi-root-C-by-D.pem", + "pki/testdata/path_builder_unittest/multi-root-C-by-E.pem", + "pki/testdata/path_builder_unittest/multi-root-D-by-D.pem", + "pki/testdata/path_builder_unittest/multi-root-E-by-E.pem", + "pki/testdata/path_builder_unittest/multi-root-F-by-E.pem", + "pki/testdata/path_builder_unittest/precertificate/precertificate.pem", + "pki/testdata/path_builder_unittest/precertificate/root.pem", + "pki/testdata/path_builder_unittest/self_issued_prioritization/root1.pem", + "pki/testdata/path_builder_unittest/self_issued_prioritization/root1_cross.pem", + "pki/testdata/path_builder_unittest/self_issued_prioritization/root2.pem", + "pki/testdata/path_builder_unittest/self_issued_prioritization/target.pem", + "pki/testdata/path_builder_unittest/validity_date_prioritization/int_ac.pem", + "pki/testdata/path_builder_unittest/validity_date_prioritization/int_ad.pem", + "pki/testdata/path_builder_unittest/validity_date_prioritization/int_bc.pem", + "pki/testdata/path_builder_unittest/validity_date_prioritization/int_bd.pem", + "pki/testdata/path_builder_unittest/validity_date_prioritization/root.pem", + "pki/testdata/path_builder_unittest/validity_date_prioritization/target.pem", + "pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/basic-constraints-pathlen-0-self-issued/main.test", + "pki/testdata/verify_certificate_chain_unittest/expired-intermediate/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/expired-intermediate/not-after.test", + "pki/testdata/verify_certificate_chain_unittest/expired-intermediate/not-before.test", + "pki/testdata/verify_certificate_chain_unittest/expired-root/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/expired-root/not-after-ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/expired-root/not-after-ta-with-expiration-and-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/expired-root/not-after-ta-with-expiration.test", + "pki/testdata/verify_certificate_chain_unittest/expired-root/not-after.test", + "pki/testdata/verify_certificate_chain_unittest/expired-root/not-before-ta-with-expiration.test", + "pki/testdata/verify_certificate_chain_unittest/expired-root/not-before.test", + "pki/testdata/verify_certificate_chain_unittest/expired-target/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/expired-target/not-after.test", + "pki/testdata/verify_certificate_chain_unittest/expired-target/not-before.test", + "pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/incorrect-trust-anchor/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-and-target-wrong-signature/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-ca-false/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-ca-false/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-not-critical/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-basic-constraints-not-critical/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/any.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/clientauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/clientauth.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/serverauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-any-and-clientauth/serverauth.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/any.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/clientauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/clientauth.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/serverauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-clientauth/serverauth.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-eku-any.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-eku-clientAuth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-eku-clientAuth.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-eku-serverAuth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha1-eku-serverAuth.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha256-chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha256-eku-any.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha256-eku-clientAuth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha256-eku-clientAuth.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha256-eku-serverAuth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-eku-server-gated-crypto/sha256-eku-serverAuth.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-basic-constraints/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-basic-constraints/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-signing-key-usage/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-lacks-signing-key-usage/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-signed-with-sha1/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-signed-with-sha1/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-critical-extension/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-critical-extension/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-non-critical-extension/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-unknown-non-critical-extension/main.test", + "pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/intermediate-wrong-signature-no-authority-key-identifier/main.test", + "pki/testdata/verify_certificate_chain_unittest/issuer-and-subject-not-byte-for-byte-equal/anchor.pem", + "pki/testdata/verify_certificate_chain_unittest/issuer-and-subject-not-byte-for-byte-equal/anchor.test", + "pki/testdata/verify_certificate_chain_unittest/issuer-and-subject-not-byte-for-byte-equal/target.pem", + "pki/testdata/verify_certificate_chain_unittest/issuer-and-subject-not-byte-for-byte-equal/target.test", + "pki/testdata/verify_certificate_chain_unittest/key-rollover/longrolloverchain.pem", + "pki/testdata/verify_certificate_chain_unittest/key-rollover/longrolloverchain.test", + "pki/testdata/verify_certificate_chain_unittest/key-rollover/newchain.pem", + "pki/testdata/verify_certificate_chain_unittest/key-rollover/newchain.test", + "pki/testdata/verify_certificate_chain_unittest/key-rollover/oldchain.pem", + "pki/testdata/verify_certificate_chain_unittest/key-rollover/oldchain.test", + "pki/testdata/verify_certificate_chain_unittest/key-rollover/rolloverchain.pem", + "pki/testdata/verify_certificate_chain_unittest/key-rollover/rolloverchain.test", + "pki/testdata/verify_certificate_chain_unittest/many-names/ok-all-types.pem", + "pki/testdata/verify_certificate_chain_unittest/many-names/ok-all-types.test", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-all-types.pem", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-all-types.test", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dirnames-excluded.pem", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dirnames-excluded.test", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dirnames-permitted.pem", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dirnames-permitted.test", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dns-excluded.pem", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dns-excluded.test", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dns-permitted.pem", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-dns-permitted.test", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-ips-excluded.pem", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-ips-excluded.test", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-ips-permitted.pem", + "pki/testdata/verify_certificate_chain_unittest/many-names/toomany-ips-permitted.test", + "pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/main.test", + "pki/testdata/verify_certificate_chain_unittest/non-self-signed-root/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.1.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.1.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.1.4.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.1.5.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.1.6.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.1.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.1.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.10.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.13.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.4.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.5.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.6.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.7.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.10.8.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.11.1.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.11.10.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.11.11.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.11.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.11.5.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.11.6.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.11.8.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.11.9.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.12.1.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.12.10.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.12.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.12.4.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.12.5.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.12.6.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.12.8.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.10.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.12.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.13.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.15.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.16.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.17.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.20.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.21.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.22.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.23.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.24.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.25.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.26.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.27.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.28.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.29.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.31.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.33.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.34.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.35.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.36.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.37.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.38.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.7.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.8.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.13.9.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.16.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.2.1.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.2.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.2.5.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.2.6.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.2.7.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.3.1.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.3.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.1.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.10.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.11.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.12.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.16.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.5.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.6.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.6.9.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.7.1.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.7.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.1.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.12.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.14.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.2.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.4.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.5.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.6.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.7.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.8.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.8.9.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.9.3.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.9.5.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.9.7.txt", + "pki/testdata/verify_certificate_chain_unittest/pkits_errors/4.9.8.txt", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-fail/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-anypolicy-by-root-ok/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-fail/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-inhibit-mapping-by-root-ok/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policies-ok/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-ok/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-ok/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-on-root-ok/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-on-root-wrong/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-fail/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/main.test", + "pki/testdata/verify_certificate_chain_unittest/policies-required-by-root-ok/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/main.test", + "pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-fail/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/main.test", + "pki/testdata/verify_certificate_chain_unittest/policy-mappings-on-root-ok/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/main.test", + "pki/testdata/verify_certificate_chain_unittest/root-basic-constraints-ca-false/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-ta-with-constraints-strict.test", + "pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-ta-with-expiration-and-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth-ta-with-expiration.test", + "pki/testdata/verify_certificate_chain_unittest/root-eku-clientauth/serverauth.test", + "pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/main.test", + "pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/ta-with-constraints-require-basic-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/root-lacks-basic-constraints/ta-with-require-basic-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/main.test", + "pki/testdata/verify_certificate_chain_unittest/root-lacks-keycertsign-key-usage/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/distrusted-root-expired.test", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/distrusted-root.test", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/ta-with-constraints.test", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/ta-with-expiration.test", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/trusted_leaf-and-trust_anchor.test", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/trusted_leaf-root.test", + "pki/testdata/verify_certificate_chain_unittest/target-and-intermediate/unspecified-trust-root.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-any/any.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-any/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-eku-any/clientauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-any/clientauth.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-any/serverauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-any/serverauth.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/any.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/clientauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/clientauth.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/serverauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-clientauth/serverauth.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-many/any.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-many/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-eku-many/clientauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-many/clientauth.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-many/serverauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-many/serverauth.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-none/any.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-none/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-eku-none/clientauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-none/clientauth.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-none/serverauth-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-eku-none/serverauth.test", + "pki/testdata/verify_certificate_chain_unittest/target-has-512bit-rsa-key/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-has-512bit-rsa-key/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/target_only-trusted_leaf-strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/target_only-trusted_leaf.test", + "pki/testdata/verify_certificate_chain_unittest/target-has-ca-basic-constraints/target_only.pem", + "pki/testdata/verify_certificate_chain_unittest/target-has-keycertsign-but-not-ca/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-has-keycertsign-but-not-ca/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-has-pathlen-but-not-ca/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-has-pathlen-but-not-ca/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-and-eku/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-and-eku/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-no-eku/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-msapplicationpolicies-no-eku/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-not-end-entity/strict.test", + "pki/testdata/verify_certificate_chain_unittest/target-only/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-only/trusted_anchor.test", + "pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf-and-trust_anchor.test", + "pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf-not_after.test", + "pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf-wrong_eku.test", + "pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf.test", + "pki/testdata/verify_certificate_chain_unittest/target-only/trusted_leaf_require_self_signed.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfissued/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-selfissued/trusted_anchor.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfissued/trusted_leaf-and-trust_anchor.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfissued/trusted_leaf.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfissued/trusted_leaf_require_self_signed.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfsigned/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-selfsigned/trusted_leaf-and-trust_anchor.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfsigned/trusted_leaf-not_after.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfsigned/trusted_leaf-wrong_eku.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfsigned/trusted_leaf.test", + "pki/testdata/verify_certificate_chain_unittest/target-selfsigned/trusted_leaf_require_self_signed.test", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-decipherOnly.pem", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-decipherOnly.test", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-digitalSignature.pem", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-digitalSignature.test", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-keyAgreement.pem", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-keyAgreement.test", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-keyEncipherment.pem", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/ec-keyEncipherment.test", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-decipherOnly.pem", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-decipherOnly.test", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-digitalSignature.pem", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-digitalSignature.test", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-keyAgreement.pem", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-keyAgreement.test", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-keyEncipherment.pem", + "pki/testdata/verify_certificate_chain_unittest/target-serverauth-various-keyusages/rsa-keyEncipherment.test", + "pki/testdata/verify_certificate_chain_unittest/target-signed-by-512bit-rsa/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-signed-by-512bit-rsa/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-signed-using-ecdsa/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-signed-using-ecdsa/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-signed-with-sha1/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-signed-with-sha1/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/target_only-trusted_leaf.test", + "pki/testdata/verify_certificate_chain_unittest/target-unknown-critical-extension/target_only.pem", + "pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-wrong-signature-no-authority-key-identifier/main.test", + "pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/target-wrong-signature/main.test", + "pki/testdata/verify_certificate_chain_unittest/unknown-critical-policy-qualifier/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/unknown-critical-policy-qualifier/main.test", + "pki/testdata/verify_certificate_chain_unittest/unknown-non-critical-policy-qualifier/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/unknown-non-critical-policy-qualifier/main.test", + "pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/violates-basic-constraints-pathlen-0/main.test", + "pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/chain.pem", + "pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/main.test", + "pki/testdata/verify_certificate_chain_unittest/violates-pathlen-1-from-root/ta-with-constraints.test", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-case_swap-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-case_swap-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-case_swap-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-case_swap.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-extra_whitespace-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-extra_whitespace-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-extra_whitespace-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-extra_whitespace.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-unmangled-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-unmangled-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-unmangled-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-BMPSTRING-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-case_swap-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-case_swap-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-case_swap-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-case_swap.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-extra_whitespace-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-extra_whitespace-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-extra_whitespace-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-extra_whitespace.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-rdn_sorting_1.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-rdn_sorting_2.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-unmangled-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-unmangled-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-unmangled-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-PRINTABLESTRING-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-case_swap-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-case_swap-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-case_swap-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-case_swap.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-extra_whitespace-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-extra_whitespace-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-extra_whitespace-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-extra_whitespace.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-unmangled-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-unmangled-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-unmangled-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-T61STRING-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-case_swap-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-case_swap-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-case_swap-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-case_swap.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-extra_whitespace-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-extra_whitespace-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-extra_whitespace-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-extra_whitespace.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-unmangled-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-unmangled-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-unmangled-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UNIVERSALSTRING-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-case_swap-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-case_swap-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-case_swap-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-case_swap.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-extra_whitespace-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-extra_whitespace-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-extra_whitespace-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-extra_whitespace.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-unmangled-dupe_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-unmangled-extra_attr.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-unmangled-extra_rdn.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-UTF8-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-mixed-rdn_dupetype_sorting_1.pem", + "pki/testdata/verify_name_match_unittest/names/ascii-mixed-rdn_dupetype_sorting_2.pem", + "pki/testdata/verify_name_match_unittest/names/custom-custom-normalized.pem", + "pki/testdata/verify_name_match_unittest/names/invalid-AttributeTypeAndValue-badAttributeType.pem", + "pki/testdata/verify_name_match_unittest/names/invalid-AttributeTypeAndValue-empty.pem", + "pki/testdata/verify_name_match_unittest/names/invalid-AttributeTypeAndValue-extradata.pem", + "pki/testdata/verify_name_match_unittest/names/invalid-AttributeTypeAndValue-onlyOneElement.pem", + "pki/testdata/verify_name_match_unittest/names/invalid-AttributeTypeAndValue-setNotSequence.pem", + "pki/testdata/verify_name_match_unittest/names/invalid-Name-setInsteadOfSequence.pem", + "pki/testdata/verify_name_match_unittest/names/invalid-RDN-empty.pem", + "pki/testdata/verify_name_match_unittest/names/invalid-RDN-sequenceInsteadOfSet.pem", + "pki/testdata/verify_name_match_unittest/names/unicode-mixed-normalized.pem", + "pki/testdata/verify_name_match_unittest/names/unicode-mixed-unnormalized.pem", + "pki/testdata/verify_name_match_unittest/names/unicode_bmp-BMPSTRING-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/unicode_bmp-UNIVERSALSTRING-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/unicode_bmp-UTF8-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/unicode_supplementary-UNIVERSALSTRING-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/unicode_supplementary-UTF8-unmangled.pem", + "pki/testdata/verify_name_match_unittest/names/valid-Name-empty.pem", + "pki/testdata/verify_name_match_unittest/names/valid-minimal.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-spki-params-null.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-unused-bits-signature.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-using-ecdh-key.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-using-ecmqv-key.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-using-rsa-algorithm.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512-wrong-signature-format.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-prime256v1-sha512.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-secp384r1-sha256-corrupted-data.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-secp384r1-sha256.pem", + "pki/testdata/verify_signed_data_unittest/ecdsa-using-rsa-key.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha1-bad-key-der-length.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha1-bad-key-der-null.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha1-key-params-absent.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha1-using-pss-key-no-params.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha1-wrong-algorithm.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha1.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha256-key-encoded-ber.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha256-spki-non-null-params.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha256-using-ecdsa-algorithm.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha256-using-id-ea-rsa.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pkcs1-sha256.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pss-sha256-using-pss-key-with-params.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pss-sha256-wrong-salt.pem", + "pki/testdata/verify_signed_data_unittest/rsa-pss-sha256.pem", + "pki/testdata/verify_signed_data_unittest/rsa-using-ec-key.pem", + "pki/testdata/verify_signed_data_unittest/rsa2048-pkcs1-sha512.pem", + "pki/testdata/verify_unittest/google-leaf.der", + "pki/testdata/verify_unittest/self-issued.pem" + ] + }, + "ssl": { + "srcs": [ + "ssl/bio_ssl.cc", + "ssl/d1_both.cc", + "ssl/d1_lib.cc", + "ssl/d1_pkt.cc", + "ssl/d1_srtp.cc", + "ssl/dtls_method.cc", + "ssl/dtls_record.cc", + "ssl/encrypted_client_hello.cc", + "ssl/extensions.cc", + "ssl/handoff.cc", + "ssl/handshake.cc", + "ssl/handshake_client.cc", + "ssl/handshake_server.cc", + "ssl/s3_both.cc", + "ssl/s3_lib.cc", + "ssl/s3_pkt.cc", + "ssl/ssl_aead_ctx.cc", + "ssl/ssl_asn1.cc", + "ssl/ssl_buffer.cc", + "ssl/ssl_cert.cc", + "ssl/ssl_cipher.cc", + "ssl/ssl_credential.cc", + "ssl/ssl_file.cc", + "ssl/ssl_key_share.cc", + "ssl/ssl_lib.cc", + "ssl/ssl_privkey.cc", + "ssl/ssl_session.cc", + "ssl/ssl_stat.cc", + "ssl/ssl_transcript.cc", + "ssl/ssl_versions.cc", + "ssl/ssl_x509.cc", + "ssl/t1_enc.cc", + "ssl/tls13_both.cc", + "ssl/tls13_client.cc", + "ssl/tls13_enc.cc", + "ssl/tls13_server.cc", + "ssl/tls_method.cc", + "ssl/tls_record.cc" + ], + "hdrs": [ + "include/openssl/dtls1.h", + "include/openssl/srtp.h", + "include/openssl/ssl.h", + "include/openssl/ssl3.h", + "include/openssl/tls1.h" + ], + "internal_hdrs": [ + "ssl/internal.h" + ] + }, + "ssl_test": { + "srcs": [ + "crypto/test/gtest_main.cc", + "ssl/span_test.cc", + "ssl/ssl_c_test.c", + "ssl/ssl_test.cc" + ] + }, + "test_support": { + "srcs": [ + "crypto/test/abi_test.cc", + "crypto/test/file_test.cc", + "crypto/test/file_test_gtest.cc", + "crypto/test/file_util.cc", + "crypto/test/test_data.cc", + "crypto/test/test_util.cc", + "crypto/test/wycheproof_util.cc" + ], + "internal_hdrs": [ + "crypto/test/abi_test.h", + "crypto/test/file_test.h", + "crypto/test/file_util.h", + "crypto/test/gtest_main.h", + "crypto/test/test_data.h", + "crypto/test/test_util.h", + "crypto/test/wycheproof_util.h", + "ssl/test/async_bio.h", + "ssl/test/fuzzer.h", + "ssl/test/fuzzer_tags.h", + "ssl/test/handshake_util.h", + "ssl/test/mock_quic_transport.h", + "ssl/test/packeted_bio.h", + "ssl/test/settings_writer.h", + "ssl/test/test_config.h", + "ssl/test/test_state.h" + ], + "asm": [ + "gen/test_support/trampoline-armv4-linux.S", + "gen/test_support/trampoline-armv8-apple.S", + "gen/test_support/trampoline-armv8-linux.S", + "gen/test_support/trampoline-armv8-win.S", + "gen/test_support/trampoline-x86-apple.S", + "gen/test_support/trampoline-x86-linux.S", + "gen/test_support/trampoline-x86_64-apple.S", + "gen/test_support/trampoline-x86_64-linux.S" + ], + "nasm": [ + "gen/test_support/trampoline-x86-win.asm", + "gen/test_support/trampoline-x86_64-win.asm" + ] + }, + "urandom_test": { + "srcs": [ + "crypto/fipsmodule/rand/urandom_test.cc" + ] + } +} \ No newline at end of file diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-armv4-linux.S b/yass/third_party/boringssl/src/gen/test_support/trampoline-armv4-linux.S new file mode 100644 index 0000000000..34a2819d71 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-armv4-linux.S @@ -0,0 +1,368 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) +.syntax unified + +.arch armv7-a +.fpu vfp + +.text + +@ abi_test_trampoline loads callee-saved registers from |state|, calls |func| +@ with |argv|, then saves the callee-saved registers into |state|. It returns +@ the result of |func|. The |unwind| argument is unused. +@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state, +@ const uint32_t *argv, size_t argc, +@ int unwind); +.type abi_test_trampoline, %function +.globl abi_test_trampoline +.hidden abi_test_trampoline +.align 4 +abi_test_trampoline: + @ Save parameters and all callee-saved registers. For convenience, we + @ save r9 on iOS even though it's volatile. + vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} + stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} + + @ Reserve stack space for six (10-4) stack parameters, plus an extra 4 + @ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3). + sub sp, sp, #28 + + @ Every register in AAPCS is either non-volatile or a parameter (except + @ r9 on iOS), so this code, by the actual call, loses all its scratch + @ registers. First fill in stack parameters while there are registers + @ to spare. + cmp r3, #4 + bls .Lstack_args_done + mov r4, sp @ r4 is the output pointer. + add r5, r2, r3, lsl #2 @ Set r5 to the end of argv. + add r2, r2, #16 @ Skip four arguments. +.Lstack_args_loop: + ldr r6, [r2], #4 + cmp r2, r5 + str r6, [r4], #4 + bne .Lstack_args_loop + +.Lstack_args_done: + @ Load registers from |r1|. + vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15} +#if defined(__APPLE__) + @ r9 is not volatile on iOS. + ldmia r1!, {r4,r5,r6,r7,r8,r10-r11} +#else + ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11} +#endif + + @ Load register parameters. This uses up our remaining registers, so we + @ repurpose lr as scratch space. + ldr r3, [sp, #40] @ Reload argc. + ldr lr, [sp, #36] @ .Load argv into lr. + cmp r3, #3 + bhi .Larg_r3 + beq .Larg_r2 + cmp r3, #1 + bhi .Larg_r1 + beq .Larg_r0 + b .Largs_done + +.Larg_r3: + ldr r3, [lr, #12] @ argv[3] +.Larg_r2: + ldr r2, [lr, #8] @ argv[2] +.Larg_r1: + ldr r1, [lr, #4] @ argv[1] +.Larg_r0: + ldr r0, [lr] @ argv[0] +.Largs_done: + + @ With every other register in use, load the function pointer into lr + @ and call the function. + ldr lr, [sp, #28] + blx lr + + @ r1-r3 are free for use again. The trampoline only supports + @ single-return functions. Pass r4-r11 to the caller. + ldr r1, [sp, #32] + vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15} +#if defined(__APPLE__) + @ r9 is not volatile on iOS. + stmia r1!, {r4,r5,r6,r7,r8,r10-r11} +#else + stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11} +#endif + + @ Unwind the stack and restore registers. + add sp, sp, #44 @ 44 = 28+16 + ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above). + vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} + + bx lr +.size abi_test_trampoline,.-abi_test_trampoline +.type abi_test_clobber_r0, %function +.globl abi_test_clobber_r0 +.hidden abi_test_clobber_r0 +.align 4 +abi_test_clobber_r0: + mov r0, #0 + bx lr +.size abi_test_clobber_r0,.-abi_test_clobber_r0 +.type abi_test_clobber_r1, %function +.globl abi_test_clobber_r1 +.hidden abi_test_clobber_r1 +.align 4 +abi_test_clobber_r1: + mov r1, #0 + bx lr +.size abi_test_clobber_r1,.-abi_test_clobber_r1 +.type abi_test_clobber_r2, %function +.globl abi_test_clobber_r2 +.hidden abi_test_clobber_r2 +.align 4 +abi_test_clobber_r2: + mov r2, #0 + bx lr +.size abi_test_clobber_r2,.-abi_test_clobber_r2 +.type abi_test_clobber_r3, %function +.globl abi_test_clobber_r3 +.hidden abi_test_clobber_r3 +.align 4 +abi_test_clobber_r3: + mov r3, #0 + bx lr +.size abi_test_clobber_r3,.-abi_test_clobber_r3 +.type abi_test_clobber_r4, %function +.globl abi_test_clobber_r4 +.hidden abi_test_clobber_r4 +.align 4 +abi_test_clobber_r4: + mov r4, #0 + bx lr +.size abi_test_clobber_r4,.-abi_test_clobber_r4 +.type abi_test_clobber_r5, %function +.globl abi_test_clobber_r5 +.hidden abi_test_clobber_r5 +.align 4 +abi_test_clobber_r5: + mov r5, #0 + bx lr +.size abi_test_clobber_r5,.-abi_test_clobber_r5 +.type abi_test_clobber_r6, %function +.globl abi_test_clobber_r6 +.hidden abi_test_clobber_r6 +.align 4 +abi_test_clobber_r6: + mov r6, #0 + bx lr +.size abi_test_clobber_r6,.-abi_test_clobber_r6 +.type abi_test_clobber_r7, %function +.globl abi_test_clobber_r7 +.hidden abi_test_clobber_r7 +.align 4 +abi_test_clobber_r7: + mov r7, #0 + bx lr +.size abi_test_clobber_r7,.-abi_test_clobber_r7 +.type abi_test_clobber_r8, %function +.globl abi_test_clobber_r8 +.hidden abi_test_clobber_r8 +.align 4 +abi_test_clobber_r8: + mov r8, #0 + bx lr +.size abi_test_clobber_r8,.-abi_test_clobber_r8 +.type abi_test_clobber_r9, %function +.globl abi_test_clobber_r9 +.hidden abi_test_clobber_r9 +.align 4 +abi_test_clobber_r9: + mov r9, #0 + bx lr +.size abi_test_clobber_r9,.-abi_test_clobber_r9 +.type abi_test_clobber_r10, %function +.globl abi_test_clobber_r10 +.hidden abi_test_clobber_r10 +.align 4 +abi_test_clobber_r10: + mov r10, #0 + bx lr +.size abi_test_clobber_r10,.-abi_test_clobber_r10 +.type abi_test_clobber_r11, %function +.globl abi_test_clobber_r11 +.hidden abi_test_clobber_r11 +.align 4 +abi_test_clobber_r11: + mov r11, #0 + bx lr +.size abi_test_clobber_r11,.-abi_test_clobber_r11 +.type abi_test_clobber_r12, %function +.globl abi_test_clobber_r12 +.hidden abi_test_clobber_r12 +.align 4 +abi_test_clobber_r12: + mov r12, #0 + bx lr +.size abi_test_clobber_r12,.-abi_test_clobber_r12 +.type abi_test_clobber_d0, %function +.globl abi_test_clobber_d0 +.hidden abi_test_clobber_d0 +.align 4 +abi_test_clobber_d0: + mov r0, #0 + vmov s0, r0 + vmov s1, r0 + bx lr +.size abi_test_clobber_d0,.-abi_test_clobber_d0 +.type abi_test_clobber_d1, %function +.globl abi_test_clobber_d1 +.hidden abi_test_clobber_d1 +.align 4 +abi_test_clobber_d1: + mov r0, #0 + vmov s2, r0 + vmov s3, r0 + bx lr +.size abi_test_clobber_d1,.-abi_test_clobber_d1 +.type abi_test_clobber_d2, %function +.globl abi_test_clobber_d2 +.hidden abi_test_clobber_d2 +.align 4 +abi_test_clobber_d2: + mov r0, #0 + vmov s4, r0 + vmov s5, r0 + bx lr +.size abi_test_clobber_d2,.-abi_test_clobber_d2 +.type abi_test_clobber_d3, %function +.globl abi_test_clobber_d3 +.hidden abi_test_clobber_d3 +.align 4 +abi_test_clobber_d3: + mov r0, #0 + vmov s6, r0 + vmov s7, r0 + bx lr +.size abi_test_clobber_d3,.-abi_test_clobber_d3 +.type abi_test_clobber_d4, %function +.globl abi_test_clobber_d4 +.hidden abi_test_clobber_d4 +.align 4 +abi_test_clobber_d4: + mov r0, #0 + vmov s8, r0 + vmov s9, r0 + bx lr +.size abi_test_clobber_d4,.-abi_test_clobber_d4 +.type abi_test_clobber_d5, %function +.globl abi_test_clobber_d5 +.hidden abi_test_clobber_d5 +.align 4 +abi_test_clobber_d5: + mov r0, #0 + vmov s10, r0 + vmov s11, r0 + bx lr +.size abi_test_clobber_d5,.-abi_test_clobber_d5 +.type abi_test_clobber_d6, %function +.globl abi_test_clobber_d6 +.hidden abi_test_clobber_d6 +.align 4 +abi_test_clobber_d6: + mov r0, #0 + vmov s12, r0 + vmov s13, r0 + bx lr +.size abi_test_clobber_d6,.-abi_test_clobber_d6 +.type abi_test_clobber_d7, %function +.globl abi_test_clobber_d7 +.hidden abi_test_clobber_d7 +.align 4 +abi_test_clobber_d7: + mov r0, #0 + vmov s14, r0 + vmov s15, r0 + bx lr +.size abi_test_clobber_d7,.-abi_test_clobber_d7 +.type abi_test_clobber_d8, %function +.globl abi_test_clobber_d8 +.hidden abi_test_clobber_d8 +.align 4 +abi_test_clobber_d8: + mov r0, #0 + vmov s16, r0 + vmov s17, r0 + bx lr +.size abi_test_clobber_d8,.-abi_test_clobber_d8 +.type abi_test_clobber_d9, %function +.globl abi_test_clobber_d9 +.hidden abi_test_clobber_d9 +.align 4 +abi_test_clobber_d9: + mov r0, #0 + vmov s18, r0 + vmov s19, r0 + bx lr +.size abi_test_clobber_d9,.-abi_test_clobber_d9 +.type abi_test_clobber_d10, %function +.globl abi_test_clobber_d10 +.hidden abi_test_clobber_d10 +.align 4 +abi_test_clobber_d10: + mov r0, #0 + vmov s20, r0 + vmov s21, r0 + bx lr +.size abi_test_clobber_d10,.-abi_test_clobber_d10 +.type abi_test_clobber_d11, %function +.globl abi_test_clobber_d11 +.hidden abi_test_clobber_d11 +.align 4 +abi_test_clobber_d11: + mov r0, #0 + vmov s22, r0 + vmov s23, r0 + bx lr +.size abi_test_clobber_d11,.-abi_test_clobber_d11 +.type abi_test_clobber_d12, %function +.globl abi_test_clobber_d12 +.hidden abi_test_clobber_d12 +.align 4 +abi_test_clobber_d12: + mov r0, #0 + vmov s24, r0 + vmov s25, r0 + bx lr +.size abi_test_clobber_d12,.-abi_test_clobber_d12 +.type abi_test_clobber_d13, %function +.globl abi_test_clobber_d13 +.hidden abi_test_clobber_d13 +.align 4 +abi_test_clobber_d13: + mov r0, #0 + vmov s26, r0 + vmov s27, r0 + bx lr +.size abi_test_clobber_d13,.-abi_test_clobber_d13 +.type abi_test_clobber_d14, %function +.globl abi_test_clobber_d14 +.hidden abi_test_clobber_d14 +.align 4 +abi_test_clobber_d14: + mov r0, #0 + vmov s28, r0 + vmov s29, r0 + bx lr +.size abi_test_clobber_d14,.-abi_test_clobber_d14 +.type abi_test_clobber_d15, %function +.globl abi_test_clobber_d15 +.hidden abi_test_clobber_d15 +.align 4 +abi_test_clobber_d15: + mov r0, #0 + vmov s30, r0 + vmov s31, r0 + bx lr +.size abi_test_clobber_d15,.-abi_test_clobber_d15 +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-apple.S b/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-apple.S new file mode 100644 index 0000000000..99055e0ad4 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-apple.S @@ -0,0 +1,750 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) +#include + +.text + +// abi_test_trampoline loads callee-saved registers from |state|, calls |func| +// with |argv|, then saves the callee-saved registers into |state|. It returns +// the result of |func|. The |unwind| argument is unused. +// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state, +// const uint64_t *argv, size_t argc, +// uint64_t unwind); + +.globl _abi_test_trampoline +.private_extern _abi_test_trampoline +.align 4 +_abi_test_trampoline: +Labi_test_trampoline_begin: + AARCH64_SIGN_LINK_REGISTER + // Stack layout (low to high addresses) + // x29,x30 (16 bytes) + // d8-d15 (64 bytes) + // x19-x28 (80 bytes) + // x1 (8 bytes) + // padding (8 bytes) + stp x29, x30, [sp, #-176]! + mov x29, sp + + // Saved callee-saved registers and |state|. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] + stp x19, x20, [sp, #80] + stp x21, x22, [sp, #96] + stp x23, x24, [sp, #112] + stp x25, x26, [sp, #128] + stp x27, x28, [sp, #144] + str x1, [sp, #160] + + // Load registers from |state|, with the exception of x29. x29 is the + // frame pointer and also callee-saved, but AAPCS64 allows platforms to + // mandate that x29 always point to a frame. iOS64 does so, which means + // we cannot fill x29 with entropy without violating ABI rules + // ourselves. x29 is tested separately below. + ldp d8, d9, [x1], #16 + ldp d10, d11, [x1], #16 + ldp d12, d13, [x1], #16 + ldp d14, d15, [x1], #16 + ldp x19, x20, [x1], #16 + ldp x21, x22, [x1], #16 + ldp x23, x24, [x1], #16 + ldp x25, x26, [x1], #16 + ldp x27, x28, [x1], #16 + + // Move parameters into temporary registers. + mov x9, x0 + mov x10, x2 + mov x11, x3 + + // Load parameters into registers. + cbz x11, Largs_done + ldr x0, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x1, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x2, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x3, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x4, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x5, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x6, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x7, [x10], #8 + +Largs_done: + blr x9 + + // Reload |state| and store registers. + ldr x1, [sp, #160] + stp d8, d9, [x1], #16 + stp d10, d11, [x1], #16 + stp d12, d13, [x1], #16 + stp d14, d15, [x1], #16 + stp x19, x20, [x1], #16 + stp x21, x22, [x1], #16 + stp x23, x24, [x1], #16 + stp x25, x26, [x1], #16 + stp x27, x28, [x1], #16 + + // |func| is required to preserve x29, the frame pointer. We cannot load + // random values into x29 (see comment above), so compare it against the + // expected value and zero the field of |state| if corrupted. + mov x9, sp + cmp x29, x9 + b.eq Lx29_ok + str xzr, [x1] + +Lx29_ok: + // Restore callee-saved registers. + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] + ldp x19, x20, [sp, #80] + ldp x21, x22, [sp, #96] + ldp x23, x24, [sp, #112] + ldp x25, x26, [sp, #128] + ldp x27, x28, [sp, #144] + + ldp x29, x30, [sp], #176 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.globl _abi_test_clobber_x0 +.private_extern _abi_test_clobber_x0 +.align 4 +_abi_test_clobber_x0: + AARCH64_VALID_CALL_TARGET + mov x0, xzr + ret + + +.globl _abi_test_clobber_x1 +.private_extern _abi_test_clobber_x1 +.align 4 +_abi_test_clobber_x1: + AARCH64_VALID_CALL_TARGET + mov x1, xzr + ret + + +.globl _abi_test_clobber_x2 +.private_extern _abi_test_clobber_x2 +.align 4 +_abi_test_clobber_x2: + AARCH64_VALID_CALL_TARGET + mov x2, xzr + ret + + +.globl _abi_test_clobber_x3 +.private_extern _abi_test_clobber_x3 +.align 4 +_abi_test_clobber_x3: + AARCH64_VALID_CALL_TARGET + mov x3, xzr + ret + + +.globl _abi_test_clobber_x4 +.private_extern _abi_test_clobber_x4 +.align 4 +_abi_test_clobber_x4: + AARCH64_VALID_CALL_TARGET + mov x4, xzr + ret + + +.globl _abi_test_clobber_x5 +.private_extern _abi_test_clobber_x5 +.align 4 +_abi_test_clobber_x5: + AARCH64_VALID_CALL_TARGET + mov x5, xzr + ret + + +.globl _abi_test_clobber_x6 +.private_extern _abi_test_clobber_x6 +.align 4 +_abi_test_clobber_x6: + AARCH64_VALID_CALL_TARGET + mov x6, xzr + ret + + +.globl _abi_test_clobber_x7 +.private_extern _abi_test_clobber_x7 +.align 4 +_abi_test_clobber_x7: + AARCH64_VALID_CALL_TARGET + mov x7, xzr + ret + + +.globl _abi_test_clobber_x8 +.private_extern _abi_test_clobber_x8 +.align 4 +_abi_test_clobber_x8: + AARCH64_VALID_CALL_TARGET + mov x8, xzr + ret + + +.globl _abi_test_clobber_x9 +.private_extern _abi_test_clobber_x9 +.align 4 +_abi_test_clobber_x9: + AARCH64_VALID_CALL_TARGET + mov x9, xzr + ret + + +.globl _abi_test_clobber_x10 +.private_extern _abi_test_clobber_x10 +.align 4 +_abi_test_clobber_x10: + AARCH64_VALID_CALL_TARGET + mov x10, xzr + ret + + +.globl _abi_test_clobber_x11 +.private_extern _abi_test_clobber_x11 +.align 4 +_abi_test_clobber_x11: + AARCH64_VALID_CALL_TARGET + mov x11, xzr + ret + + +.globl _abi_test_clobber_x12 +.private_extern _abi_test_clobber_x12 +.align 4 +_abi_test_clobber_x12: + AARCH64_VALID_CALL_TARGET + mov x12, xzr + ret + + +.globl _abi_test_clobber_x13 +.private_extern _abi_test_clobber_x13 +.align 4 +_abi_test_clobber_x13: + AARCH64_VALID_CALL_TARGET + mov x13, xzr + ret + + +.globl _abi_test_clobber_x14 +.private_extern _abi_test_clobber_x14 +.align 4 +_abi_test_clobber_x14: + AARCH64_VALID_CALL_TARGET + mov x14, xzr + ret + + +.globl _abi_test_clobber_x15 +.private_extern _abi_test_clobber_x15 +.align 4 +_abi_test_clobber_x15: + AARCH64_VALID_CALL_TARGET + mov x15, xzr + ret + + +.globl _abi_test_clobber_x16 +.private_extern _abi_test_clobber_x16 +.align 4 +_abi_test_clobber_x16: + AARCH64_VALID_CALL_TARGET + mov x16, xzr + ret + + +.globl _abi_test_clobber_x17 +.private_extern _abi_test_clobber_x17 +.align 4 +_abi_test_clobber_x17: + AARCH64_VALID_CALL_TARGET + mov x17, xzr + ret + + +.globl _abi_test_clobber_x19 +.private_extern _abi_test_clobber_x19 +.align 4 +_abi_test_clobber_x19: + AARCH64_VALID_CALL_TARGET + mov x19, xzr + ret + + +.globl _abi_test_clobber_x20 +.private_extern _abi_test_clobber_x20 +.align 4 +_abi_test_clobber_x20: + AARCH64_VALID_CALL_TARGET + mov x20, xzr + ret + + +.globl _abi_test_clobber_x21 +.private_extern _abi_test_clobber_x21 +.align 4 +_abi_test_clobber_x21: + AARCH64_VALID_CALL_TARGET + mov x21, xzr + ret + + +.globl _abi_test_clobber_x22 +.private_extern _abi_test_clobber_x22 +.align 4 +_abi_test_clobber_x22: + AARCH64_VALID_CALL_TARGET + mov x22, xzr + ret + + +.globl _abi_test_clobber_x23 +.private_extern _abi_test_clobber_x23 +.align 4 +_abi_test_clobber_x23: + AARCH64_VALID_CALL_TARGET + mov x23, xzr + ret + + +.globl _abi_test_clobber_x24 +.private_extern _abi_test_clobber_x24 +.align 4 +_abi_test_clobber_x24: + AARCH64_VALID_CALL_TARGET + mov x24, xzr + ret + + +.globl _abi_test_clobber_x25 +.private_extern _abi_test_clobber_x25 +.align 4 +_abi_test_clobber_x25: + AARCH64_VALID_CALL_TARGET + mov x25, xzr + ret + + +.globl _abi_test_clobber_x26 +.private_extern _abi_test_clobber_x26 +.align 4 +_abi_test_clobber_x26: + AARCH64_VALID_CALL_TARGET + mov x26, xzr + ret + + +.globl _abi_test_clobber_x27 +.private_extern _abi_test_clobber_x27 +.align 4 +_abi_test_clobber_x27: + AARCH64_VALID_CALL_TARGET + mov x27, xzr + ret + + +.globl _abi_test_clobber_x28 +.private_extern _abi_test_clobber_x28 +.align 4 +_abi_test_clobber_x28: + AARCH64_VALID_CALL_TARGET + mov x28, xzr + ret + + +.globl _abi_test_clobber_x29 +.private_extern _abi_test_clobber_x29 +.align 4 +_abi_test_clobber_x29: + AARCH64_VALID_CALL_TARGET + mov x29, xzr + ret + + +.globl _abi_test_clobber_d0 +.private_extern _abi_test_clobber_d0 +.align 4 +_abi_test_clobber_d0: + AARCH64_VALID_CALL_TARGET + fmov d0, xzr + ret + + +.globl _abi_test_clobber_d1 +.private_extern _abi_test_clobber_d1 +.align 4 +_abi_test_clobber_d1: + AARCH64_VALID_CALL_TARGET + fmov d1, xzr + ret + + +.globl _abi_test_clobber_d2 +.private_extern _abi_test_clobber_d2 +.align 4 +_abi_test_clobber_d2: + AARCH64_VALID_CALL_TARGET + fmov d2, xzr + ret + + +.globl _abi_test_clobber_d3 +.private_extern _abi_test_clobber_d3 +.align 4 +_abi_test_clobber_d3: + AARCH64_VALID_CALL_TARGET + fmov d3, xzr + ret + + +.globl _abi_test_clobber_d4 +.private_extern _abi_test_clobber_d4 +.align 4 +_abi_test_clobber_d4: + AARCH64_VALID_CALL_TARGET + fmov d4, xzr + ret + + +.globl _abi_test_clobber_d5 +.private_extern _abi_test_clobber_d5 +.align 4 +_abi_test_clobber_d5: + AARCH64_VALID_CALL_TARGET + fmov d5, xzr + ret + + +.globl _abi_test_clobber_d6 +.private_extern _abi_test_clobber_d6 +.align 4 +_abi_test_clobber_d6: + AARCH64_VALID_CALL_TARGET + fmov d6, xzr + ret + + +.globl _abi_test_clobber_d7 +.private_extern _abi_test_clobber_d7 +.align 4 +_abi_test_clobber_d7: + AARCH64_VALID_CALL_TARGET + fmov d7, xzr + ret + + +.globl _abi_test_clobber_d8 +.private_extern _abi_test_clobber_d8 +.align 4 +_abi_test_clobber_d8: + AARCH64_VALID_CALL_TARGET + fmov d8, xzr + ret + + +.globl _abi_test_clobber_d9 +.private_extern _abi_test_clobber_d9 +.align 4 +_abi_test_clobber_d9: + AARCH64_VALID_CALL_TARGET + fmov d9, xzr + ret + + +.globl _abi_test_clobber_d10 +.private_extern _abi_test_clobber_d10 +.align 4 +_abi_test_clobber_d10: + AARCH64_VALID_CALL_TARGET + fmov d10, xzr + ret + + +.globl _abi_test_clobber_d11 +.private_extern _abi_test_clobber_d11 +.align 4 +_abi_test_clobber_d11: + AARCH64_VALID_CALL_TARGET + fmov d11, xzr + ret + + +.globl _abi_test_clobber_d12 +.private_extern _abi_test_clobber_d12 +.align 4 +_abi_test_clobber_d12: + AARCH64_VALID_CALL_TARGET + fmov d12, xzr + ret + + +.globl _abi_test_clobber_d13 +.private_extern _abi_test_clobber_d13 +.align 4 +_abi_test_clobber_d13: + AARCH64_VALID_CALL_TARGET + fmov d13, xzr + ret + + +.globl _abi_test_clobber_d14 +.private_extern _abi_test_clobber_d14 +.align 4 +_abi_test_clobber_d14: + AARCH64_VALID_CALL_TARGET + fmov d14, xzr + ret + + +.globl _abi_test_clobber_d15 +.private_extern _abi_test_clobber_d15 +.align 4 +_abi_test_clobber_d15: + AARCH64_VALID_CALL_TARGET + fmov d15, xzr + ret + + +.globl _abi_test_clobber_d16 +.private_extern _abi_test_clobber_d16 +.align 4 +_abi_test_clobber_d16: + AARCH64_VALID_CALL_TARGET + fmov d16, xzr + ret + + +.globl _abi_test_clobber_d17 +.private_extern _abi_test_clobber_d17 +.align 4 +_abi_test_clobber_d17: + AARCH64_VALID_CALL_TARGET + fmov d17, xzr + ret + + +.globl _abi_test_clobber_d18 +.private_extern _abi_test_clobber_d18 +.align 4 +_abi_test_clobber_d18: + AARCH64_VALID_CALL_TARGET + fmov d18, xzr + ret + + +.globl _abi_test_clobber_d19 +.private_extern _abi_test_clobber_d19 +.align 4 +_abi_test_clobber_d19: + AARCH64_VALID_CALL_TARGET + fmov d19, xzr + ret + + +.globl _abi_test_clobber_d20 +.private_extern _abi_test_clobber_d20 +.align 4 +_abi_test_clobber_d20: + AARCH64_VALID_CALL_TARGET + fmov d20, xzr + ret + + +.globl _abi_test_clobber_d21 +.private_extern _abi_test_clobber_d21 +.align 4 +_abi_test_clobber_d21: + AARCH64_VALID_CALL_TARGET + fmov d21, xzr + ret + + +.globl _abi_test_clobber_d22 +.private_extern _abi_test_clobber_d22 +.align 4 +_abi_test_clobber_d22: + AARCH64_VALID_CALL_TARGET + fmov d22, xzr + ret + + +.globl _abi_test_clobber_d23 +.private_extern _abi_test_clobber_d23 +.align 4 +_abi_test_clobber_d23: + AARCH64_VALID_CALL_TARGET + fmov d23, xzr + ret + + +.globl _abi_test_clobber_d24 +.private_extern _abi_test_clobber_d24 +.align 4 +_abi_test_clobber_d24: + AARCH64_VALID_CALL_TARGET + fmov d24, xzr + ret + + +.globl _abi_test_clobber_d25 +.private_extern _abi_test_clobber_d25 +.align 4 +_abi_test_clobber_d25: + AARCH64_VALID_CALL_TARGET + fmov d25, xzr + ret + + +.globl _abi_test_clobber_d26 +.private_extern _abi_test_clobber_d26 +.align 4 +_abi_test_clobber_d26: + AARCH64_VALID_CALL_TARGET + fmov d26, xzr + ret + + +.globl _abi_test_clobber_d27 +.private_extern _abi_test_clobber_d27 +.align 4 +_abi_test_clobber_d27: + AARCH64_VALID_CALL_TARGET + fmov d27, xzr + ret + + +.globl _abi_test_clobber_d28 +.private_extern _abi_test_clobber_d28 +.align 4 +_abi_test_clobber_d28: + AARCH64_VALID_CALL_TARGET + fmov d28, xzr + ret + + +.globl _abi_test_clobber_d29 +.private_extern _abi_test_clobber_d29 +.align 4 +_abi_test_clobber_d29: + AARCH64_VALID_CALL_TARGET + fmov d29, xzr + ret + + +.globl _abi_test_clobber_d30 +.private_extern _abi_test_clobber_d30 +.align 4 +_abi_test_clobber_d30: + AARCH64_VALID_CALL_TARGET + fmov d30, xzr + ret + + +.globl _abi_test_clobber_d31 +.private_extern _abi_test_clobber_d31 +.align 4 +_abi_test_clobber_d31: + AARCH64_VALID_CALL_TARGET + fmov d31, xzr + ret + + +.globl _abi_test_clobber_v8_upper +.private_extern _abi_test_clobber_v8_upper +.align 4 +_abi_test_clobber_v8_upper: + AARCH64_VALID_CALL_TARGET + fmov v8.d[1], xzr + ret + + +.globl _abi_test_clobber_v9_upper +.private_extern _abi_test_clobber_v9_upper +.align 4 +_abi_test_clobber_v9_upper: + AARCH64_VALID_CALL_TARGET + fmov v9.d[1], xzr + ret + + +.globl _abi_test_clobber_v10_upper +.private_extern _abi_test_clobber_v10_upper +.align 4 +_abi_test_clobber_v10_upper: + AARCH64_VALID_CALL_TARGET + fmov v10.d[1], xzr + ret + + +.globl _abi_test_clobber_v11_upper +.private_extern _abi_test_clobber_v11_upper +.align 4 +_abi_test_clobber_v11_upper: + AARCH64_VALID_CALL_TARGET + fmov v11.d[1], xzr + ret + + +.globl _abi_test_clobber_v12_upper +.private_extern _abi_test_clobber_v12_upper +.align 4 +_abi_test_clobber_v12_upper: + AARCH64_VALID_CALL_TARGET + fmov v12.d[1], xzr + ret + + +.globl _abi_test_clobber_v13_upper +.private_extern _abi_test_clobber_v13_upper +.align 4 +_abi_test_clobber_v13_upper: + AARCH64_VALID_CALL_TARGET + fmov v13.d[1], xzr + ret + + +.globl _abi_test_clobber_v14_upper +.private_extern _abi_test_clobber_v14_upper +.align 4 +_abi_test_clobber_v14_upper: + AARCH64_VALID_CALL_TARGET + fmov v14.d[1], xzr + ret + + +.globl _abi_test_clobber_v15_upper +.private_extern _abi_test_clobber_v15_upper +.align 4 +_abi_test_clobber_v15_upper: + AARCH64_VALID_CALL_TARGET + fmov v15.d[1], xzr + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-linux.S b/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-linux.S new file mode 100644 index 0000000000..58b4b93944 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-linux.S @@ -0,0 +1,750 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__) +#include + +.text + +// abi_test_trampoline loads callee-saved registers from |state|, calls |func| +// with |argv|, then saves the callee-saved registers into |state|. It returns +// the result of |func|. The |unwind| argument is unused. +// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state, +// const uint64_t *argv, size_t argc, +// uint64_t unwind); +.type abi_test_trampoline, %function +.globl abi_test_trampoline +.hidden abi_test_trampoline +.align 4 +abi_test_trampoline: +.Labi_test_trampoline_begin: + AARCH64_SIGN_LINK_REGISTER + // Stack layout (low to high addresses) + // x29,x30 (16 bytes) + // d8-d15 (64 bytes) + // x19-x28 (80 bytes) + // x1 (8 bytes) + // padding (8 bytes) + stp x29, x30, [sp, #-176]! + mov x29, sp + + // Saved callee-saved registers and |state|. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] + stp x19, x20, [sp, #80] + stp x21, x22, [sp, #96] + stp x23, x24, [sp, #112] + stp x25, x26, [sp, #128] + stp x27, x28, [sp, #144] + str x1, [sp, #160] + + // Load registers from |state|, with the exception of x29. x29 is the + // frame pointer and also callee-saved, but AAPCS64 allows platforms to + // mandate that x29 always point to a frame. iOS64 does so, which means + // we cannot fill x29 with entropy without violating ABI rules + // ourselves. x29 is tested separately below. + ldp d8, d9, [x1], #16 + ldp d10, d11, [x1], #16 + ldp d12, d13, [x1], #16 + ldp d14, d15, [x1], #16 + ldp x19, x20, [x1], #16 + ldp x21, x22, [x1], #16 + ldp x23, x24, [x1], #16 + ldp x25, x26, [x1], #16 + ldp x27, x28, [x1], #16 + + // Move parameters into temporary registers. + mov x9, x0 + mov x10, x2 + mov x11, x3 + + // Load parameters into registers. + cbz x11, .Largs_done + ldr x0, [x10], #8 + subs x11, x11, #1 + b.eq .Largs_done + ldr x1, [x10], #8 + subs x11, x11, #1 + b.eq .Largs_done + ldr x2, [x10], #8 + subs x11, x11, #1 + b.eq .Largs_done + ldr x3, [x10], #8 + subs x11, x11, #1 + b.eq .Largs_done + ldr x4, [x10], #8 + subs x11, x11, #1 + b.eq .Largs_done + ldr x5, [x10], #8 + subs x11, x11, #1 + b.eq .Largs_done + ldr x6, [x10], #8 + subs x11, x11, #1 + b.eq .Largs_done + ldr x7, [x10], #8 + +.Largs_done: + blr x9 + + // Reload |state| and store registers. + ldr x1, [sp, #160] + stp d8, d9, [x1], #16 + stp d10, d11, [x1], #16 + stp d12, d13, [x1], #16 + stp d14, d15, [x1], #16 + stp x19, x20, [x1], #16 + stp x21, x22, [x1], #16 + stp x23, x24, [x1], #16 + stp x25, x26, [x1], #16 + stp x27, x28, [x1], #16 + + // |func| is required to preserve x29, the frame pointer. We cannot load + // random values into x29 (see comment above), so compare it against the + // expected value and zero the field of |state| if corrupted. + mov x9, sp + cmp x29, x9 + b.eq .Lx29_ok + str xzr, [x1] + +.Lx29_ok: + // Restore callee-saved registers. + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] + ldp x19, x20, [sp, #80] + ldp x21, x22, [sp, #96] + ldp x23, x24, [sp, #112] + ldp x25, x26, [sp, #128] + ldp x27, x28, [sp, #144] + + ldp x29, x30, [sp], #176 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size abi_test_trampoline,.-abi_test_trampoline +.type abi_test_clobber_x0, %function +.globl abi_test_clobber_x0 +.hidden abi_test_clobber_x0 +.align 4 +abi_test_clobber_x0: + AARCH64_VALID_CALL_TARGET + mov x0, xzr + ret +.size abi_test_clobber_x0,.-abi_test_clobber_x0 +.type abi_test_clobber_x1, %function +.globl abi_test_clobber_x1 +.hidden abi_test_clobber_x1 +.align 4 +abi_test_clobber_x1: + AARCH64_VALID_CALL_TARGET + mov x1, xzr + ret +.size abi_test_clobber_x1,.-abi_test_clobber_x1 +.type abi_test_clobber_x2, %function +.globl abi_test_clobber_x2 +.hidden abi_test_clobber_x2 +.align 4 +abi_test_clobber_x2: + AARCH64_VALID_CALL_TARGET + mov x2, xzr + ret +.size abi_test_clobber_x2,.-abi_test_clobber_x2 +.type abi_test_clobber_x3, %function +.globl abi_test_clobber_x3 +.hidden abi_test_clobber_x3 +.align 4 +abi_test_clobber_x3: + AARCH64_VALID_CALL_TARGET + mov x3, xzr + ret +.size abi_test_clobber_x3,.-abi_test_clobber_x3 +.type abi_test_clobber_x4, %function +.globl abi_test_clobber_x4 +.hidden abi_test_clobber_x4 +.align 4 +abi_test_clobber_x4: + AARCH64_VALID_CALL_TARGET + mov x4, xzr + ret +.size abi_test_clobber_x4,.-abi_test_clobber_x4 +.type abi_test_clobber_x5, %function +.globl abi_test_clobber_x5 +.hidden abi_test_clobber_x5 +.align 4 +abi_test_clobber_x5: + AARCH64_VALID_CALL_TARGET + mov x5, xzr + ret +.size abi_test_clobber_x5,.-abi_test_clobber_x5 +.type abi_test_clobber_x6, %function +.globl abi_test_clobber_x6 +.hidden abi_test_clobber_x6 +.align 4 +abi_test_clobber_x6: + AARCH64_VALID_CALL_TARGET + mov x6, xzr + ret +.size abi_test_clobber_x6,.-abi_test_clobber_x6 +.type abi_test_clobber_x7, %function +.globl abi_test_clobber_x7 +.hidden abi_test_clobber_x7 +.align 4 +abi_test_clobber_x7: + AARCH64_VALID_CALL_TARGET + mov x7, xzr + ret +.size abi_test_clobber_x7,.-abi_test_clobber_x7 +.type abi_test_clobber_x8, %function +.globl abi_test_clobber_x8 +.hidden abi_test_clobber_x8 +.align 4 +abi_test_clobber_x8: + AARCH64_VALID_CALL_TARGET + mov x8, xzr + ret +.size abi_test_clobber_x8,.-abi_test_clobber_x8 +.type abi_test_clobber_x9, %function +.globl abi_test_clobber_x9 +.hidden abi_test_clobber_x9 +.align 4 +abi_test_clobber_x9: + AARCH64_VALID_CALL_TARGET + mov x9, xzr + ret +.size abi_test_clobber_x9,.-abi_test_clobber_x9 +.type abi_test_clobber_x10, %function +.globl abi_test_clobber_x10 +.hidden abi_test_clobber_x10 +.align 4 +abi_test_clobber_x10: + AARCH64_VALID_CALL_TARGET + mov x10, xzr + ret +.size abi_test_clobber_x10,.-abi_test_clobber_x10 +.type abi_test_clobber_x11, %function +.globl abi_test_clobber_x11 +.hidden abi_test_clobber_x11 +.align 4 +abi_test_clobber_x11: + AARCH64_VALID_CALL_TARGET + mov x11, xzr + ret +.size abi_test_clobber_x11,.-abi_test_clobber_x11 +.type abi_test_clobber_x12, %function +.globl abi_test_clobber_x12 +.hidden abi_test_clobber_x12 +.align 4 +abi_test_clobber_x12: + AARCH64_VALID_CALL_TARGET + mov x12, xzr + ret +.size abi_test_clobber_x12,.-abi_test_clobber_x12 +.type abi_test_clobber_x13, %function +.globl abi_test_clobber_x13 +.hidden abi_test_clobber_x13 +.align 4 +abi_test_clobber_x13: + AARCH64_VALID_CALL_TARGET + mov x13, xzr + ret +.size abi_test_clobber_x13,.-abi_test_clobber_x13 +.type abi_test_clobber_x14, %function +.globl abi_test_clobber_x14 +.hidden abi_test_clobber_x14 +.align 4 +abi_test_clobber_x14: + AARCH64_VALID_CALL_TARGET + mov x14, xzr + ret +.size abi_test_clobber_x14,.-abi_test_clobber_x14 +.type abi_test_clobber_x15, %function +.globl abi_test_clobber_x15 +.hidden abi_test_clobber_x15 +.align 4 +abi_test_clobber_x15: + AARCH64_VALID_CALL_TARGET + mov x15, xzr + ret +.size abi_test_clobber_x15,.-abi_test_clobber_x15 +.type abi_test_clobber_x16, %function +.globl abi_test_clobber_x16 +.hidden abi_test_clobber_x16 +.align 4 +abi_test_clobber_x16: + AARCH64_VALID_CALL_TARGET + mov x16, xzr + ret +.size abi_test_clobber_x16,.-abi_test_clobber_x16 +.type abi_test_clobber_x17, %function +.globl abi_test_clobber_x17 +.hidden abi_test_clobber_x17 +.align 4 +abi_test_clobber_x17: + AARCH64_VALID_CALL_TARGET + mov x17, xzr + ret +.size abi_test_clobber_x17,.-abi_test_clobber_x17 +.type abi_test_clobber_x19, %function +.globl abi_test_clobber_x19 +.hidden abi_test_clobber_x19 +.align 4 +abi_test_clobber_x19: + AARCH64_VALID_CALL_TARGET + mov x19, xzr + ret +.size abi_test_clobber_x19,.-abi_test_clobber_x19 +.type abi_test_clobber_x20, %function +.globl abi_test_clobber_x20 +.hidden abi_test_clobber_x20 +.align 4 +abi_test_clobber_x20: + AARCH64_VALID_CALL_TARGET + mov x20, xzr + ret +.size abi_test_clobber_x20,.-abi_test_clobber_x20 +.type abi_test_clobber_x21, %function +.globl abi_test_clobber_x21 +.hidden abi_test_clobber_x21 +.align 4 +abi_test_clobber_x21: + AARCH64_VALID_CALL_TARGET + mov x21, xzr + ret +.size abi_test_clobber_x21,.-abi_test_clobber_x21 +.type abi_test_clobber_x22, %function +.globl abi_test_clobber_x22 +.hidden abi_test_clobber_x22 +.align 4 +abi_test_clobber_x22: + AARCH64_VALID_CALL_TARGET + mov x22, xzr + ret +.size abi_test_clobber_x22,.-abi_test_clobber_x22 +.type abi_test_clobber_x23, %function +.globl abi_test_clobber_x23 +.hidden abi_test_clobber_x23 +.align 4 +abi_test_clobber_x23: + AARCH64_VALID_CALL_TARGET + mov x23, xzr + ret +.size abi_test_clobber_x23,.-abi_test_clobber_x23 +.type abi_test_clobber_x24, %function +.globl abi_test_clobber_x24 +.hidden abi_test_clobber_x24 +.align 4 +abi_test_clobber_x24: + AARCH64_VALID_CALL_TARGET + mov x24, xzr + ret +.size abi_test_clobber_x24,.-abi_test_clobber_x24 +.type abi_test_clobber_x25, %function +.globl abi_test_clobber_x25 +.hidden abi_test_clobber_x25 +.align 4 +abi_test_clobber_x25: + AARCH64_VALID_CALL_TARGET + mov x25, xzr + ret +.size abi_test_clobber_x25,.-abi_test_clobber_x25 +.type abi_test_clobber_x26, %function +.globl abi_test_clobber_x26 +.hidden abi_test_clobber_x26 +.align 4 +abi_test_clobber_x26: + AARCH64_VALID_CALL_TARGET + mov x26, xzr + ret +.size abi_test_clobber_x26,.-abi_test_clobber_x26 +.type abi_test_clobber_x27, %function +.globl abi_test_clobber_x27 +.hidden abi_test_clobber_x27 +.align 4 +abi_test_clobber_x27: + AARCH64_VALID_CALL_TARGET + mov x27, xzr + ret +.size abi_test_clobber_x27,.-abi_test_clobber_x27 +.type abi_test_clobber_x28, %function +.globl abi_test_clobber_x28 +.hidden abi_test_clobber_x28 +.align 4 +abi_test_clobber_x28: + AARCH64_VALID_CALL_TARGET + mov x28, xzr + ret +.size abi_test_clobber_x28,.-abi_test_clobber_x28 +.type abi_test_clobber_x29, %function +.globl abi_test_clobber_x29 +.hidden abi_test_clobber_x29 +.align 4 +abi_test_clobber_x29: + AARCH64_VALID_CALL_TARGET + mov x29, xzr + ret +.size abi_test_clobber_x29,.-abi_test_clobber_x29 +.type abi_test_clobber_d0, %function +.globl abi_test_clobber_d0 +.hidden abi_test_clobber_d0 +.align 4 +abi_test_clobber_d0: + AARCH64_VALID_CALL_TARGET + fmov d0, xzr + ret +.size abi_test_clobber_d0,.-abi_test_clobber_d0 +.type abi_test_clobber_d1, %function +.globl abi_test_clobber_d1 +.hidden abi_test_clobber_d1 +.align 4 +abi_test_clobber_d1: + AARCH64_VALID_CALL_TARGET + fmov d1, xzr + ret +.size abi_test_clobber_d1,.-abi_test_clobber_d1 +.type abi_test_clobber_d2, %function +.globl abi_test_clobber_d2 +.hidden abi_test_clobber_d2 +.align 4 +abi_test_clobber_d2: + AARCH64_VALID_CALL_TARGET + fmov d2, xzr + ret +.size abi_test_clobber_d2,.-abi_test_clobber_d2 +.type abi_test_clobber_d3, %function +.globl abi_test_clobber_d3 +.hidden abi_test_clobber_d3 +.align 4 +abi_test_clobber_d3: + AARCH64_VALID_CALL_TARGET + fmov d3, xzr + ret +.size abi_test_clobber_d3,.-abi_test_clobber_d3 +.type abi_test_clobber_d4, %function +.globl abi_test_clobber_d4 +.hidden abi_test_clobber_d4 +.align 4 +abi_test_clobber_d4: + AARCH64_VALID_CALL_TARGET + fmov d4, xzr + ret +.size abi_test_clobber_d4,.-abi_test_clobber_d4 +.type abi_test_clobber_d5, %function +.globl abi_test_clobber_d5 +.hidden abi_test_clobber_d5 +.align 4 +abi_test_clobber_d5: + AARCH64_VALID_CALL_TARGET + fmov d5, xzr + ret +.size abi_test_clobber_d5,.-abi_test_clobber_d5 +.type abi_test_clobber_d6, %function +.globl abi_test_clobber_d6 +.hidden abi_test_clobber_d6 +.align 4 +abi_test_clobber_d6: + AARCH64_VALID_CALL_TARGET + fmov d6, xzr + ret +.size abi_test_clobber_d6,.-abi_test_clobber_d6 +.type abi_test_clobber_d7, %function +.globl abi_test_clobber_d7 +.hidden abi_test_clobber_d7 +.align 4 +abi_test_clobber_d7: + AARCH64_VALID_CALL_TARGET + fmov d7, xzr + ret +.size abi_test_clobber_d7,.-abi_test_clobber_d7 +.type abi_test_clobber_d8, %function +.globl abi_test_clobber_d8 +.hidden abi_test_clobber_d8 +.align 4 +abi_test_clobber_d8: + AARCH64_VALID_CALL_TARGET + fmov d8, xzr + ret +.size abi_test_clobber_d8,.-abi_test_clobber_d8 +.type abi_test_clobber_d9, %function +.globl abi_test_clobber_d9 +.hidden abi_test_clobber_d9 +.align 4 +abi_test_clobber_d9: + AARCH64_VALID_CALL_TARGET + fmov d9, xzr + ret +.size abi_test_clobber_d9,.-abi_test_clobber_d9 +.type abi_test_clobber_d10, %function +.globl abi_test_clobber_d10 +.hidden abi_test_clobber_d10 +.align 4 +abi_test_clobber_d10: + AARCH64_VALID_CALL_TARGET + fmov d10, xzr + ret +.size abi_test_clobber_d10,.-abi_test_clobber_d10 +.type abi_test_clobber_d11, %function +.globl abi_test_clobber_d11 +.hidden abi_test_clobber_d11 +.align 4 +abi_test_clobber_d11: + AARCH64_VALID_CALL_TARGET + fmov d11, xzr + ret +.size abi_test_clobber_d11,.-abi_test_clobber_d11 +.type abi_test_clobber_d12, %function +.globl abi_test_clobber_d12 +.hidden abi_test_clobber_d12 +.align 4 +abi_test_clobber_d12: + AARCH64_VALID_CALL_TARGET + fmov d12, xzr + ret +.size abi_test_clobber_d12,.-abi_test_clobber_d12 +.type abi_test_clobber_d13, %function +.globl abi_test_clobber_d13 +.hidden abi_test_clobber_d13 +.align 4 +abi_test_clobber_d13: + AARCH64_VALID_CALL_TARGET + fmov d13, xzr + ret +.size abi_test_clobber_d13,.-abi_test_clobber_d13 +.type abi_test_clobber_d14, %function +.globl abi_test_clobber_d14 +.hidden abi_test_clobber_d14 +.align 4 +abi_test_clobber_d14: + AARCH64_VALID_CALL_TARGET + fmov d14, xzr + ret +.size abi_test_clobber_d14,.-abi_test_clobber_d14 +.type abi_test_clobber_d15, %function +.globl abi_test_clobber_d15 +.hidden abi_test_clobber_d15 +.align 4 +abi_test_clobber_d15: + AARCH64_VALID_CALL_TARGET + fmov d15, xzr + ret +.size abi_test_clobber_d15,.-abi_test_clobber_d15 +.type abi_test_clobber_d16, %function +.globl abi_test_clobber_d16 +.hidden abi_test_clobber_d16 +.align 4 +abi_test_clobber_d16: + AARCH64_VALID_CALL_TARGET + fmov d16, xzr + ret +.size abi_test_clobber_d16,.-abi_test_clobber_d16 +.type abi_test_clobber_d17, %function +.globl abi_test_clobber_d17 +.hidden abi_test_clobber_d17 +.align 4 +abi_test_clobber_d17: + AARCH64_VALID_CALL_TARGET + fmov d17, xzr + ret +.size abi_test_clobber_d17,.-abi_test_clobber_d17 +.type abi_test_clobber_d18, %function +.globl abi_test_clobber_d18 +.hidden abi_test_clobber_d18 +.align 4 +abi_test_clobber_d18: + AARCH64_VALID_CALL_TARGET + fmov d18, xzr + ret +.size abi_test_clobber_d18,.-abi_test_clobber_d18 +.type abi_test_clobber_d19, %function +.globl abi_test_clobber_d19 +.hidden abi_test_clobber_d19 +.align 4 +abi_test_clobber_d19: + AARCH64_VALID_CALL_TARGET + fmov d19, xzr + ret +.size abi_test_clobber_d19,.-abi_test_clobber_d19 +.type abi_test_clobber_d20, %function +.globl abi_test_clobber_d20 +.hidden abi_test_clobber_d20 +.align 4 +abi_test_clobber_d20: + AARCH64_VALID_CALL_TARGET + fmov d20, xzr + ret +.size abi_test_clobber_d20,.-abi_test_clobber_d20 +.type abi_test_clobber_d21, %function +.globl abi_test_clobber_d21 +.hidden abi_test_clobber_d21 +.align 4 +abi_test_clobber_d21: + AARCH64_VALID_CALL_TARGET + fmov d21, xzr + ret +.size abi_test_clobber_d21,.-abi_test_clobber_d21 +.type abi_test_clobber_d22, %function +.globl abi_test_clobber_d22 +.hidden abi_test_clobber_d22 +.align 4 +abi_test_clobber_d22: + AARCH64_VALID_CALL_TARGET + fmov d22, xzr + ret +.size abi_test_clobber_d22,.-abi_test_clobber_d22 +.type abi_test_clobber_d23, %function +.globl abi_test_clobber_d23 +.hidden abi_test_clobber_d23 +.align 4 +abi_test_clobber_d23: + AARCH64_VALID_CALL_TARGET + fmov d23, xzr + ret +.size abi_test_clobber_d23,.-abi_test_clobber_d23 +.type abi_test_clobber_d24, %function +.globl abi_test_clobber_d24 +.hidden abi_test_clobber_d24 +.align 4 +abi_test_clobber_d24: + AARCH64_VALID_CALL_TARGET + fmov d24, xzr + ret +.size abi_test_clobber_d24,.-abi_test_clobber_d24 +.type abi_test_clobber_d25, %function +.globl abi_test_clobber_d25 +.hidden abi_test_clobber_d25 +.align 4 +abi_test_clobber_d25: + AARCH64_VALID_CALL_TARGET + fmov d25, xzr + ret +.size abi_test_clobber_d25,.-abi_test_clobber_d25 +.type abi_test_clobber_d26, %function +.globl abi_test_clobber_d26 +.hidden abi_test_clobber_d26 +.align 4 +abi_test_clobber_d26: + AARCH64_VALID_CALL_TARGET + fmov d26, xzr + ret +.size abi_test_clobber_d26,.-abi_test_clobber_d26 +.type abi_test_clobber_d27, %function +.globl abi_test_clobber_d27 +.hidden abi_test_clobber_d27 +.align 4 +abi_test_clobber_d27: + AARCH64_VALID_CALL_TARGET + fmov d27, xzr + ret +.size abi_test_clobber_d27,.-abi_test_clobber_d27 +.type abi_test_clobber_d28, %function +.globl abi_test_clobber_d28 +.hidden abi_test_clobber_d28 +.align 4 +abi_test_clobber_d28: + AARCH64_VALID_CALL_TARGET + fmov d28, xzr + ret +.size abi_test_clobber_d28,.-abi_test_clobber_d28 +.type abi_test_clobber_d29, %function +.globl abi_test_clobber_d29 +.hidden abi_test_clobber_d29 +.align 4 +abi_test_clobber_d29: + AARCH64_VALID_CALL_TARGET + fmov d29, xzr + ret +.size abi_test_clobber_d29,.-abi_test_clobber_d29 +.type abi_test_clobber_d30, %function +.globl abi_test_clobber_d30 +.hidden abi_test_clobber_d30 +.align 4 +abi_test_clobber_d30: + AARCH64_VALID_CALL_TARGET + fmov d30, xzr + ret +.size abi_test_clobber_d30,.-abi_test_clobber_d30 +.type abi_test_clobber_d31, %function +.globl abi_test_clobber_d31 +.hidden abi_test_clobber_d31 +.align 4 +abi_test_clobber_d31: + AARCH64_VALID_CALL_TARGET + fmov d31, xzr + ret +.size abi_test_clobber_d31,.-abi_test_clobber_d31 +.type abi_test_clobber_v8_upper, %function +.globl abi_test_clobber_v8_upper +.hidden abi_test_clobber_v8_upper +.align 4 +abi_test_clobber_v8_upper: + AARCH64_VALID_CALL_TARGET + fmov v8.d[1], xzr + ret +.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper +.type abi_test_clobber_v9_upper, %function +.globl abi_test_clobber_v9_upper +.hidden abi_test_clobber_v9_upper +.align 4 +abi_test_clobber_v9_upper: + AARCH64_VALID_CALL_TARGET + fmov v9.d[1], xzr + ret +.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper +.type abi_test_clobber_v10_upper, %function +.globl abi_test_clobber_v10_upper +.hidden abi_test_clobber_v10_upper +.align 4 +abi_test_clobber_v10_upper: + AARCH64_VALID_CALL_TARGET + fmov v10.d[1], xzr + ret +.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper +.type abi_test_clobber_v11_upper, %function +.globl abi_test_clobber_v11_upper +.hidden abi_test_clobber_v11_upper +.align 4 +abi_test_clobber_v11_upper: + AARCH64_VALID_CALL_TARGET + fmov v11.d[1], xzr + ret +.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper +.type abi_test_clobber_v12_upper, %function +.globl abi_test_clobber_v12_upper +.hidden abi_test_clobber_v12_upper +.align 4 +abi_test_clobber_v12_upper: + AARCH64_VALID_CALL_TARGET + fmov v12.d[1], xzr + ret +.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper +.type abi_test_clobber_v13_upper, %function +.globl abi_test_clobber_v13_upper +.hidden abi_test_clobber_v13_upper +.align 4 +abi_test_clobber_v13_upper: + AARCH64_VALID_CALL_TARGET + fmov v13.d[1], xzr + ret +.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper +.type abi_test_clobber_v14_upper, %function +.globl abi_test_clobber_v14_upper +.hidden abi_test_clobber_v14_upper +.align 4 +abi_test_clobber_v14_upper: + AARCH64_VALID_CALL_TARGET + fmov v14.d[1], xzr + ret +.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper +.type abi_test_clobber_v15_upper, %function +.globl abi_test_clobber_v15_upper +.hidden abi_test_clobber_v15_upper +.align 4 +abi_test_clobber_v15_upper: + AARCH64_VALID_CALL_TARGET + fmov v15.d[1], xzr + ret +.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-win.S b/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-win.S new file mode 100644 index 0000000000..14773e3a0f --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-armv8-win.S @@ -0,0 +1,750 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) +#include + +.text + +// abi_test_trampoline loads callee-saved registers from |state|, calls |func| +// with |argv|, then saves the callee-saved registers into |state|. It returns +// the result of |func|. The |unwind| argument is unused. +// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state, +// const uint64_t *argv, size_t argc, +// uint64_t unwind); + +.globl abi_test_trampoline + +.align 4 +abi_test_trampoline: +Labi_test_trampoline_begin: + AARCH64_SIGN_LINK_REGISTER + // Stack layout (low to high addresses) + // x29,x30 (16 bytes) + // d8-d15 (64 bytes) + // x19-x28 (80 bytes) + // x1 (8 bytes) + // padding (8 bytes) + stp x29, x30, [sp, #-176]! + mov x29, sp + + // Saved callee-saved registers and |state|. + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] + stp x19, x20, [sp, #80] + stp x21, x22, [sp, #96] + stp x23, x24, [sp, #112] + stp x25, x26, [sp, #128] + stp x27, x28, [sp, #144] + str x1, [sp, #160] + + // Load registers from |state|, with the exception of x29. x29 is the + // frame pointer and also callee-saved, but AAPCS64 allows platforms to + // mandate that x29 always point to a frame. iOS64 does so, which means + // we cannot fill x29 with entropy without violating ABI rules + // ourselves. x29 is tested separately below. + ldp d8, d9, [x1], #16 + ldp d10, d11, [x1], #16 + ldp d12, d13, [x1], #16 + ldp d14, d15, [x1], #16 + ldp x19, x20, [x1], #16 + ldp x21, x22, [x1], #16 + ldp x23, x24, [x1], #16 + ldp x25, x26, [x1], #16 + ldp x27, x28, [x1], #16 + + // Move parameters into temporary registers. + mov x9, x0 + mov x10, x2 + mov x11, x3 + + // Load parameters into registers. + cbz x11, Largs_done + ldr x0, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x1, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x2, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x3, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x4, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x5, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x6, [x10], #8 + subs x11, x11, #1 + b.eq Largs_done + ldr x7, [x10], #8 + +Largs_done: + blr x9 + + // Reload |state| and store registers. + ldr x1, [sp, #160] + stp d8, d9, [x1], #16 + stp d10, d11, [x1], #16 + stp d12, d13, [x1], #16 + stp d14, d15, [x1], #16 + stp x19, x20, [x1], #16 + stp x21, x22, [x1], #16 + stp x23, x24, [x1], #16 + stp x25, x26, [x1], #16 + stp x27, x28, [x1], #16 + + // |func| is required to preserve x29, the frame pointer. We cannot load + // random values into x29 (see comment above), so compare it against the + // expected value and zero the field of |state| if corrupted. + mov x9, sp + cmp x29, x9 + b.eq Lx29_ok + str xzr, [x1] + +Lx29_ok: + // Restore callee-saved registers. + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] + ldp x19, x20, [sp, #80] + ldp x21, x22, [sp, #96] + ldp x23, x24, [sp, #112] + ldp x25, x26, [sp, #128] + ldp x27, x28, [sp, #144] + + ldp x29, x30, [sp], #176 + AARCH64_VALIDATE_LINK_REGISTER + ret + + +.globl abi_test_clobber_x0 + +.align 4 +abi_test_clobber_x0: + AARCH64_VALID_CALL_TARGET + mov x0, xzr + ret + + +.globl abi_test_clobber_x1 + +.align 4 +abi_test_clobber_x1: + AARCH64_VALID_CALL_TARGET + mov x1, xzr + ret + + +.globl abi_test_clobber_x2 + +.align 4 +abi_test_clobber_x2: + AARCH64_VALID_CALL_TARGET + mov x2, xzr + ret + + +.globl abi_test_clobber_x3 + +.align 4 +abi_test_clobber_x3: + AARCH64_VALID_CALL_TARGET + mov x3, xzr + ret + + +.globl abi_test_clobber_x4 + +.align 4 +abi_test_clobber_x4: + AARCH64_VALID_CALL_TARGET + mov x4, xzr + ret + + +.globl abi_test_clobber_x5 + +.align 4 +abi_test_clobber_x5: + AARCH64_VALID_CALL_TARGET + mov x5, xzr + ret + + +.globl abi_test_clobber_x6 + +.align 4 +abi_test_clobber_x6: + AARCH64_VALID_CALL_TARGET + mov x6, xzr + ret + + +.globl abi_test_clobber_x7 + +.align 4 +abi_test_clobber_x7: + AARCH64_VALID_CALL_TARGET + mov x7, xzr + ret + + +.globl abi_test_clobber_x8 + +.align 4 +abi_test_clobber_x8: + AARCH64_VALID_CALL_TARGET + mov x8, xzr + ret + + +.globl abi_test_clobber_x9 + +.align 4 +abi_test_clobber_x9: + AARCH64_VALID_CALL_TARGET + mov x9, xzr + ret + + +.globl abi_test_clobber_x10 + +.align 4 +abi_test_clobber_x10: + AARCH64_VALID_CALL_TARGET + mov x10, xzr + ret + + +.globl abi_test_clobber_x11 + +.align 4 +abi_test_clobber_x11: + AARCH64_VALID_CALL_TARGET + mov x11, xzr + ret + + +.globl abi_test_clobber_x12 + +.align 4 +abi_test_clobber_x12: + AARCH64_VALID_CALL_TARGET + mov x12, xzr + ret + + +.globl abi_test_clobber_x13 + +.align 4 +abi_test_clobber_x13: + AARCH64_VALID_CALL_TARGET + mov x13, xzr + ret + + +.globl abi_test_clobber_x14 + +.align 4 +abi_test_clobber_x14: + AARCH64_VALID_CALL_TARGET + mov x14, xzr + ret + + +.globl abi_test_clobber_x15 + +.align 4 +abi_test_clobber_x15: + AARCH64_VALID_CALL_TARGET + mov x15, xzr + ret + + +.globl abi_test_clobber_x16 + +.align 4 +abi_test_clobber_x16: + AARCH64_VALID_CALL_TARGET + mov x16, xzr + ret + + +.globl abi_test_clobber_x17 + +.align 4 +abi_test_clobber_x17: + AARCH64_VALID_CALL_TARGET + mov x17, xzr + ret + + +.globl abi_test_clobber_x19 + +.align 4 +abi_test_clobber_x19: + AARCH64_VALID_CALL_TARGET + mov x19, xzr + ret + + +.globl abi_test_clobber_x20 + +.align 4 +abi_test_clobber_x20: + AARCH64_VALID_CALL_TARGET + mov x20, xzr + ret + + +.globl abi_test_clobber_x21 + +.align 4 +abi_test_clobber_x21: + AARCH64_VALID_CALL_TARGET + mov x21, xzr + ret + + +.globl abi_test_clobber_x22 + +.align 4 +abi_test_clobber_x22: + AARCH64_VALID_CALL_TARGET + mov x22, xzr + ret + + +.globl abi_test_clobber_x23 + +.align 4 +abi_test_clobber_x23: + AARCH64_VALID_CALL_TARGET + mov x23, xzr + ret + + +.globl abi_test_clobber_x24 + +.align 4 +abi_test_clobber_x24: + AARCH64_VALID_CALL_TARGET + mov x24, xzr + ret + + +.globl abi_test_clobber_x25 + +.align 4 +abi_test_clobber_x25: + AARCH64_VALID_CALL_TARGET + mov x25, xzr + ret + + +.globl abi_test_clobber_x26 + +.align 4 +abi_test_clobber_x26: + AARCH64_VALID_CALL_TARGET + mov x26, xzr + ret + + +.globl abi_test_clobber_x27 + +.align 4 +abi_test_clobber_x27: + AARCH64_VALID_CALL_TARGET + mov x27, xzr + ret + + +.globl abi_test_clobber_x28 + +.align 4 +abi_test_clobber_x28: + AARCH64_VALID_CALL_TARGET + mov x28, xzr + ret + + +.globl abi_test_clobber_x29 + +.align 4 +abi_test_clobber_x29: + AARCH64_VALID_CALL_TARGET + mov x29, xzr + ret + + +.globl abi_test_clobber_d0 + +.align 4 +abi_test_clobber_d0: + AARCH64_VALID_CALL_TARGET + fmov d0, xzr + ret + + +.globl abi_test_clobber_d1 + +.align 4 +abi_test_clobber_d1: + AARCH64_VALID_CALL_TARGET + fmov d1, xzr + ret + + +.globl abi_test_clobber_d2 + +.align 4 +abi_test_clobber_d2: + AARCH64_VALID_CALL_TARGET + fmov d2, xzr + ret + + +.globl abi_test_clobber_d3 + +.align 4 +abi_test_clobber_d3: + AARCH64_VALID_CALL_TARGET + fmov d3, xzr + ret + + +.globl abi_test_clobber_d4 + +.align 4 +abi_test_clobber_d4: + AARCH64_VALID_CALL_TARGET + fmov d4, xzr + ret + + +.globl abi_test_clobber_d5 + +.align 4 +abi_test_clobber_d5: + AARCH64_VALID_CALL_TARGET + fmov d5, xzr + ret + + +.globl abi_test_clobber_d6 + +.align 4 +abi_test_clobber_d6: + AARCH64_VALID_CALL_TARGET + fmov d6, xzr + ret + + +.globl abi_test_clobber_d7 + +.align 4 +abi_test_clobber_d7: + AARCH64_VALID_CALL_TARGET + fmov d7, xzr + ret + + +.globl abi_test_clobber_d8 + +.align 4 +abi_test_clobber_d8: + AARCH64_VALID_CALL_TARGET + fmov d8, xzr + ret + + +.globl abi_test_clobber_d9 + +.align 4 +abi_test_clobber_d9: + AARCH64_VALID_CALL_TARGET + fmov d9, xzr + ret + + +.globl abi_test_clobber_d10 + +.align 4 +abi_test_clobber_d10: + AARCH64_VALID_CALL_TARGET + fmov d10, xzr + ret + + +.globl abi_test_clobber_d11 + +.align 4 +abi_test_clobber_d11: + AARCH64_VALID_CALL_TARGET + fmov d11, xzr + ret + + +.globl abi_test_clobber_d12 + +.align 4 +abi_test_clobber_d12: + AARCH64_VALID_CALL_TARGET + fmov d12, xzr + ret + + +.globl abi_test_clobber_d13 + +.align 4 +abi_test_clobber_d13: + AARCH64_VALID_CALL_TARGET + fmov d13, xzr + ret + + +.globl abi_test_clobber_d14 + +.align 4 +abi_test_clobber_d14: + AARCH64_VALID_CALL_TARGET + fmov d14, xzr + ret + + +.globl abi_test_clobber_d15 + +.align 4 +abi_test_clobber_d15: + AARCH64_VALID_CALL_TARGET + fmov d15, xzr + ret + + +.globl abi_test_clobber_d16 + +.align 4 +abi_test_clobber_d16: + AARCH64_VALID_CALL_TARGET + fmov d16, xzr + ret + + +.globl abi_test_clobber_d17 + +.align 4 +abi_test_clobber_d17: + AARCH64_VALID_CALL_TARGET + fmov d17, xzr + ret + + +.globl abi_test_clobber_d18 + +.align 4 +abi_test_clobber_d18: + AARCH64_VALID_CALL_TARGET + fmov d18, xzr + ret + + +.globl abi_test_clobber_d19 + +.align 4 +abi_test_clobber_d19: + AARCH64_VALID_CALL_TARGET + fmov d19, xzr + ret + + +.globl abi_test_clobber_d20 + +.align 4 +abi_test_clobber_d20: + AARCH64_VALID_CALL_TARGET + fmov d20, xzr + ret + + +.globl abi_test_clobber_d21 + +.align 4 +abi_test_clobber_d21: + AARCH64_VALID_CALL_TARGET + fmov d21, xzr + ret + + +.globl abi_test_clobber_d22 + +.align 4 +abi_test_clobber_d22: + AARCH64_VALID_CALL_TARGET + fmov d22, xzr + ret + + +.globl abi_test_clobber_d23 + +.align 4 +abi_test_clobber_d23: + AARCH64_VALID_CALL_TARGET + fmov d23, xzr + ret + + +.globl abi_test_clobber_d24 + +.align 4 +abi_test_clobber_d24: + AARCH64_VALID_CALL_TARGET + fmov d24, xzr + ret + + +.globl abi_test_clobber_d25 + +.align 4 +abi_test_clobber_d25: + AARCH64_VALID_CALL_TARGET + fmov d25, xzr + ret + + +.globl abi_test_clobber_d26 + +.align 4 +abi_test_clobber_d26: + AARCH64_VALID_CALL_TARGET + fmov d26, xzr + ret + + +.globl abi_test_clobber_d27 + +.align 4 +abi_test_clobber_d27: + AARCH64_VALID_CALL_TARGET + fmov d27, xzr + ret + + +.globl abi_test_clobber_d28 + +.align 4 +abi_test_clobber_d28: + AARCH64_VALID_CALL_TARGET + fmov d28, xzr + ret + + +.globl abi_test_clobber_d29 + +.align 4 +abi_test_clobber_d29: + AARCH64_VALID_CALL_TARGET + fmov d29, xzr + ret + + +.globl abi_test_clobber_d30 + +.align 4 +abi_test_clobber_d30: + AARCH64_VALID_CALL_TARGET + fmov d30, xzr + ret + + +.globl abi_test_clobber_d31 + +.align 4 +abi_test_clobber_d31: + AARCH64_VALID_CALL_TARGET + fmov d31, xzr + ret + + +.globl abi_test_clobber_v8_upper + +.align 4 +abi_test_clobber_v8_upper: + AARCH64_VALID_CALL_TARGET + fmov v8.d[1], xzr + ret + + +.globl abi_test_clobber_v9_upper + +.align 4 +abi_test_clobber_v9_upper: + AARCH64_VALID_CALL_TARGET + fmov v9.d[1], xzr + ret + + +.globl abi_test_clobber_v10_upper + +.align 4 +abi_test_clobber_v10_upper: + AARCH64_VALID_CALL_TARGET + fmov v10.d[1], xzr + ret + + +.globl abi_test_clobber_v11_upper + +.align 4 +abi_test_clobber_v11_upper: + AARCH64_VALID_CALL_TARGET + fmov v11.d[1], xzr + ret + + +.globl abi_test_clobber_v12_upper + +.align 4 +abi_test_clobber_v12_upper: + AARCH64_VALID_CALL_TARGET + fmov v12.d[1], xzr + ret + + +.globl abi_test_clobber_v13_upper + +.align 4 +abi_test_clobber_v13_upper: + AARCH64_VALID_CALL_TARGET + fmov v13.d[1], xzr + ret + + +.globl abi_test_clobber_v14_upper + +.align 4 +abi_test_clobber_v14_upper: + AARCH64_VALID_CALL_TARGET + fmov v14.d[1], xzr + ret + + +.globl abi_test_clobber_v15_upper + +.align 4 +abi_test_clobber_v15_upper: + AARCH64_VALID_CALL_TARGET + fmov v15.d[1], xzr + ret + +#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-apple.S b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-apple.S new file mode 100644 index 0000000000..4065b9ad80 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-apple.S @@ -0,0 +1,168 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) +.text +.globl _abi_test_trampoline +.private_extern _abi_test_trampoline +.align 4 +_abi_test_trampoline: +L_abi_test_trampoline_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%ecx + movl (%ecx),%esi + movl 4(%ecx),%edi + movl 8(%ecx),%ebx + movl 12(%ecx),%ebp + subl $44,%esp + movl 72(%esp),%eax + xorl %ecx,%ecx +L000loop: + cmpl 76(%esp),%ecx + jae L001loop_done + movl (%eax,%ecx,4),%edx + movl %edx,(%esp,%ecx,4) + addl $1,%ecx + jmp L000loop +L001loop_done: + call *64(%esp) + addl $44,%esp + movl 24(%esp),%ecx + movl %esi,(%ecx) + movl %edi,4(%ecx) + movl %ebx,8(%ecx) + movl %ebp,12(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _abi_test_get_and_clear_direction_flag +.private_extern _abi_test_get_and_clear_direction_flag +.align 4 +_abi_test_get_and_clear_direction_flag: +L_abi_test_get_and_clear_direction_flag_begin: + pushfl + popl %eax + andl $1024,%eax + shrl $10,%eax + cld + ret +.globl _abi_test_set_direction_flag +.private_extern _abi_test_set_direction_flag +.align 4 +_abi_test_set_direction_flag: +L_abi_test_set_direction_flag_begin: + std + ret +.globl _abi_test_clobber_eax +.private_extern _abi_test_clobber_eax +.align 4 +_abi_test_clobber_eax: +L_abi_test_clobber_eax_begin: + xorl %eax,%eax + ret +.globl _abi_test_clobber_ebx +.private_extern _abi_test_clobber_ebx +.align 4 +_abi_test_clobber_ebx: +L_abi_test_clobber_ebx_begin: + xorl %ebx,%ebx + ret +.globl _abi_test_clobber_ecx +.private_extern _abi_test_clobber_ecx +.align 4 +_abi_test_clobber_ecx: +L_abi_test_clobber_ecx_begin: + xorl %ecx,%ecx + ret +.globl _abi_test_clobber_edx +.private_extern _abi_test_clobber_edx +.align 4 +_abi_test_clobber_edx: +L_abi_test_clobber_edx_begin: + xorl %edx,%edx + ret +.globl _abi_test_clobber_edi +.private_extern _abi_test_clobber_edi +.align 4 +_abi_test_clobber_edi: +L_abi_test_clobber_edi_begin: + xorl %edi,%edi + ret +.globl _abi_test_clobber_esi +.private_extern _abi_test_clobber_esi +.align 4 +_abi_test_clobber_esi: +L_abi_test_clobber_esi_begin: + xorl %esi,%esi + ret +.globl _abi_test_clobber_ebp +.private_extern _abi_test_clobber_ebp +.align 4 +_abi_test_clobber_ebp: +L_abi_test_clobber_ebp_begin: + xorl %ebp,%ebp + ret +.globl _abi_test_clobber_xmm0 +.private_extern _abi_test_clobber_xmm0 +.align 4 +_abi_test_clobber_xmm0: +L_abi_test_clobber_xmm0_begin: + pxor %xmm0,%xmm0 + ret +.globl _abi_test_clobber_xmm1 +.private_extern _abi_test_clobber_xmm1 +.align 4 +_abi_test_clobber_xmm1: +L_abi_test_clobber_xmm1_begin: + pxor %xmm1,%xmm1 + ret +.globl _abi_test_clobber_xmm2 +.private_extern _abi_test_clobber_xmm2 +.align 4 +_abi_test_clobber_xmm2: +L_abi_test_clobber_xmm2_begin: + pxor %xmm2,%xmm2 + ret +.globl _abi_test_clobber_xmm3 +.private_extern _abi_test_clobber_xmm3 +.align 4 +_abi_test_clobber_xmm3: +L_abi_test_clobber_xmm3_begin: + pxor %xmm3,%xmm3 + ret +.globl _abi_test_clobber_xmm4 +.private_extern _abi_test_clobber_xmm4 +.align 4 +_abi_test_clobber_xmm4: +L_abi_test_clobber_xmm4_begin: + pxor %xmm4,%xmm4 + ret +.globl _abi_test_clobber_xmm5 +.private_extern _abi_test_clobber_xmm5 +.align 4 +_abi_test_clobber_xmm5: +L_abi_test_clobber_xmm5_begin: + pxor %xmm5,%xmm5 + ret +.globl _abi_test_clobber_xmm6 +.private_extern _abi_test_clobber_xmm6 +.align 4 +_abi_test_clobber_xmm6: +L_abi_test_clobber_xmm6_begin: + pxor %xmm6,%xmm6 + ret +.globl _abi_test_clobber_xmm7 +.private_extern _abi_test_clobber_xmm7 +.align 4 +_abi_test_clobber_xmm7: +L_abi_test_clobber_xmm7_begin: + pxor %xmm7,%xmm7 + ret +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-linux.S b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-linux.S new file mode 100644 index 0000000000..3452c634ca --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-linux.S @@ -0,0 +1,204 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) +.text +.globl abi_test_trampoline +.hidden abi_test_trampoline +.type abi_test_trampoline,@function +.align 16 +abi_test_trampoline: +.L_abi_test_trampoline_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%ecx + movl (%ecx),%esi + movl 4(%ecx),%edi + movl 8(%ecx),%ebx + movl 12(%ecx),%ebp + subl $44,%esp + movl 72(%esp),%eax + xorl %ecx,%ecx +.L000loop: + cmpl 76(%esp),%ecx + jae .L001loop_done + movl (%eax,%ecx,4),%edx + movl %edx,(%esp,%ecx,4) + addl $1,%ecx + jmp .L000loop +.L001loop_done: + call *64(%esp) + addl $44,%esp + movl 24(%esp),%ecx + movl %esi,(%ecx) + movl %edi,4(%ecx) + movl %ebx,8(%ecx) + movl %ebp,12(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size abi_test_trampoline,.-.L_abi_test_trampoline_begin +.globl abi_test_get_and_clear_direction_flag +.hidden abi_test_get_and_clear_direction_flag +.type abi_test_get_and_clear_direction_flag,@function +.align 16 +abi_test_get_and_clear_direction_flag: +.L_abi_test_get_and_clear_direction_flag_begin: + pushfl + popl %eax + andl $1024,%eax + shrl $10,%eax + cld + ret +.size abi_test_get_and_clear_direction_flag,.-.L_abi_test_get_and_clear_direction_flag_begin +.globl abi_test_set_direction_flag +.hidden abi_test_set_direction_flag +.type abi_test_set_direction_flag,@function +.align 16 +abi_test_set_direction_flag: +.L_abi_test_set_direction_flag_begin: + std + ret +.size abi_test_set_direction_flag,.-.L_abi_test_set_direction_flag_begin +.globl abi_test_clobber_eax +.hidden abi_test_clobber_eax +.type abi_test_clobber_eax,@function +.align 16 +abi_test_clobber_eax: +.L_abi_test_clobber_eax_begin: + xorl %eax,%eax + ret +.size abi_test_clobber_eax,.-.L_abi_test_clobber_eax_begin +.globl abi_test_clobber_ebx +.hidden abi_test_clobber_ebx +.type abi_test_clobber_ebx,@function +.align 16 +abi_test_clobber_ebx: +.L_abi_test_clobber_ebx_begin: + xorl %ebx,%ebx + ret +.size abi_test_clobber_ebx,.-.L_abi_test_clobber_ebx_begin +.globl abi_test_clobber_ecx +.hidden abi_test_clobber_ecx +.type abi_test_clobber_ecx,@function +.align 16 +abi_test_clobber_ecx: +.L_abi_test_clobber_ecx_begin: + xorl %ecx,%ecx + ret +.size abi_test_clobber_ecx,.-.L_abi_test_clobber_ecx_begin +.globl abi_test_clobber_edx +.hidden abi_test_clobber_edx +.type abi_test_clobber_edx,@function +.align 16 +abi_test_clobber_edx: +.L_abi_test_clobber_edx_begin: + xorl %edx,%edx + ret +.size abi_test_clobber_edx,.-.L_abi_test_clobber_edx_begin +.globl abi_test_clobber_edi +.hidden abi_test_clobber_edi +.type abi_test_clobber_edi,@function +.align 16 +abi_test_clobber_edi: +.L_abi_test_clobber_edi_begin: + xorl %edi,%edi + ret +.size abi_test_clobber_edi,.-.L_abi_test_clobber_edi_begin +.globl abi_test_clobber_esi +.hidden abi_test_clobber_esi +.type abi_test_clobber_esi,@function +.align 16 +abi_test_clobber_esi: +.L_abi_test_clobber_esi_begin: + xorl %esi,%esi + ret +.size abi_test_clobber_esi,.-.L_abi_test_clobber_esi_begin +.globl abi_test_clobber_ebp +.hidden abi_test_clobber_ebp +.type abi_test_clobber_ebp,@function +.align 16 +abi_test_clobber_ebp: +.L_abi_test_clobber_ebp_begin: + xorl %ebp,%ebp + ret +.size abi_test_clobber_ebp,.-.L_abi_test_clobber_ebp_begin +.globl abi_test_clobber_xmm0 +.hidden abi_test_clobber_xmm0 +.type abi_test_clobber_xmm0,@function +.align 16 +abi_test_clobber_xmm0: +.L_abi_test_clobber_xmm0_begin: + pxor %xmm0,%xmm0 + ret +.size abi_test_clobber_xmm0,.-.L_abi_test_clobber_xmm0_begin +.globl abi_test_clobber_xmm1 +.hidden abi_test_clobber_xmm1 +.type abi_test_clobber_xmm1,@function +.align 16 +abi_test_clobber_xmm1: +.L_abi_test_clobber_xmm1_begin: + pxor %xmm1,%xmm1 + ret +.size abi_test_clobber_xmm1,.-.L_abi_test_clobber_xmm1_begin +.globl abi_test_clobber_xmm2 +.hidden abi_test_clobber_xmm2 +.type abi_test_clobber_xmm2,@function +.align 16 +abi_test_clobber_xmm2: +.L_abi_test_clobber_xmm2_begin: + pxor %xmm2,%xmm2 + ret +.size abi_test_clobber_xmm2,.-.L_abi_test_clobber_xmm2_begin +.globl abi_test_clobber_xmm3 +.hidden abi_test_clobber_xmm3 +.type abi_test_clobber_xmm3,@function +.align 16 +abi_test_clobber_xmm3: +.L_abi_test_clobber_xmm3_begin: + pxor %xmm3,%xmm3 + ret +.size abi_test_clobber_xmm3,.-.L_abi_test_clobber_xmm3_begin +.globl abi_test_clobber_xmm4 +.hidden abi_test_clobber_xmm4 +.type abi_test_clobber_xmm4,@function +.align 16 +abi_test_clobber_xmm4: +.L_abi_test_clobber_xmm4_begin: + pxor %xmm4,%xmm4 + ret +.size abi_test_clobber_xmm4,.-.L_abi_test_clobber_xmm4_begin +.globl abi_test_clobber_xmm5 +.hidden abi_test_clobber_xmm5 +.type abi_test_clobber_xmm5,@function +.align 16 +abi_test_clobber_xmm5: +.L_abi_test_clobber_xmm5_begin: + pxor %xmm5,%xmm5 + ret +.size abi_test_clobber_xmm5,.-.L_abi_test_clobber_xmm5_begin +.globl abi_test_clobber_xmm6 +.hidden abi_test_clobber_xmm6 +.type abi_test_clobber_xmm6,@function +.align 16 +abi_test_clobber_xmm6: +.L_abi_test_clobber_xmm6_begin: + pxor %xmm6,%xmm6 + ret +.size abi_test_clobber_xmm6,.-.L_abi_test_clobber_xmm6_begin +.globl abi_test_clobber_xmm7 +.hidden abi_test_clobber_xmm7 +.type abi_test_clobber_xmm7,@function +.align 16 +abi_test_clobber_xmm7: +.L_abi_test_clobber_xmm7_begin: + pxor %xmm7,%xmm7 + ret +.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin +#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-win.asm b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-win.asm new file mode 100644 index 0000000000..3ef9917afe --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86-win.asm @@ -0,0 +1,161 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +%ifidn __OUTPUT_FORMAT__, win32 +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _abi_test_trampoline +align 16 +_abi_test_trampoline: +L$_abi_test_trampoline_begin: + push ebp + push ebx + push esi + push edi + mov ecx,DWORD [24+esp] + mov esi,DWORD [ecx] + mov edi,DWORD [4+ecx] + mov ebx,DWORD [8+ecx] + mov ebp,DWORD [12+ecx] + sub esp,44 + mov eax,DWORD [72+esp] + xor ecx,ecx +L$000loop: + cmp ecx,DWORD [76+esp] + jae NEAR L$001loop_done + mov edx,DWORD [ecx*4+eax] + mov DWORD [ecx*4+esp],edx + add ecx,1 + jmp NEAR L$000loop +L$001loop_done: + call DWORD [64+esp] + add esp,44 + mov ecx,DWORD [24+esp] + mov DWORD [ecx],esi + mov DWORD [4+ecx],edi + mov DWORD [8+ecx],ebx + mov DWORD [12+ecx],ebp + pop edi + pop esi + pop ebx + pop ebp + ret +global _abi_test_get_and_clear_direction_flag +align 16 +_abi_test_get_and_clear_direction_flag: +L$_abi_test_get_and_clear_direction_flag_begin: + pushfd + pop eax + and eax,1024 + shr eax,10 + cld + ret +global _abi_test_set_direction_flag +align 16 +_abi_test_set_direction_flag: +L$_abi_test_set_direction_flag_begin: + std + ret +global _abi_test_clobber_eax +align 16 +_abi_test_clobber_eax: +L$_abi_test_clobber_eax_begin: + xor eax,eax + ret +global _abi_test_clobber_ebx +align 16 +_abi_test_clobber_ebx: +L$_abi_test_clobber_ebx_begin: + xor ebx,ebx + ret +global _abi_test_clobber_ecx +align 16 +_abi_test_clobber_ecx: +L$_abi_test_clobber_ecx_begin: + xor ecx,ecx + ret +global _abi_test_clobber_edx +align 16 +_abi_test_clobber_edx: +L$_abi_test_clobber_edx_begin: + xor edx,edx + ret +global _abi_test_clobber_edi +align 16 +_abi_test_clobber_edi: +L$_abi_test_clobber_edi_begin: + xor edi,edi + ret +global _abi_test_clobber_esi +align 16 +_abi_test_clobber_esi: +L$_abi_test_clobber_esi_begin: + xor esi,esi + ret +global _abi_test_clobber_ebp +align 16 +_abi_test_clobber_ebp: +L$_abi_test_clobber_ebp_begin: + xor ebp,ebp + ret +global _abi_test_clobber_xmm0 +align 16 +_abi_test_clobber_xmm0: +L$_abi_test_clobber_xmm0_begin: + pxor xmm0,xmm0 + ret +global _abi_test_clobber_xmm1 +align 16 +_abi_test_clobber_xmm1: +L$_abi_test_clobber_xmm1_begin: + pxor xmm1,xmm1 + ret +global _abi_test_clobber_xmm2 +align 16 +_abi_test_clobber_xmm2: +L$_abi_test_clobber_xmm2_begin: + pxor xmm2,xmm2 + ret +global _abi_test_clobber_xmm3 +align 16 +_abi_test_clobber_xmm3: +L$_abi_test_clobber_xmm3_begin: + pxor xmm3,xmm3 + ret +global _abi_test_clobber_xmm4 +align 16 +_abi_test_clobber_xmm4: +L$_abi_test_clobber_xmm4_begin: + pxor xmm4,xmm4 + ret +global _abi_test_clobber_xmm5 +align 16 +_abi_test_clobber_xmm5: +L$_abi_test_clobber_xmm5_begin: + pxor xmm5,xmm5 + ret +global _abi_test_clobber_xmm6 +align 16 +_abi_test_clobber_xmm6: +L$_abi_test_clobber_xmm6_begin: + pxor xmm6,xmm6 + ret +global _abi_test_clobber_xmm7 +align 16 +_abi_test_clobber_xmm7: +L$_abi_test_clobber_xmm7_begin: + pxor xmm7,xmm7 + ret +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-apple.S b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-apple.S new file mode 100644 index 0000000000..7c76d2d744 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-apple.S @@ -0,0 +1,541 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) +.text + + + + + + + + + +.globl _abi_test_trampoline +.private_extern _abi_test_trampoline +.p2align 4 +_abi_test_trampoline: + + +_CET_ENDBR + + + + + + + + + + subq $120,%rsp + + + movq %r8,48(%rsp) + movq %rbx,64(%rsp) + + + movq %rbp,72(%rsp) + + + movq %r12,80(%rsp) + + + movq %r13,88(%rsp) + + + movq %r14,96(%rsp) + + + movq %r15,104(%rsp) + + + movq 0(%rsi),%rbx + movq 8(%rsi),%rbp + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq 32(%rsi),%r14 + movq 40(%rsi),%r15 + + movq %rdi,32(%rsp) + movq %rsi,40(%rsp) + + + + + movq %rdx,%r10 + movq %rcx,%r11 + decq %r11 + js L$args_done + movq (%r10),%rdi + addq $8,%r10 + decq %r11 + js L$args_done + movq (%r10),%rsi + addq $8,%r10 + decq %r11 + js L$args_done + movq (%r10),%rdx + addq $8,%r10 + decq %r11 + js L$args_done + movq (%r10),%rcx + addq $8,%r10 + decq %r11 + js L$args_done + movq (%r10),%r8 + addq $8,%r10 + decq %r11 + js L$args_done + movq (%r10),%r9 + addq $8,%r10 + leaq 0(%rsp),%rax +L$args_loop: + decq %r11 + js L$args_done + + + + + + + movq %r11,56(%rsp) + movq (%r10),%r11 + movq %r11,(%rax) + movq 56(%rsp),%r11 + + addq $8,%r10 + addq $8,%rax + jmp L$args_loop + +L$args_done: + movq 32(%rsp),%rax + movq 48(%rsp),%r10 + testq %r10,%r10 + jz L$no_unwind + + + pushfq + orq $0x100,0(%rsp) + popfq + + + + nop +.globl _abi_test_unwind_start +.private_extern _abi_test_unwind_start +_abi_test_unwind_start: + + call *%rax +.globl _abi_test_unwind_return +.private_extern _abi_test_unwind_return +_abi_test_unwind_return: + + + + + pushfq + andq $-0x101,0(%rsp) + popfq +.globl _abi_test_unwind_stop +.private_extern _abi_test_unwind_stop +_abi_test_unwind_stop: + + jmp L$call_done + +L$no_unwind: + call *%rax + +L$call_done: + + movq 40(%rsp),%rsi + movq %rbx,0(%rsi) + movq %rbp,8(%rsi) + movq %r12,16(%rsi) + movq %r13,24(%rsi) + movq %r14,32(%rsi) + movq %r15,40(%rsi) + movq 64(%rsp),%rbx + + movq 72(%rsp),%rbp + + movq 80(%rsp),%r12 + + movq 88(%rsp),%r13 + + movq 96(%rsp),%r14 + + movq 104(%rsp),%r15 + + addq $120,%rsp + + + + ret + + + + +.globl _abi_test_clobber_rax +.private_extern _abi_test_clobber_rax +.p2align 4 +_abi_test_clobber_rax: +_CET_ENDBR + xorq %rax,%rax + ret + + +.globl _abi_test_clobber_rbx +.private_extern _abi_test_clobber_rbx +.p2align 4 +_abi_test_clobber_rbx: +_CET_ENDBR + xorq %rbx,%rbx + ret + + +.globl _abi_test_clobber_rcx +.private_extern _abi_test_clobber_rcx +.p2align 4 +_abi_test_clobber_rcx: +_CET_ENDBR + xorq %rcx,%rcx + ret + + +.globl _abi_test_clobber_rdx +.private_extern _abi_test_clobber_rdx +.p2align 4 +_abi_test_clobber_rdx: +_CET_ENDBR + xorq %rdx,%rdx + ret + + +.globl _abi_test_clobber_rdi +.private_extern _abi_test_clobber_rdi +.p2align 4 +_abi_test_clobber_rdi: +_CET_ENDBR + xorq %rdi,%rdi + ret + + +.globl _abi_test_clobber_rsi +.private_extern _abi_test_clobber_rsi +.p2align 4 +_abi_test_clobber_rsi: +_CET_ENDBR + xorq %rsi,%rsi + ret + + +.globl _abi_test_clobber_rbp +.private_extern _abi_test_clobber_rbp +.p2align 4 +_abi_test_clobber_rbp: +_CET_ENDBR + xorq %rbp,%rbp + ret + + +.globl _abi_test_clobber_r8 +.private_extern _abi_test_clobber_r8 +.p2align 4 +_abi_test_clobber_r8: +_CET_ENDBR + xorq %r8,%r8 + ret + + +.globl _abi_test_clobber_r9 +.private_extern _abi_test_clobber_r9 +.p2align 4 +_abi_test_clobber_r9: +_CET_ENDBR + xorq %r9,%r9 + ret + + +.globl _abi_test_clobber_r10 +.private_extern _abi_test_clobber_r10 +.p2align 4 +_abi_test_clobber_r10: +_CET_ENDBR + xorq %r10,%r10 + ret + + +.globl _abi_test_clobber_r11 +.private_extern _abi_test_clobber_r11 +.p2align 4 +_abi_test_clobber_r11: +_CET_ENDBR + xorq %r11,%r11 + ret + + +.globl _abi_test_clobber_r12 +.private_extern _abi_test_clobber_r12 +.p2align 4 +_abi_test_clobber_r12: +_CET_ENDBR + xorq %r12,%r12 + ret + + +.globl _abi_test_clobber_r13 +.private_extern _abi_test_clobber_r13 +.p2align 4 +_abi_test_clobber_r13: +_CET_ENDBR + xorq %r13,%r13 + ret + + +.globl _abi_test_clobber_r14 +.private_extern _abi_test_clobber_r14 +.p2align 4 +_abi_test_clobber_r14: +_CET_ENDBR + xorq %r14,%r14 + ret + + +.globl _abi_test_clobber_r15 +.private_extern _abi_test_clobber_r15 +.p2align 4 +_abi_test_clobber_r15: +_CET_ENDBR + xorq %r15,%r15 + ret + + +.globl _abi_test_clobber_xmm0 +.private_extern _abi_test_clobber_xmm0 +.p2align 4 +_abi_test_clobber_xmm0: +_CET_ENDBR + pxor %xmm0,%xmm0 + ret + + +.globl _abi_test_clobber_xmm1 +.private_extern _abi_test_clobber_xmm1 +.p2align 4 +_abi_test_clobber_xmm1: +_CET_ENDBR + pxor %xmm1,%xmm1 + ret + + +.globl _abi_test_clobber_xmm2 +.private_extern _abi_test_clobber_xmm2 +.p2align 4 +_abi_test_clobber_xmm2: +_CET_ENDBR + pxor %xmm2,%xmm2 + ret + + +.globl _abi_test_clobber_xmm3 +.private_extern _abi_test_clobber_xmm3 +.p2align 4 +_abi_test_clobber_xmm3: +_CET_ENDBR + pxor %xmm3,%xmm3 + ret + + +.globl _abi_test_clobber_xmm4 +.private_extern _abi_test_clobber_xmm4 +.p2align 4 +_abi_test_clobber_xmm4: +_CET_ENDBR + pxor %xmm4,%xmm4 + ret + + +.globl _abi_test_clobber_xmm5 +.private_extern _abi_test_clobber_xmm5 +.p2align 4 +_abi_test_clobber_xmm5: +_CET_ENDBR + pxor %xmm5,%xmm5 + ret + + +.globl _abi_test_clobber_xmm6 +.private_extern _abi_test_clobber_xmm6 +.p2align 4 +_abi_test_clobber_xmm6: +_CET_ENDBR + pxor %xmm6,%xmm6 + ret + + +.globl _abi_test_clobber_xmm7 +.private_extern _abi_test_clobber_xmm7 +.p2align 4 +_abi_test_clobber_xmm7: +_CET_ENDBR + pxor %xmm7,%xmm7 + ret + + +.globl _abi_test_clobber_xmm8 +.private_extern _abi_test_clobber_xmm8 +.p2align 4 +_abi_test_clobber_xmm8: +_CET_ENDBR + pxor %xmm8,%xmm8 + ret + + +.globl _abi_test_clobber_xmm9 +.private_extern _abi_test_clobber_xmm9 +.p2align 4 +_abi_test_clobber_xmm9: +_CET_ENDBR + pxor %xmm9,%xmm9 + ret + + +.globl _abi_test_clobber_xmm10 +.private_extern _abi_test_clobber_xmm10 +.p2align 4 +_abi_test_clobber_xmm10: +_CET_ENDBR + pxor %xmm10,%xmm10 + ret + + +.globl _abi_test_clobber_xmm11 +.private_extern _abi_test_clobber_xmm11 +.p2align 4 +_abi_test_clobber_xmm11: +_CET_ENDBR + pxor %xmm11,%xmm11 + ret + + +.globl _abi_test_clobber_xmm12 +.private_extern _abi_test_clobber_xmm12 +.p2align 4 +_abi_test_clobber_xmm12: +_CET_ENDBR + pxor %xmm12,%xmm12 + ret + + +.globl _abi_test_clobber_xmm13 +.private_extern _abi_test_clobber_xmm13 +.p2align 4 +_abi_test_clobber_xmm13: +_CET_ENDBR + pxor %xmm13,%xmm13 + ret + + +.globl _abi_test_clobber_xmm14 +.private_extern _abi_test_clobber_xmm14 +.p2align 4 +_abi_test_clobber_xmm14: +_CET_ENDBR + pxor %xmm14,%xmm14 + ret + + +.globl _abi_test_clobber_xmm15 +.private_extern _abi_test_clobber_xmm15 +.p2align 4 +_abi_test_clobber_xmm15: +_CET_ENDBR + pxor %xmm15,%xmm15 + ret + + + + + +.globl _abi_test_bad_unwind_wrong_register +.private_extern _abi_test_bad_unwind_wrong_register +.p2align 4 +_abi_test_bad_unwind_wrong_register: + + +_CET_ENDBR + pushq %r12 + + + + + + nop + popq %r12 + + ret + + + + + + + + +.globl _abi_test_bad_unwind_temporary +.private_extern _abi_test_bad_unwind_temporary +.p2align 4 +_abi_test_bad_unwind_temporary: + + +_CET_ENDBR + pushq %r12 + + + + movq %r12,%rax + incq %rax + movq %rax,(%rsp) + + + + movq %r12,(%rsp) + + + popq %r12 + + ret + + + + + + + + +.globl _abi_test_get_and_clear_direction_flag +.private_extern _abi_test_get_and_clear_direction_flag +_abi_test_get_and_clear_direction_flag: +_CET_ENDBR + pushfq + popq %rax + andq $0x400,%rax + shrq $10,%rax + cld + ret + + + + + +.globl _abi_test_set_direction_flag +.private_extern _abi_test_set_direction_flag +_abi_test_set_direction_flag: +_CET_ENDBR + std + ret + +#endif diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-linux.S b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-linux.S new file mode 100644 index 0000000000..93af8b9480 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-linux.S @@ -0,0 +1,545 @@ +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.text + + + + + + + + +.type abi_test_trampoline, @function +.globl abi_test_trampoline +.hidden abi_test_trampoline +.align 16 +abi_test_trampoline: +.cfi_startproc + +_CET_ENDBR + + + + + + + + + + subq $120,%rsp +.cfi_adjust_cfa_offset 120 + + movq %r8,48(%rsp) + movq %rbx,64(%rsp) +.cfi_offset rbx, -64 + + movq %rbp,72(%rsp) +.cfi_offset rbp, -56 + + movq %r12,80(%rsp) +.cfi_offset r12, -48 + + movq %r13,88(%rsp) +.cfi_offset r13, -40 + + movq %r14,96(%rsp) +.cfi_offset r14, -32 + + movq %r15,104(%rsp) +.cfi_offset r15, -24 + + movq 0(%rsi),%rbx + movq 8(%rsi),%rbp + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq 32(%rsi),%r14 + movq 40(%rsi),%r15 + + movq %rdi,32(%rsp) + movq %rsi,40(%rsp) + + + + + movq %rdx,%r10 + movq %rcx,%r11 + decq %r11 + js .Largs_done + movq (%r10),%rdi + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%rsi + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%rdx + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%rcx + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%r8 + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%r9 + addq $8,%r10 + leaq 0(%rsp),%rax +.Largs_loop: + decq %r11 + js .Largs_done + + + + + + + movq %r11,56(%rsp) + movq (%r10),%r11 + movq %r11,(%rax) + movq 56(%rsp),%r11 + + addq $8,%r10 + addq $8,%rax + jmp .Largs_loop + +.Largs_done: + movq 32(%rsp),%rax + movq 48(%rsp),%r10 + testq %r10,%r10 + jz .Lno_unwind + + + pushfq + orq $0x100,0(%rsp) + popfq + + + + nop +.globl abi_test_unwind_start +.hidden abi_test_unwind_start +abi_test_unwind_start: + + call *%rax +.globl abi_test_unwind_return +.hidden abi_test_unwind_return +abi_test_unwind_return: + + + + + pushfq + andq $-0x101,0(%rsp) + popfq +.globl abi_test_unwind_stop +.hidden abi_test_unwind_stop +abi_test_unwind_stop: + + jmp .Lcall_done + +.Lno_unwind: + call *%rax + +.Lcall_done: + + movq 40(%rsp),%rsi + movq %rbx,0(%rsi) + movq %rbp,8(%rsi) + movq %r12,16(%rsi) + movq %r13,24(%rsi) + movq %r14,32(%rsi) + movq %r15,40(%rsi) + movq 64(%rsp),%rbx +.cfi_restore rbx + movq 72(%rsp),%rbp +.cfi_restore rbp + movq 80(%rsp),%r12 +.cfi_restore r12 + movq 88(%rsp),%r13 +.cfi_restore r13 + movq 96(%rsp),%r14 +.cfi_restore r14 + movq 104(%rsp),%r15 +.cfi_restore r15 + addq $120,%rsp +.cfi_adjust_cfa_offset -120 + + + ret +.cfi_endproc + +.size abi_test_trampoline,.-abi_test_trampoline +.type abi_test_clobber_rax, @function +.globl abi_test_clobber_rax +.hidden abi_test_clobber_rax +.align 16 +abi_test_clobber_rax: +_CET_ENDBR + xorq %rax,%rax + ret +.size abi_test_clobber_rax,.-abi_test_clobber_rax +.type abi_test_clobber_rbx, @function +.globl abi_test_clobber_rbx +.hidden abi_test_clobber_rbx +.align 16 +abi_test_clobber_rbx: +_CET_ENDBR + xorq %rbx,%rbx + ret +.size abi_test_clobber_rbx,.-abi_test_clobber_rbx +.type abi_test_clobber_rcx, @function +.globl abi_test_clobber_rcx +.hidden abi_test_clobber_rcx +.align 16 +abi_test_clobber_rcx: +_CET_ENDBR + xorq %rcx,%rcx + ret +.size abi_test_clobber_rcx,.-abi_test_clobber_rcx +.type abi_test_clobber_rdx, @function +.globl abi_test_clobber_rdx +.hidden abi_test_clobber_rdx +.align 16 +abi_test_clobber_rdx: +_CET_ENDBR + xorq %rdx,%rdx + ret +.size abi_test_clobber_rdx,.-abi_test_clobber_rdx +.type abi_test_clobber_rdi, @function +.globl abi_test_clobber_rdi +.hidden abi_test_clobber_rdi +.align 16 +abi_test_clobber_rdi: +_CET_ENDBR + xorq %rdi,%rdi + ret +.size abi_test_clobber_rdi,.-abi_test_clobber_rdi +.type abi_test_clobber_rsi, @function +.globl abi_test_clobber_rsi +.hidden abi_test_clobber_rsi +.align 16 +abi_test_clobber_rsi: +_CET_ENDBR + xorq %rsi,%rsi + ret +.size abi_test_clobber_rsi,.-abi_test_clobber_rsi +.type abi_test_clobber_rbp, @function +.globl abi_test_clobber_rbp +.hidden abi_test_clobber_rbp +.align 16 +abi_test_clobber_rbp: +_CET_ENDBR + xorq %rbp,%rbp + ret +.size abi_test_clobber_rbp,.-abi_test_clobber_rbp +.type abi_test_clobber_r8, @function +.globl abi_test_clobber_r8 +.hidden abi_test_clobber_r8 +.align 16 +abi_test_clobber_r8: +_CET_ENDBR + xorq %r8,%r8 + ret +.size abi_test_clobber_r8,.-abi_test_clobber_r8 +.type abi_test_clobber_r9, @function +.globl abi_test_clobber_r9 +.hidden abi_test_clobber_r9 +.align 16 +abi_test_clobber_r9: +_CET_ENDBR + xorq %r9,%r9 + ret +.size abi_test_clobber_r9,.-abi_test_clobber_r9 +.type abi_test_clobber_r10, @function +.globl abi_test_clobber_r10 +.hidden abi_test_clobber_r10 +.align 16 +abi_test_clobber_r10: +_CET_ENDBR + xorq %r10,%r10 + ret +.size abi_test_clobber_r10,.-abi_test_clobber_r10 +.type abi_test_clobber_r11, @function +.globl abi_test_clobber_r11 +.hidden abi_test_clobber_r11 +.align 16 +abi_test_clobber_r11: +_CET_ENDBR + xorq %r11,%r11 + ret +.size abi_test_clobber_r11,.-abi_test_clobber_r11 +.type abi_test_clobber_r12, @function +.globl abi_test_clobber_r12 +.hidden abi_test_clobber_r12 +.align 16 +abi_test_clobber_r12: +_CET_ENDBR + xorq %r12,%r12 + ret +.size abi_test_clobber_r12,.-abi_test_clobber_r12 +.type abi_test_clobber_r13, @function +.globl abi_test_clobber_r13 +.hidden abi_test_clobber_r13 +.align 16 +abi_test_clobber_r13: +_CET_ENDBR + xorq %r13,%r13 + ret +.size abi_test_clobber_r13,.-abi_test_clobber_r13 +.type abi_test_clobber_r14, @function +.globl abi_test_clobber_r14 +.hidden abi_test_clobber_r14 +.align 16 +abi_test_clobber_r14: +_CET_ENDBR + xorq %r14,%r14 + ret +.size abi_test_clobber_r14,.-abi_test_clobber_r14 +.type abi_test_clobber_r15, @function +.globl abi_test_clobber_r15 +.hidden abi_test_clobber_r15 +.align 16 +abi_test_clobber_r15: +_CET_ENDBR + xorq %r15,%r15 + ret +.size abi_test_clobber_r15,.-abi_test_clobber_r15 +.type abi_test_clobber_xmm0, @function +.globl abi_test_clobber_xmm0 +.hidden abi_test_clobber_xmm0 +.align 16 +abi_test_clobber_xmm0: +_CET_ENDBR + pxor %xmm0,%xmm0 + ret +.size abi_test_clobber_xmm0,.-abi_test_clobber_xmm0 +.type abi_test_clobber_xmm1, @function +.globl abi_test_clobber_xmm1 +.hidden abi_test_clobber_xmm1 +.align 16 +abi_test_clobber_xmm1: +_CET_ENDBR + pxor %xmm1,%xmm1 + ret +.size abi_test_clobber_xmm1,.-abi_test_clobber_xmm1 +.type abi_test_clobber_xmm2, @function +.globl abi_test_clobber_xmm2 +.hidden abi_test_clobber_xmm2 +.align 16 +abi_test_clobber_xmm2: +_CET_ENDBR + pxor %xmm2,%xmm2 + ret +.size abi_test_clobber_xmm2,.-abi_test_clobber_xmm2 +.type abi_test_clobber_xmm3, @function +.globl abi_test_clobber_xmm3 +.hidden abi_test_clobber_xmm3 +.align 16 +abi_test_clobber_xmm3: +_CET_ENDBR + pxor %xmm3,%xmm3 + ret +.size abi_test_clobber_xmm3,.-abi_test_clobber_xmm3 +.type abi_test_clobber_xmm4, @function +.globl abi_test_clobber_xmm4 +.hidden abi_test_clobber_xmm4 +.align 16 +abi_test_clobber_xmm4: +_CET_ENDBR + pxor %xmm4,%xmm4 + ret +.size abi_test_clobber_xmm4,.-abi_test_clobber_xmm4 +.type abi_test_clobber_xmm5, @function +.globl abi_test_clobber_xmm5 +.hidden abi_test_clobber_xmm5 +.align 16 +abi_test_clobber_xmm5: +_CET_ENDBR + pxor %xmm5,%xmm5 + ret +.size abi_test_clobber_xmm5,.-abi_test_clobber_xmm5 +.type abi_test_clobber_xmm6, @function +.globl abi_test_clobber_xmm6 +.hidden abi_test_clobber_xmm6 +.align 16 +abi_test_clobber_xmm6: +_CET_ENDBR + pxor %xmm6,%xmm6 + ret +.size abi_test_clobber_xmm6,.-abi_test_clobber_xmm6 +.type abi_test_clobber_xmm7, @function +.globl abi_test_clobber_xmm7 +.hidden abi_test_clobber_xmm7 +.align 16 +abi_test_clobber_xmm7: +_CET_ENDBR + pxor %xmm7,%xmm7 + ret +.size abi_test_clobber_xmm7,.-abi_test_clobber_xmm7 +.type abi_test_clobber_xmm8, @function +.globl abi_test_clobber_xmm8 +.hidden abi_test_clobber_xmm8 +.align 16 +abi_test_clobber_xmm8: +_CET_ENDBR + pxor %xmm8,%xmm8 + ret +.size abi_test_clobber_xmm8,.-abi_test_clobber_xmm8 +.type abi_test_clobber_xmm9, @function +.globl abi_test_clobber_xmm9 +.hidden abi_test_clobber_xmm9 +.align 16 +abi_test_clobber_xmm9: +_CET_ENDBR + pxor %xmm9,%xmm9 + ret +.size abi_test_clobber_xmm9,.-abi_test_clobber_xmm9 +.type abi_test_clobber_xmm10, @function +.globl abi_test_clobber_xmm10 +.hidden abi_test_clobber_xmm10 +.align 16 +abi_test_clobber_xmm10: +_CET_ENDBR + pxor %xmm10,%xmm10 + ret +.size abi_test_clobber_xmm10,.-abi_test_clobber_xmm10 +.type abi_test_clobber_xmm11, @function +.globl abi_test_clobber_xmm11 +.hidden abi_test_clobber_xmm11 +.align 16 +abi_test_clobber_xmm11: +_CET_ENDBR + pxor %xmm11,%xmm11 + ret +.size abi_test_clobber_xmm11,.-abi_test_clobber_xmm11 +.type abi_test_clobber_xmm12, @function +.globl abi_test_clobber_xmm12 +.hidden abi_test_clobber_xmm12 +.align 16 +abi_test_clobber_xmm12: +_CET_ENDBR + pxor %xmm12,%xmm12 + ret +.size abi_test_clobber_xmm12,.-abi_test_clobber_xmm12 +.type abi_test_clobber_xmm13, @function +.globl abi_test_clobber_xmm13 +.hidden abi_test_clobber_xmm13 +.align 16 +abi_test_clobber_xmm13: +_CET_ENDBR + pxor %xmm13,%xmm13 + ret +.size abi_test_clobber_xmm13,.-abi_test_clobber_xmm13 +.type abi_test_clobber_xmm14, @function +.globl abi_test_clobber_xmm14 +.hidden abi_test_clobber_xmm14 +.align 16 +abi_test_clobber_xmm14: +_CET_ENDBR + pxor %xmm14,%xmm14 + ret +.size abi_test_clobber_xmm14,.-abi_test_clobber_xmm14 +.type abi_test_clobber_xmm15, @function +.globl abi_test_clobber_xmm15 +.hidden abi_test_clobber_xmm15 +.align 16 +abi_test_clobber_xmm15: +_CET_ENDBR + pxor %xmm15,%xmm15 + ret +.size abi_test_clobber_xmm15,.-abi_test_clobber_xmm15 + + + +.type abi_test_bad_unwind_wrong_register, @function +.globl abi_test_bad_unwind_wrong_register +.hidden abi_test_bad_unwind_wrong_register +.align 16 +abi_test_bad_unwind_wrong_register: +.cfi_startproc + +_CET_ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-16 + + + + + nop + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + ret + +.cfi_endproc +.size abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register + + + + +.type abi_test_bad_unwind_temporary, @function +.globl abi_test_bad_unwind_temporary +.hidden abi_test_bad_unwind_temporary +.align 16 +abi_test_bad_unwind_temporary: +.cfi_startproc + +_CET_ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + + + movq %r12,%rax + incq %rax + movq %rax,(%rsp) + + + + movq %r12,(%rsp) + + + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + ret +.cfi_endproc + +.size abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary + + + + +.type abi_test_set_direction_flag, @function +.globl abi_test_get_and_clear_direction_flag +.hidden abi_test_get_and_clear_direction_flag +abi_test_get_and_clear_direction_flag: +_CET_ENDBR + pushfq + popq %rax + andq $0x400,%rax + shrq $10,%rax + cld + ret +.size abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag + + + +.type abi_test_set_direction_flag, @function +.globl abi_test_set_direction_flag +.hidden abi_test_set_direction_flag +abi_test_set_direction_flag: +_CET_ENDBR + std + ret +.size abi_test_set_direction_flag,.-abi_test_set_direction_flag +#endif diff --git a/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-win.asm b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-win.asm new file mode 100644 index 0000000000..ae04cbed40 --- /dev/null +++ b/yass/third_party/boringssl/src/gen/test_support/trampoline-x86_64-win.asm @@ -0,0 +1,715 @@ +; This file is generated from a similarly-named Perl script in the BoringSSL +; source tree. Do not edit by hand. + +%ifidn __OUTPUT_FORMAT__, win64 +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +%define _CET_ENDBR + +%ifdef BORINGSSL_PREFIX +%include "boringssl_prefix_symbols_nasm.inc" +%endif +section .text code align=64 + + + + + + + + + + +global abi_test_trampoline +ALIGN 16 +abi_test_trampoline: + +$L$SEH_begin_abi_test_trampoline_1: +_CET_ENDBR + + + + + + + + + + sub rsp,344 + +$L$SEH_prolog_abi_test_trampoline_2: + mov QWORD[112+rsp],rbx + +$L$SEH_prolog_abi_test_trampoline_3: + mov QWORD[120+rsp],rbp + +$L$SEH_prolog_abi_test_trampoline_4: + mov QWORD[128+rsp],rdi + +$L$SEH_prolog_abi_test_trampoline_5: + mov QWORD[136+rsp],rsi + +$L$SEH_prolog_abi_test_trampoline_6: + mov QWORD[144+rsp],r12 + +$L$SEH_prolog_abi_test_trampoline_7: + mov QWORD[152+rsp],r13 + +$L$SEH_prolog_abi_test_trampoline_8: + mov QWORD[160+rsp],r14 + +$L$SEH_prolog_abi_test_trampoline_9: + mov QWORD[168+rsp],r15 + +$L$SEH_prolog_abi_test_trampoline_10: + movdqa XMMWORD[176+rsp],xmm6 + +$L$SEH_prolog_abi_test_trampoline_11: + movdqa XMMWORD[192+rsp],xmm7 + +$L$SEH_prolog_abi_test_trampoline_12: + movdqa XMMWORD[208+rsp],xmm8 + +$L$SEH_prolog_abi_test_trampoline_13: + movdqa XMMWORD[224+rsp],xmm9 + +$L$SEH_prolog_abi_test_trampoline_14: + movdqa XMMWORD[240+rsp],xmm10 + +$L$SEH_prolog_abi_test_trampoline_15: + movdqa XMMWORD[256+rsp],xmm11 + +$L$SEH_prolog_abi_test_trampoline_16: + movdqa XMMWORD[272+rsp],xmm12 + +$L$SEH_prolog_abi_test_trampoline_17: + movdqa XMMWORD[288+rsp],xmm13 + +$L$SEH_prolog_abi_test_trampoline_18: + movdqa XMMWORD[304+rsp],xmm14 + +$L$SEH_prolog_abi_test_trampoline_19: + movdqa XMMWORD[320+rsp],xmm15 + +$L$SEH_prolog_abi_test_trampoline_20: + mov rbx,QWORD[rdx] + mov rbp,QWORD[8+rdx] + mov rdi,QWORD[16+rdx] + mov rsi,QWORD[24+rdx] + mov r12,QWORD[32+rdx] + mov r13,QWORD[40+rdx] + mov r14,QWORD[48+rdx] + mov r15,QWORD[56+rdx] + movdqa xmm6,XMMWORD[64+rdx] + movdqa xmm7,XMMWORD[80+rdx] + movdqa xmm8,XMMWORD[96+rdx] + movdqa xmm9,XMMWORD[112+rdx] + movdqa xmm10,XMMWORD[128+rdx] + movdqa xmm11,XMMWORD[144+rdx] + movdqa xmm12,XMMWORD[160+rdx] + movdqa xmm13,XMMWORD[176+rdx] + movdqa xmm14,XMMWORD[192+rdx] + movdqa xmm15,XMMWORD[208+rdx] + + mov QWORD[88+rsp],rcx + mov QWORD[96+rsp],rdx + + + + + mov r10,r8 + mov r11,r9 + dec r11 + js NEAR $L$args_done + mov rcx,QWORD[r10] + add r10,8 + dec r11 + js NEAR $L$args_done + mov rdx,QWORD[r10] + add r10,8 + dec r11 + js NEAR $L$args_done + mov r8,QWORD[r10] + add r10,8 + dec r11 + js NEAR $L$args_done + mov r9,QWORD[r10] + add r10,8 + lea rax,[32+rsp] +$L$args_loop: + dec r11 + js NEAR $L$args_done + + + + + + + mov QWORD[104+rsp],r11 + mov r11,QWORD[r10] + mov QWORD[rax],r11 + mov r11,QWORD[104+rsp] + + add r10,8 + add rax,8 + jmp NEAR $L$args_loop + +$L$args_done: + mov rax,QWORD[88+rsp] + mov r10,QWORD[384+rsp] + test r10,r10 + jz NEAR $L$no_unwind + + + pushfq + or QWORD[rsp],0x100 + popfq + + + + nop +global abi_test_unwind_start +abi_test_unwind_start: + + call rax +global abi_test_unwind_return +abi_test_unwind_return: + + + + + pushfq + and QWORD[rsp],-0x101 + popfq +global abi_test_unwind_stop +abi_test_unwind_stop: + + jmp NEAR $L$call_done + +$L$no_unwind: + call rax + +$L$call_done: + + mov rdx,QWORD[96+rsp] + mov QWORD[rdx],rbx + mov QWORD[8+rdx],rbp + mov QWORD[16+rdx],rdi + mov QWORD[24+rdx],rsi + mov QWORD[32+rdx],r12 + mov QWORD[40+rdx],r13 + mov QWORD[48+rdx],r14 + mov QWORD[56+rdx],r15 + movdqa XMMWORD[64+rdx],xmm6 + movdqa XMMWORD[80+rdx],xmm7 + movdqa XMMWORD[96+rdx],xmm8 + movdqa XMMWORD[112+rdx],xmm9 + movdqa XMMWORD[128+rdx],xmm10 + movdqa XMMWORD[144+rdx],xmm11 + movdqa XMMWORD[160+rdx],xmm12 + movdqa XMMWORD[176+rdx],xmm13 + movdqa XMMWORD[192+rdx],xmm14 + movdqa XMMWORD[208+rdx],xmm15 + mov rbx,QWORD[112+rsp] + + mov rbp,QWORD[120+rsp] + + mov rdi,QWORD[128+rsp] + + mov rsi,QWORD[136+rsp] + + mov r12,QWORD[144+rsp] + + mov r13,QWORD[152+rsp] + + mov r14,QWORD[160+rsp] + + mov r15,QWORD[168+rsp] + + movdqa xmm6,XMMWORD[176+rsp] + + movdqa xmm7,XMMWORD[192+rsp] + + movdqa xmm8,XMMWORD[208+rsp] + + movdqa xmm9,XMMWORD[224+rsp] + + movdqa xmm10,XMMWORD[240+rsp] + + movdqa xmm11,XMMWORD[256+rsp] + + movdqa xmm12,XMMWORD[272+rsp] + + movdqa xmm13,XMMWORD[288+rsp] + + movdqa xmm14,XMMWORD[304+rsp] + + movdqa xmm15,XMMWORD[320+rsp] + + add rsp,344 + + + + ret + +$L$SEH_end_abi_test_trampoline_21: + + +global abi_test_clobber_rax +ALIGN 16 +abi_test_clobber_rax: +_CET_ENDBR + xor rax,rax + ret + + +global abi_test_clobber_rbx +ALIGN 16 +abi_test_clobber_rbx: +_CET_ENDBR + xor rbx,rbx + ret + + +global abi_test_clobber_rcx +ALIGN 16 +abi_test_clobber_rcx: +_CET_ENDBR + xor rcx,rcx + ret + + +global abi_test_clobber_rdx +ALIGN 16 +abi_test_clobber_rdx: +_CET_ENDBR + xor rdx,rdx + ret + + +global abi_test_clobber_rdi +ALIGN 16 +abi_test_clobber_rdi: +_CET_ENDBR + xor rdi,rdi + ret + + +global abi_test_clobber_rsi +ALIGN 16 +abi_test_clobber_rsi: +_CET_ENDBR + xor rsi,rsi + ret + + +global abi_test_clobber_rbp +ALIGN 16 +abi_test_clobber_rbp: +_CET_ENDBR + xor rbp,rbp + ret + + +global abi_test_clobber_r8 +ALIGN 16 +abi_test_clobber_r8: +_CET_ENDBR + xor r8,r8 + ret + + +global abi_test_clobber_r9 +ALIGN 16 +abi_test_clobber_r9: +_CET_ENDBR + xor r9,r9 + ret + + +global abi_test_clobber_r10 +ALIGN 16 +abi_test_clobber_r10: +_CET_ENDBR + xor r10,r10 + ret + + +global abi_test_clobber_r11 +ALIGN 16 +abi_test_clobber_r11: +_CET_ENDBR + xor r11,r11 + ret + + +global abi_test_clobber_r12 +ALIGN 16 +abi_test_clobber_r12: +_CET_ENDBR + xor r12,r12 + ret + + +global abi_test_clobber_r13 +ALIGN 16 +abi_test_clobber_r13: +_CET_ENDBR + xor r13,r13 + ret + + +global abi_test_clobber_r14 +ALIGN 16 +abi_test_clobber_r14: +_CET_ENDBR + xor r14,r14 + ret + + +global abi_test_clobber_r15 +ALIGN 16 +abi_test_clobber_r15: +_CET_ENDBR + xor r15,r15 + ret + + +global abi_test_clobber_xmm0 +ALIGN 16 +abi_test_clobber_xmm0: +_CET_ENDBR + pxor xmm0,xmm0 + ret + + +global abi_test_clobber_xmm1 +ALIGN 16 +abi_test_clobber_xmm1: +_CET_ENDBR + pxor xmm1,xmm1 + ret + + +global abi_test_clobber_xmm2 +ALIGN 16 +abi_test_clobber_xmm2: +_CET_ENDBR + pxor xmm2,xmm2 + ret + + +global abi_test_clobber_xmm3 +ALIGN 16 +abi_test_clobber_xmm3: +_CET_ENDBR + pxor xmm3,xmm3 + ret + + +global abi_test_clobber_xmm4 +ALIGN 16 +abi_test_clobber_xmm4: +_CET_ENDBR + pxor xmm4,xmm4 + ret + + +global abi_test_clobber_xmm5 +ALIGN 16 +abi_test_clobber_xmm5: +_CET_ENDBR + pxor xmm5,xmm5 + ret + + +global abi_test_clobber_xmm6 +ALIGN 16 +abi_test_clobber_xmm6: +_CET_ENDBR + pxor xmm6,xmm6 + ret + + +global abi_test_clobber_xmm7 +ALIGN 16 +abi_test_clobber_xmm7: +_CET_ENDBR + pxor xmm7,xmm7 + ret + + +global abi_test_clobber_xmm8 +ALIGN 16 +abi_test_clobber_xmm8: +_CET_ENDBR + pxor xmm8,xmm8 + ret + + +global abi_test_clobber_xmm9 +ALIGN 16 +abi_test_clobber_xmm9: +_CET_ENDBR + pxor xmm9,xmm9 + ret + + +global abi_test_clobber_xmm10 +ALIGN 16 +abi_test_clobber_xmm10: +_CET_ENDBR + pxor xmm10,xmm10 + ret + + +global abi_test_clobber_xmm11 +ALIGN 16 +abi_test_clobber_xmm11: +_CET_ENDBR + pxor xmm11,xmm11 + ret + + +global abi_test_clobber_xmm12 +ALIGN 16 +abi_test_clobber_xmm12: +_CET_ENDBR + pxor xmm12,xmm12 + ret + + +global abi_test_clobber_xmm13 +ALIGN 16 +abi_test_clobber_xmm13: +_CET_ENDBR + pxor xmm13,xmm13 + ret + + +global abi_test_clobber_xmm14 +ALIGN 16 +abi_test_clobber_xmm14: +_CET_ENDBR + pxor xmm14,xmm14 + ret + + +global abi_test_clobber_xmm15 +ALIGN 16 +abi_test_clobber_xmm15: +_CET_ENDBR + pxor xmm15,xmm15 + ret + + + + + +global abi_test_bad_unwind_wrong_register +ALIGN 16 +abi_test_bad_unwind_wrong_register: + +$L$SEH_begin_abi_test_bad_unwind_wrong_register_1: +_CET_ENDBR + push r12 + +$L$SEH_prolog_abi_test_bad_unwind_wrong_register_2: + + + + nop + pop r12 + + ret +$L$SEH_end_abi_test_bad_unwind_wrong_register_3: + + + + + + + +global abi_test_bad_unwind_temporary +ALIGN 16 +abi_test_bad_unwind_temporary: + +$L$SEH_begin_abi_test_bad_unwind_temporary_1: +_CET_ENDBR + push r12 + +$L$SEH_prolog_abi_test_bad_unwind_temporary_2: + + mov rax,r12 + inc rax + mov QWORD[rsp],rax + + + + mov QWORD[rsp],r12 + + + pop r12 + + ret + +$L$SEH_end_abi_test_bad_unwind_temporary_3: + + + + + + +global abi_test_get_and_clear_direction_flag +abi_test_get_and_clear_direction_flag: +_CET_ENDBR + pushfq + pop rax + and rax,0x400 + shr rax,10 + cld + ret + + + + + +global abi_test_set_direction_flag +abi_test_set_direction_flag: +_CET_ENDBR + std + ret + + + + + + +global abi_test_bad_unwind_epilog +ALIGN 16 +abi_test_bad_unwind_epilog: +$L$SEH_begin_abi_test_bad_unwind_epilog_1: + push r12 +$L$SEH_prolog_abi_test_bad_unwind_epilog_2: + + nop + + + pop r12 + nop + ret +$L$SEH_end_abi_test_bad_unwind_epilog_3: + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_abi_test_trampoline_1 wrt ..imagebase + DD $L$SEH_end_abi_test_trampoline_21 wrt ..imagebase + DD $L$SEH_info_abi_test_trampoline_0 wrt ..imagebase + + DD $L$SEH_begin_abi_test_bad_unwind_wrong_register_1 wrt ..imagebase + DD $L$SEH_end_abi_test_bad_unwind_wrong_register_3 wrt ..imagebase + DD $L$SEH_info_abi_test_bad_unwind_wrong_register_0 wrt ..imagebase + + DD $L$SEH_begin_abi_test_bad_unwind_temporary_1 wrt ..imagebase + DD $L$SEH_end_abi_test_bad_unwind_temporary_3 wrt ..imagebase + DD $L$SEH_info_abi_test_bad_unwind_temporary_0 wrt ..imagebase + + DD $L$SEH_begin_abi_test_bad_unwind_epilog_1 wrt ..imagebase + DD $L$SEH_end_abi_test_bad_unwind_epilog_3 wrt ..imagebase + DD $L$SEH_info_abi_test_bad_unwind_epilog_0 wrt ..imagebase + + +section .xdata rdata align=8 +ALIGN 4 +$L$SEH_info_abi_test_trampoline_0: + DB 1 + DB $L$SEH_prolog_abi_test_trampoline_20-$L$SEH_begin_abi_test_trampoline_1 + DB 38 + DB 0 + DB $L$SEH_prolog_abi_test_trampoline_20-$L$SEH_begin_abi_test_trampoline_1 + DB 248 + DW 20 + DB $L$SEH_prolog_abi_test_trampoline_19-$L$SEH_begin_abi_test_trampoline_1 + DB 232 + DW 19 + DB $L$SEH_prolog_abi_test_trampoline_18-$L$SEH_begin_abi_test_trampoline_1 + DB 216 + DW 18 + DB $L$SEH_prolog_abi_test_trampoline_17-$L$SEH_begin_abi_test_trampoline_1 + DB 200 + DW 17 + DB $L$SEH_prolog_abi_test_trampoline_16-$L$SEH_begin_abi_test_trampoline_1 + DB 184 + DW 16 + DB $L$SEH_prolog_abi_test_trampoline_15-$L$SEH_begin_abi_test_trampoline_1 + DB 168 + DW 15 + DB $L$SEH_prolog_abi_test_trampoline_14-$L$SEH_begin_abi_test_trampoline_1 + DB 152 + DW 14 + DB $L$SEH_prolog_abi_test_trampoline_13-$L$SEH_begin_abi_test_trampoline_1 + DB 136 + DW 13 + DB $L$SEH_prolog_abi_test_trampoline_12-$L$SEH_begin_abi_test_trampoline_1 + DB 120 + DW 12 + DB $L$SEH_prolog_abi_test_trampoline_11-$L$SEH_begin_abi_test_trampoline_1 + DB 104 + DW 11 + DB $L$SEH_prolog_abi_test_trampoline_10-$L$SEH_begin_abi_test_trampoline_1 + DB 244 + DW 21 + DB $L$SEH_prolog_abi_test_trampoline_9-$L$SEH_begin_abi_test_trampoline_1 + DB 228 + DW 20 + DB $L$SEH_prolog_abi_test_trampoline_8-$L$SEH_begin_abi_test_trampoline_1 + DB 212 + DW 19 + DB $L$SEH_prolog_abi_test_trampoline_7-$L$SEH_begin_abi_test_trampoline_1 + DB 196 + DW 18 + DB $L$SEH_prolog_abi_test_trampoline_6-$L$SEH_begin_abi_test_trampoline_1 + DB 100 + DW 17 + DB $L$SEH_prolog_abi_test_trampoline_5-$L$SEH_begin_abi_test_trampoline_1 + DB 116 + DW 16 + DB $L$SEH_prolog_abi_test_trampoline_4-$L$SEH_begin_abi_test_trampoline_1 + DB 84 + DW 15 + DB $L$SEH_prolog_abi_test_trampoline_3-$L$SEH_begin_abi_test_trampoline_1 + DB 52 + DW 14 + DB $L$SEH_prolog_abi_test_trampoline_2-$L$SEH_begin_abi_test_trampoline_1 + DB 1 + DW 43 + +$L$SEH_info_abi_test_bad_unwind_wrong_register_0: + DB 1 + DB $L$SEH_prolog_abi_test_bad_unwind_wrong_register_2-$L$SEH_begin_abi_test_bad_unwind_wrong_register_1 + DB 1 + DB 0 + DB $L$SEH_prolog_abi_test_bad_unwind_wrong_register_2-$L$SEH_begin_abi_test_bad_unwind_wrong_register_1 + DB 208 + +$L$SEH_info_abi_test_bad_unwind_temporary_0: + DB 1 + DB $L$SEH_prolog_abi_test_bad_unwind_temporary_2-$L$SEH_begin_abi_test_bad_unwind_temporary_1 + DB 1 + DB 0 + DB $L$SEH_prolog_abi_test_bad_unwind_temporary_2-$L$SEH_begin_abi_test_bad_unwind_temporary_1 + DB 192 + +$L$SEH_info_abi_test_bad_unwind_epilog_0: + DB 1 + DB $L$SEH_prolog_abi_test_bad_unwind_epilog_2-$L$SEH_begin_abi_test_bad_unwind_epilog_1 + DB 1 + DB 0 + DB $L$SEH_prolog_abi_test_bad_unwind_epilog_2-$L$SEH_begin_abi_test_bad_unwind_epilog_1 + DB 192 +%else +; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 +ret +%endif diff --git a/yass/third_party/boringssl/src/include/openssl/base.h b/yass/third_party/boringssl/src/include/openssl/base.h index 57095d053f..29b598e154 100644 --- a/yass/third_party/boringssl/src/include/openssl/base.h +++ b/yass/third_party/boringssl/src/include/openssl/base.h @@ -181,6 +181,13 @@ extern "C" { #define OPENSSL_PRINTF_FORMAT_FUNC(string_index, first_to_check) #endif +// OPENSSL_CLANG_PRAGMA emits a pragma on clang and nothing on other compilers. +#if defined(__clang__) +#define OPENSSL_CLANG_PRAGMA(arg) _Pragma(arg) +#else +#define OPENSSL_CLANG_PRAGMA(arg) +#endif + // OPENSSL_MSVC_PRAGMA emits a pragma on MSVC and nothing on other compilers. #if defined(_MSC_VER) #define OPENSSL_MSVC_PRAGMA(arg) __pragma(arg) diff --git a/yass/third_party/boringssl/src/include/openssl/bytestring.h b/yass/third_party/boringssl/src/include/openssl/bytestring.h index 0d486288b7..961b7e3075 100644 --- a/yass/third_party/boringssl/src/include/openssl/bytestring.h +++ b/yass/third_party/boringssl/src/include/openssl/bytestring.h @@ -639,6 +639,9 @@ OPENSSL_EXPORT int CBB_flush_asn1_set_of(CBB *cbb); // Unicode utilities. +// +// These functions consider noncharacters (see section 23.7 from Unicode 15.0.0) +// to be invalid code points and will treat them as an error condition. // The following functions read one Unicode code point from |cbs| with the // corresponding encoding and store it in |*out|. They return one on success and @@ -653,7 +656,9 @@ OPENSSL_EXPORT int CBS_get_utf32_be(CBS *cbs, uint32_t *out); OPENSSL_EXPORT size_t CBB_get_utf8_len(uint32_t u); // The following functions encode |u| to |cbb| with the corresponding -// encoding. They return one on success and zero on error. +// encoding. They return one on success and zero on error. Error conditions +// include |u| being an invalid code point, or |u| being unencodable in the +// specified encoding. OPENSSL_EXPORT int CBB_add_utf8(CBB *cbb, uint32_t u); OPENSSL_EXPORT int CBB_add_latin1(CBB *cbb, uint32_t u); OPENSSL_EXPORT int CBB_add_ucs2_be(CBB *cbb, uint32_t u); diff --git a/yass/third_party/boringssl/src/include/openssl/dh.h b/yass/third_party/boringssl/src/include/openssl/dh.h index a3094d8f6e..6373bbbfac 100644 --- a/yass/third_party/boringssl/src/include/openssl/dh.h +++ b/yass/third_party/boringssl/src/include/openssl/dh.h @@ -75,6 +75,12 @@ extern "C" { // Allocation and destruction. +// +// A |DH| object represents a Diffie-Hellman key or group parameters. A given +// object may be used concurrently on multiple threads by non-mutating +// functions, provided no other thread is concurrently calling a mutating +// function. Unless otherwise documented, functions which take a |const| pointer +// are non-mutating and functions which take a non-|const| pointer are mutating. // DH_new returns a new, empty DH object or NULL on error. OPENSSL_EXPORT DH *DH_new(void); @@ -83,7 +89,8 @@ OPENSSL_EXPORT DH *DH_new(void); // count drops to zero. OPENSSL_EXPORT void DH_free(DH *dh); -// DH_up_ref increments the reference count of |dh| and returns one. +// DH_up_ref increments the reference count of |dh| and returns one. It does not +// mutate |dh| for thread-safety purposes and may be used concurrently. OPENSSL_EXPORT int DH_up_ref(DH *dh); @@ -214,6 +221,9 @@ OPENSSL_EXPORT int DH_generate_key(DH *dh); // Callers that expect a fixed-width secret should use this function over // |DH_compute_key|. Callers that use either function should migrate to a modern // primitive such as X25519 or ECDH with P-256 instead. +// +// This function does not mutate |dh| for thread-safety purposes and may be used +// concurrently. OPENSSL_EXPORT int DH_compute_key_padded(uint8_t *out, const BIGNUM *peers_key, DH *dh); @@ -225,6 +235,9 @@ OPENSSL_EXPORT int DH_compute_key_padded(uint8_t *out, const BIGNUM *peers_key, // // NOTE: this follows the usual BoringSSL return-value convention, but that's // different from |DH_compute_key| and |DH_compute_key_padded|. +// +// This function does not mutate |dh| for thread-safety purposes and may be used +// concurrently. OPENSSL_EXPORT int DH_compute_key_hashed(DH *dh, uint8_t *out, size_t *out_len, size_t max_out_len, const BIGNUM *peers_key, @@ -327,6 +340,9 @@ OPENSSL_EXPORT int i2d_DHparams(const DH *in, unsigned char **outp); // Callers that expect a fixed-width secret should use |DH_compute_key_padded| // instead. Callers that use either function should migrate to a modern // primitive such as X25519 or ECDH with P-256 instead. +// +// This function does not mutate |dh| for thread-safety purposes and may be used +// concurrently. OPENSSL_EXPORT int DH_compute_key(uint8_t *out, const BIGNUM *peers_key, DH *dh); diff --git a/yass/third_party/boringssl/src/include/openssl/dsa.h b/yass/third_party/boringssl/src/include/openssl/dsa.h index 4075001af2..f46a8fcd5a 100644 --- a/yass/third_party/boringssl/src/include/openssl/dsa.h +++ b/yass/third_party/boringssl/src/include/openssl/dsa.h @@ -78,6 +78,12 @@ extern "C" { // Allocation and destruction. +// +// A |DSA| object represents a DSA key or group parameters. A given object may +// be used concurrently on multiple threads by non-mutating functions, provided +// no other thread is concurrently calling a mutating function. Unless otherwise +// documented, functions which take a |const| pointer are non-mutating and +// functions which take a non-|const| pointer are mutating. // DSA_new returns a new, empty DSA object or NULL on error. OPENSSL_EXPORT DSA *DSA_new(void); @@ -86,7 +92,8 @@ OPENSSL_EXPORT DSA *DSA_new(void); // reference count drops to zero. OPENSSL_EXPORT void DSA_free(DSA *dsa); -// DSA_up_ref increments the reference count of |dsa| and returns one. +// DSA_up_ref increments the reference count of |dsa| and returns one. It does +// not mutate |dsa| for thread-safety purposes and may be used concurrently. OPENSSL_EXPORT int DSA_up_ref(DSA *dsa); @@ -216,7 +223,7 @@ OPENSSL_EXPORT DSA_SIG *DSA_do_sign(const uint8_t *digest, size_t digest_len, // // TODO(fork): deprecate. OPENSSL_EXPORT int DSA_do_verify(const uint8_t *digest, size_t digest_len, - DSA_SIG *sig, const DSA *dsa); + const DSA_SIG *sig, const DSA *dsa); // DSA_do_check_signature sets |*out_valid| to zero. Then it verifies that |sig| // is a valid signature, by the public key in |dsa| of the hash in |digest| @@ -225,7 +232,7 @@ OPENSSL_EXPORT int DSA_do_verify(const uint8_t *digest, size_t digest_len, // It returns one if it was able to verify the signature as valid or invalid, // and zero on error. OPENSSL_EXPORT int DSA_do_check_signature(int *out_valid, const uint8_t *digest, - size_t digest_len, DSA_SIG *sig, + size_t digest_len, const DSA_SIG *sig, const DSA *dsa); diff --git a/yass/third_party/boringssl/src/include/openssl/evp.h b/yass/third_party/boringssl/src/include/openssl/evp.h index 8a9d620124..43180f2607 100644 --- a/yass/third_party/boringssl/src/include/openssl/evp.h +++ b/yass/third_party/boringssl/src/include/openssl/evp.h @@ -136,10 +136,6 @@ OPENSSL_EXPORT int EVP_PKEY_bits(const EVP_PKEY *pkey); // values. OPENSSL_EXPORT int EVP_PKEY_id(const EVP_PKEY *pkey); -// EVP_PKEY_type returns |nid| if |nid| is a known key type and |NID_undef| -// otherwise. -OPENSSL_EXPORT int EVP_PKEY_type(int nid); - // Getting and setting concrete public key types. // @@ -171,6 +167,11 @@ OPENSSL_EXPORT int EVP_PKEY_assign_EC_KEY(EVP_PKEY *pkey, EC_KEY *key); OPENSSL_EXPORT EC_KEY *EVP_PKEY_get0_EC_KEY(const EVP_PKEY *pkey); OPENSSL_EXPORT EC_KEY *EVP_PKEY_get1_EC_KEY(const EVP_PKEY *pkey); +OPENSSL_EXPORT int EVP_PKEY_set1_DH(EVP_PKEY *pkey, DH *key); +OPENSSL_EXPORT int EVP_PKEY_assign_DH(EVP_PKEY *pkey, DH *key); +OPENSSL_EXPORT DH *EVP_PKEY_get0_DH(const EVP_PKEY *pkey); +OPENSSL_EXPORT DH *EVP_PKEY_get1_DH(const EVP_PKEY *pkey); + #define EVP_PKEY_NONE NID_undef #define EVP_PKEY_RSA NID_rsaEncryption #define EVP_PKEY_RSA_PSS NID_rsassaPss @@ -179,6 +180,7 @@ OPENSSL_EXPORT EC_KEY *EVP_PKEY_get1_EC_KEY(const EVP_PKEY *pkey); #define EVP_PKEY_ED25519 NID_ED25519 #define EVP_PKEY_X25519 NID_X25519 #define EVP_PKEY_HKDF NID_hkdf +#define EVP_PKEY_DH NID_dhKeyAgreement // EVP_PKEY_set_type sets the type of |pkey| to |type|. It returns one if // successful or zero if the |type| argument is not one of the |EVP_PKEY_*| @@ -814,11 +816,23 @@ OPENSSL_EXPORT int EVP_PKEY_CTX_set_ec_paramgen_curve_nid(EVP_PKEY_CTX *ctx, int nid); -// Deprecated functions. +// Diffie-Hellman-specific control functions. -// EVP_PKEY_DH is defined for compatibility, but it is impossible to create an -// |EVP_PKEY| of that type. -#define EVP_PKEY_DH NID_dhKeyAgreement +// EVP_PKEY_CTX_set_dh_pad configures configures whether |ctx|, which must be an +// |EVP_PKEY_derive| operation, configures the handling of leading zeros in the +// Diffie-Hellman shared secret. If |pad| is zero, leading zeros are removed +// from the secret. If |pad| is non-zero, the fixed-width shared secret is used +// unmodified, as in PKCS #3. If this function is not called, the default is to +// remove leading zeros. +// +// WARNING: The behavior when |pad| is zero leaks information about the shared +// secret. This may result in side channel attacks such as +// https://raccoon-attack.com/, particularly when the same private key is used +// for multiple operations. +OPENSSL_EXPORT int EVP_PKEY_CTX_set_dh_pad(EVP_PKEY_CTX *ctx, int pad); + + +// Deprecated functions. // EVP_PKEY_RSA2 was historically an alternate form for RSA public keys (OID // 2.5.8.1.1), but is no longer accepted. @@ -917,12 +931,6 @@ OPENSSL_EXPORT EVP_PKEY *d2i_AutoPrivateKey(EVP_PKEY **out, const uint8_t **inp, OPENSSL_EXPORT EVP_PKEY *d2i_PublicKey(int type, EVP_PKEY **out, const uint8_t **inp, long len); -// EVP_PKEY_get0_DH returns NULL. -OPENSSL_EXPORT DH *EVP_PKEY_get0_DH(const EVP_PKEY *pkey); - -// EVP_PKEY_get1_DH returns NULL. -OPENSSL_EXPORT DH *EVP_PKEY_get1_DH(const EVP_PKEY *pkey); - // EVP_PKEY_CTX_set_ec_param_enc returns one if |encoding| is // |OPENSSL_EC_NAMED_CURVE| or zero with an error otherwise. OPENSSL_EXPORT int EVP_PKEY_CTX_set_ec_param_enc(EVP_PKEY_CTX *ctx, @@ -1036,6 +1044,9 @@ OPENSSL_EXPORT int EVP_PKEY_CTX_set_dsa_paramgen_q_bits(EVP_PKEY_CTX *ctx, // Use the |EVP_PKEY_assign_*| functions instead. OPENSSL_EXPORT int EVP_PKEY_assign(EVP_PKEY *pkey, int type, void *key); +// EVP_PKEY_type returns |nid|. +OPENSSL_EXPORT int EVP_PKEY_type(int nid); + // Preprocessor compatibility section (hidden). // diff --git a/yass/third_party/boringssl/src/include/openssl/evp_errors.h b/yass/third_party/boringssl/src/include/openssl/evp_errors.h index 8583f521c5..163f17e2ba 100644 --- a/yass/third_party/boringssl/src/include/openssl/evp_errors.h +++ b/yass/third_party/boringssl/src/include/openssl/evp_errors.h @@ -95,5 +95,6 @@ #define EVP_R_NOT_XOF_OR_INVALID_LENGTH 135 #define EVP_R_EMPTY_PSK 136 #define EVP_R_INVALID_BUFFER_SIZE 137 +#define EVP_R_EXPECTING_A_DH_KEY 138 #endif // OPENSSL_HEADER_EVP_ERRORS_H diff --git a/yass/third_party/boringssl/src/include/openssl/experimental/spx.h b/yass/third_party/boringssl/src/include/openssl/experimental/spx.h index 713322a107..58dd47246d 100644 --- a/yass/third_party/boringssl/src/include/openssl/experimental/spx.h +++ b/yass/third_party/boringssl/src/include/openssl/experimental/spx.h @@ -22,6 +22,14 @@ extern "C" { #endif +#if defined(OPENSSL_UNSTABLE_EXPERIMENTAL_SPX) +// This header implements experimental, draft versions of not-yet-standardized +// primitives. When the standard is complete, these functions will be removed +// and replaced with the final, incompatible standard version. They are +// available now for short-lived experiments, but must not be deployed anywhere +// durable, such as a long-lived key store. To use these functions define +// OPENSSL_UNSTABLE_EXPERIMENTAL_SPX + // SPX_N is the number of bytes in the hash output #define SPX_N 16 @@ -37,39 +45,42 @@ extern "C" { // SPHINCS+-SHA2-128s #define SPX_SIGNATURE_BYTES 7856 -// spx_generate_key generates a SPHINCS+-SHA2-128s key pair and writes the +// SPX_generate_key generates a SPHINCS+-SHA2-128s key pair and writes the // result to |out_public_key| and |out_secret_key|. // Private key: SK.seed || SK.prf || PK.seed || PK.root // Public key: PK.seed || PK.root -OPENSSL_EXPORT void spx_generate_key( +OPENSSL_EXPORT void SPX_generate_key( uint8_t out_public_key[SPX_PUBLIC_KEY_BYTES], uint8_t out_secret_key[SPX_SECRET_KEY_BYTES]); -// spx_generate_key_from_seed generates a SPHINCS+-SHA2-128s key pair from a +// SPX_generate_key_from_seed generates a SPHINCS+-SHA2-128s key pair from a // 48-byte seed and writes the result to |out_public_key| and |out_secret_key|. // Secret key: SK.seed || SK.prf || PK.seed || PK.root // Public key: PK.seed || PK.root -OPENSSL_EXPORT void spx_generate_key_from_seed( +OPENSSL_EXPORT void SPX_generate_key_from_seed( uint8_t out_public_key[SPX_PUBLIC_KEY_BYTES], uint8_t out_secret_key[SPX_SECRET_KEY_BYTES], const uint8_t seed[3 * SPX_N]); -// spx_sign generates a SPHINCS+-SHA2-128s signature over |msg| or length +// SPX_sign generates a SPHINCS+-SHA2-128s signature over |msg| or length // |msg_len| using |secret_key| and writes the output to |out_signature|. // // if |randomized| is 0, deterministic signing is performed, otherwise, // non-deterministic signing is performed. -OPENSSL_EXPORT void spx_sign(uint8_t out_snignature[SPX_SIGNATURE_BYTES], - const uint8_t secret_key[SPX_SECRET_KEY_BYTES], - const uint8_t *msg, size_t msg_len, - int randomized); +OPENSSL_EXPORT void SPX_sign( + uint8_t out_snignature[SPX_SIGNATURE_BYTES], + const uint8_t secret_key[SPX_SECRET_KEY_BYTES], const uint8_t *msg, + size_t msg_len, int randomized); -// spx_verify verifies a SPHINCS+-SHA2-128s signature in |signature| over |msg| +// SPX_verify verifies a SPHINCS+-SHA2-128s signature in |signature| over |msg| // or length |msg_len| using |public_key|. 1 is returned if the signature // matches, 0 otherwise. -OPENSSL_EXPORT int spx_verify(const uint8_t signature[SPX_SIGNATURE_BYTES], - const uint8_t public_key[SPX_SECRET_KEY_BYTES], - const uint8_t *msg, size_t msg_len); +OPENSSL_EXPORT int SPX_verify( + const uint8_t signature[SPX_SIGNATURE_BYTES], + const uint8_t public_key[SPX_SECRET_KEY_BYTES], const uint8_t *msg, + size_t msg_len); + +#endif //OPENSSL_UNSTABLE_EXPERIMENTAL_SPX #if defined(__cplusplus) diff --git a/yass/third_party/boringssl/src/include/openssl/pki/verify_error.h b/yass/third_party/boringssl/src/include/openssl/pki/verify_error.h new file mode 100644 index 0000000000..34b5dc5f16 --- /dev/null +++ b/yass/third_party/boringssl/src/include/openssl/pki/verify_error.h @@ -0,0 +1,137 @@ +/* Copyright (c) 2024, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + +#if !defined(OPENSSL_HEADER_BSSL_PKI_VERIFY_ERROR_H_) && defined(__cplusplus) +#define OPENSSL_HEADER_BSSL_PKI_VERIFY_ERROR_H_ + +#include +#include + +namespace bssl { + +// VerifyError describes certificate chain validation result. +class OPENSSL_EXPORT VerifyError { + public: + VerifyError() = default; + VerifyError(const VerifyError &other) = default; + VerifyError &operator=(const VerifyError &other) = default; + + // Code is the representation of a single error that we could + // find. + enum class StatusCode { + // PATH_VERIFIED means there were no errors, the certificate chain is valid. + PATH_VERIFIED, + + // CERTIFICATE_INVALID_SIGNATURE means that the certificate's signature + // failed to verify. + CERTIFICATE_INVALID_SIGNATURE, + + // CERTIFICATE_UNSUPPORTED_KEY means that the certificate's key type and/or + // size is not supported. + CERTIFICATE_UNSUPPORTED_KEY, + + // CERTIFICATE_UNSUPPORTED_SIGNATURE ALGORITHM means that the signature + // algorithm is not supported. + CERTIFICATE_UNSUPPORTED_SIGNATURE_ALGORITHM, + + // CERTIFICATE_REVOKED means that the certificate has been revoked. + CERTIFICATE_REVOKED, + + // CERTIFICATE_NO_REVOCATION_MECHANISM means that revocation checking was + // required and no revocation mechanism was given for the certificate + CERTIFICATE_NO_REVOCATION_MECHANISM, + + // CERTIFICATE_UNABLE_TO_CHECK_REVOCATION means that revocation checking was + // required and we were unable to check if the certificate was revoked via + // any revocation mechanism. + CERTIFICATE_UNABLE_TO_CHECK_REVOCATION, + + // CERTIFICATE_EXPIRED means that the validation time is after the + // certificate's |notAfter| timestamp. + CERTIFICATE_EXPIRED, + + // CERTIFICATE_NOT_YET_VALID means that the validation time is before the + // certificate's |notBefore| timestamp. + CERTIFICATE_NOT_YET_VALID, + + // CERTIFICATE_NO_MATCHING_EKU means that the certificate's EKU does not + // allow the certificate to be used for the intended purpose. + CERTIFICATE_NO_MATCHING_EKU, + + // CERTIFICATE_INVALID means that the certificate was structurally + // invalid, or invalid for some different reason than the above. + CERTIFICATE_INVALID, + + // PATH_NOT_FOUND means that no path could be found from the leaf + // certificate to any trust anchor. + PATH_NOT_FOUND, + + // PATH_ITERATION_COUNT_EXCEEDED means that the iteration limit for path + // building was hit and so the search for a valid path terminated early. + PATH_ITERATION_COUNT_EXCEEDED, + + // PATH_DEADLINE_EXCEEDED means that the time limit for path building + // was hit and so the search for a valid path terminated early. + PATH_DEADLINE_EXCEEDED, + + // PATH_DEPTH_LIMIT_REACHED means that path building was not able to find a + // path within the configured depth limit for verification. + PATH_DEPTH_LIMIT_REACHED, + + // PATH_MULTIPLE_ERRORS indicates that there are multiple fatal + // errors present on the certificate chain, so that a single error could + // not be reported. + PATH_MULTIPLE_ERRORS, + + // VERIFICATION_FAILURE means that something is wrong with the returned path + // that is not specific to a single certificate. There are many possible + // reasons for a verification to fail. + VERIFICATION_FAILURE, + }; + + VerifyError(StatusCode code, ptrdiff_t offset, std::string diagnostic); + + // Code returns the indicated error code for the certificate path. + StatusCode Code() const; + + // Index returns the certificate in the chain for which the error first + // occured, starting with 0 for the leaf certificate. Later certificates in + // the chain may also exhibit the same error. If the error is not specific to + // a certificate, -1 is returned. + ptrdiff_t Index() const; + + // DiagnosticString returns a string of diagnostic information related to this + // verification attempt. The string aims to be useful to debugging, but it is + // not stable and may not be processed programmatically or asserted on in + // tests. The string may be empty if no diagnostic information was available. + // + // The DiagnosticString is specifically not guaranteed to be unchanging for + // any given error code, as the diagnostic error message can contain + // information specific to the verification attempt and chain presented, due + // to there being multiple possible ways for, as an example, a certificate to + // be invalid, or that we are unable to build a path to a trust anchor. + // + // Needless to say, one should not attempt to parse the string that is + // returned. + const std::string &DiagnosticString() const; + + private: + ptrdiff_t offset_ = -1; + StatusCode code_ = StatusCode::VERIFICATION_FAILURE; + std::string diagnostic_; +}; + +} // namespace bssl + +#endif // OPENSSL_HEADER_BSSL_PKI_VERIFY_ERROR_H_ diff --git a/yass/third_party/boringssl/src/include/openssl/rand.h b/yass/third_party/boringssl/src/include/openssl/rand.h index 215798e524..4154ef4688 100644 --- a/yass/third_party/boringssl/src/include/openssl/rand.h +++ b/yass/third_party/boringssl/src/include/openssl/rand.h @@ -33,20 +33,29 @@ OPENSSL_EXPORT int RAND_bytes(uint8_t *buf, size_t len); // Obscure functions. #if !defined(OPENSSL_WINDOWS) -// RAND_enable_fork_unsafe_buffering enables efficient buffered reading of -// /dev/urandom. It adds an overhead of a few KB of memory per thread. It must -// be called before the first call to |RAND_bytes|. +// RAND_enable_fork_unsafe_buffering indicates that clones of the address space, +// e.g. via |fork|, will never call into BoringSSL. It may be used to disable +// BoringSSL's more expensive fork-safety measures. However, calling this +// function and then using BoringSSL across |fork| calls will leak secret keys. +// |fd| must be -1. // -// |fd| must be -1. We no longer support setting the file descriptor with this -// function. +// WARNING: This function affects BoringSSL for the entire address space. Thus +// this function should never be called by library code, only by code with +// global knowledge of the application's use of BoringSSL. // -// It has an unusual name because the buffer is unsafe across calls to |fork|. -// Hence, this function should never be called by libraries. +// Do not use this function unless a performance issue was measured with the +// default behavior. BoringSSL can efficiently detect forks on most platforms, +// in which case this function is a no-op and is unnecessary. In particular, +// Linux kernel versions 4.14 or later provide |MADV_WIPEONFORK|. Future +// versions of BoringSSL will remove this functionality when older kernels are +// sufficiently rare. +// +// This function has an unusual name because it historically controlled internal +// buffers, but no longer does. OPENSSL_EXPORT void RAND_enable_fork_unsafe_buffering(int fd); -// RAND_disable_fork_unsafe_buffering disables efficient buffered reading of -// /dev/urandom, causing BoringSSL to always draw entropy on every request -// for random bytes. +// RAND_disable_fork_unsafe_buffering restores BoringSSL's default fork-safety +// protections. See also |RAND_enable_fork_unsafe_buffering|. OPENSSL_EXPORT void RAND_disable_fork_unsafe_buffering(void); #endif diff --git a/yass/third_party/boringssl/src/include/openssl/ssl.h b/yass/third_party/boringssl/src/include/openssl/ssl.h index d73f9da9db..49fb47ea6c 100644 --- a/yass/third_party/boringssl/src/include/openssl/ssl.h +++ b/yass/third_party/boringssl/src/include/openssl/ssl.h @@ -3987,7 +3987,7 @@ OPENSSL_EXPORT void SSL_get0_ech_retry_configs( // to the size of the buffer. The caller must call |OPENSSL_free| on |*out| to // release the memory. On failure, it returns zero. // -// The |config_id| field is a single byte identifer for the ECHConfig. Reusing +// The |config_id| field is a single byte identifier for the ECHConfig. Reusing // config IDs is allowed, but if multiple ECHConfigs with the same config ID are // active at a time, server load may increase. See // |SSL_ECH_KEYS_has_duplicate_config_id|. diff --git a/yass/third_party/boringssl/src/include/openssl/stack.h b/yass/third_party/boringssl/src/include/openssl/stack.h index 23b9d89253..0eccb2f0cb 100644 --- a/yass/third_party/boringssl/src/include/openssl/stack.h +++ b/yass/third_party/boringssl/src/include/openssl/stack.h @@ -139,7 +139,8 @@ STACK_OF(SAMPLE) *sk_SAMPLE_new(sk_SAMPLE_cmp_func comp); STACK_OF(SAMPLE) *sk_SAMPLE_new_null(void); // sk_SAMPLE_num returns the number of elements in |sk|. It is safe to cast this -// value to |int|. |sk| is guaranteed to have at most |INT_MAX| elements. +// value to |int|. |sk| is guaranteed to have at most |INT_MAX| elements. If +// |sk| is NULL, it is treated as the empty list and this function returns zero. size_t sk_SAMPLE_num(const STACK_OF(SAMPLE) *sk); // sk_SAMPLE_zero resets |sk| to the empty state but does nothing to free the @@ -147,7 +148,8 @@ size_t sk_SAMPLE_num(const STACK_OF(SAMPLE) *sk); void sk_SAMPLE_zero(STACK_OF(SAMPLE) *sk); // sk_SAMPLE_value returns the |i|th pointer in |sk|, or NULL if |i| is out of -// range. +// range. If |sk| is NULL, it is treated as an empty list and the function +// returns NULL. SAMPLE *sk_SAMPLE_value(const STACK_OF(SAMPLE) *sk, size_t i); // sk_SAMPLE_set sets the |i|th pointer in |sk| to |p| and returns |p|. If |i| @@ -195,7 +197,8 @@ void sk_SAMPLE_delete_if(STACK_OF(SAMPLE) *sk, sk_SAMPLE_delete_if_func func, // If the stack is sorted (see |sk_SAMPLE_sort|), this function uses a binary // search. Otherwise it performs a linear search. If it finds a matching // element, it writes the index to |*out_index| (if |out_index| is not NULL) and -// returns one. Otherwise, it returns zero. +// returns one. Otherwise, it returns zero. If |sk| is NULL, it is treated as +// the empty list and the function returns zero. // // Note this differs from OpenSSL. The type signature is slightly different, and // OpenSSL's version will implicitly sort |sk| if it has a comparison function @@ -399,6 +402,9 @@ BSSL_NAMESPACE_END * positive warning. */ \ OPENSSL_MSVC_PRAGMA(warning(push)) \ OPENSSL_MSVC_PRAGMA(warning(disable : 4191)) \ + OPENSSL_CLANG_PRAGMA("clang diagnostic push") \ + OPENSSL_CLANG_PRAGMA("clang diagnostic ignored \"-Wunknown-warning-option\"") \ + OPENSSL_CLANG_PRAGMA("clang diagnostic ignored \"-Wcast-function-type-strict\"") \ \ DECLARE_STACK_OF(name) \ \ @@ -534,6 +540,7 @@ BSSL_NAMESPACE_END (OPENSSL_sk_free_func)free_func); \ } \ \ + OPENSSL_CLANG_PRAGMA("clang diagnostic pop") \ OPENSSL_MSVC_PRAGMA(warning(pop)) diff --git a/yass/third_party/boringssl/src/include/openssl/x509.h b/yass/third_party/boringssl/src/include/openssl/x509.h index 7d4952fe32..a072d6f478 100644 --- a/yass/third_party/boringssl/src/include/openssl/x509.h +++ b/yass/third_party/boringssl/src/include/openssl/x509.h @@ -254,9 +254,9 @@ OPENSSL_EXPORT void X509_get0_uids(const X509 *x509, // should not be accepted. #define EXFLAG_CRITICAL 0x200 // EXFLAG_SS indicates the certificate is likely self-signed. That is, if it is -// self-issued, its authority key identifer (if any) matches itself, and its key -// usage extension (if any) allows certificate signatures. The signature itself -// is not checked in computing this bit. +// self-issued, its authority key identifier (if any) matches itself, and its +// key usage extension (if any) allows certificate signatures. The signature +// itself is not checked in computing this bit. #define EXFLAG_SS 0x2000 // X509_get_extension_flags decodes a set of extensions from |x509| and returns @@ -2696,8 +2696,18 @@ OPENSSL_EXPORT void X509_ALGOR_get0(const ASN1_OBJECT **out_obj, // X509_ALGOR_set_md sets |alg| to the hash function |md|. Note this // AlgorithmIdentifier represents the hash function itself, not a signature -// algorithm that uses |md|. -OPENSSL_EXPORT void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); +// algorithm that uses |md|. It returns one on success and zero on error. +// +// Due to historical specification mistakes (see Section 2.1 of RFC 4055), the +// parameters field is sometimes omitted and sometimes a NULL value. When used +// in RSASSA-PSS and RSAES-OAEP, it should be a NULL value. In other contexts, +// the parameters should be omitted. This function assumes the caller is +// constructing a RSASSA-PSS or RSAES-OAEP AlgorithmIdentifier and includes a +// NULL parameter. This differs from OpenSSL's behavior. +// +// TODO(davidben): Rename this function, or perhaps just add a bespoke API for +// constructing PSS and move on. +OPENSSL_EXPORT int X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); // X509_ALGOR_cmp returns zero if |a| and |b| are equal, and some non-zero value // otherwise. Note this function can only be used for equality checks, not an @@ -3022,6 +3032,9 @@ OPENSSL_EXPORT int X509_STORE_CTX_init(X509_STORE_CTX *ctx, X509_STORE *store, // |X509_STORE_CTX_get1_chain| may be used to return the verified certificate // chain. On error, |X509_STORE_CTX_get_error| may be used to return additional // error information. +// +// WARNING: Most failure conditions from this function do not use the error +// queue. Use |X509_STORE_CTX_get_error| to determine the cause of the error. OPENSSL_EXPORT int X509_verify_cert(X509_STORE_CTX *ctx); // X509_STORE_CTX_get0_chain, after a successful |X509_verify_cert| call, diff --git a/yass/third_party/boringssl/src/pki/cert_errors.cc b/yass/third_party/boringssl/src/pki/cert_errors.cc index 3f5a77f781..f54903eaca 100644 --- a/yass/third_party/boringssl/src/pki/cert_errors.cc +++ b/yass/third_party/boringssl/src/pki/cert_errors.cc @@ -139,6 +139,10 @@ const CertErrors *CertPathErrors::GetErrorsForCert(size_t cert_index) const { CertErrors *CertPathErrors::GetOtherErrors() { return &other_errors_; } +const CertErrors *CertPathErrors::GetOtherErrors() const { + return &other_errors_; +} + bool CertPathErrors::ContainsError(CertErrorId id) const { for (const CertErrors &errors : cert_errors_) { if (errors.ContainsError(id)) { @@ -168,6 +172,28 @@ bool CertPathErrors::ContainsAnyErrorWithSeverity( return false; } +std::optional CertPathErrors::FindSingleHighSeverityError( + ptrdiff_t &out_depth) const { + std::optional id_seen; + for (ptrdiff_t i = -1; i < (ptrdiff_t)cert_errors_.size(); ++i) { + const CertErrors *errors = + (i < 0) ? GetOtherErrors() : GetErrorsForCert(i); + for (const CertError &node : errors->nodes_) { + if (node.severity == CertError::SEVERITY_HIGH) { + if (!id_seen.has_value()) { + id_seen = node.id; + out_depth = i; + } else { + if (id_seen.value() != node.id) { + return {}; + } + } + } + } + } + return id_seen; +} + std::string CertPathErrors::ToDebugString( const ParsedCertificateList &certs) const { std::ostringstream result; diff --git a/yass/third_party/boringssl/src/pki/cert_errors.h b/yass/third_party/boringssl/src/pki/cert_errors.h index da350607f8..43ea7fcdde 100644 --- a/yass/third_party/boringssl/src/pki/cert_errors.h +++ b/yass/third_party/boringssl/src/pki/cert_errors.h @@ -57,6 +57,7 @@ namespace bssl { class CertErrorParams; +class CertPathErrors; // CertError represents either an error or a warning. struct OPENSSL_EXPORT CertError { @@ -117,6 +118,7 @@ class OPENSSL_EXPORT CertErrors { bool ContainsAnyErrorWithSeverity(CertError::Severity severity) const; private: + friend CertPathErrors; std::vector nodes_; }; @@ -144,6 +146,7 @@ class OPENSSL_EXPORT CertPathErrors { // Returns a bucket to put errors that are not associated with a particular // certificate. CertErrors *GetOtherErrors(); + const CertErrors *GetOtherErrors() const; // Returns true if CertPathErrors contains the specified error (of any // severity). @@ -152,6 +155,13 @@ class OPENSSL_EXPORT CertPathErrors { // Returns true if this contains any errors of the given severity level. bool ContainsAnyErrorWithSeverity(CertError::Severity severity) const; + // If the path contains only one unique high severity error, return the + // error id and sets |out_depth| to the depth at which the error was + // first seen. A depth of -1 means the error is not associated with + // a single certificate of the path. + std::optional FindSingleHighSeverityError( + ptrdiff_t &out_depth) const; + // Shortcut for ContainsAnyErrorWithSeverity(CertError::SEVERITY_HIGH). bool ContainsHighSeverityErrors() const { return ContainsAnyErrorWithSeverity(CertError::SEVERITY_HIGH); diff --git a/yass/third_party/boringssl/src/pki/path_builder.cc b/yass/third_party/boringssl/src/pki/path_builder.cc index a7fddd3ef6..ef23172815 100644 --- a/yass/third_party/boringssl/src/pki/path_builder.cc +++ b/yass/third_party/boringssl/src/pki/path_builder.cc @@ -10,6 +10,7 @@ #include #include +#include #include #include "cert_issuer_source.h" @@ -710,6 +711,121 @@ bool CertPathBuilderResultPath::IsValid() const { return GetTrustedCert() && !errors.ContainsHighSeverityErrors(); } +VerifyError CertPathBuilderResultPath::GetVerifyError() const { + // Diagnostic string is always "everything" about the path. + std::string diagnostic = errors.ToDebugString(certs); + if (!errors.ContainsHighSeverityErrors()) { + // TODO(bbe3): Having to check this after seems awkward: crbug.com/boringssl/713 + if (GetTrustedCert()) { + return VerifyError(VerifyError::StatusCode::PATH_VERIFIED, 0, + std::move(diagnostic)); + } else { + return VerifyError(VerifyError::StatusCode::VERIFICATION_FAILURE, -1, + std::move(diagnostic)); + } + } + + // Check for the presence of things that amount to Internal errors in the + // verification code. We deliberately prioritize this to not hide it in + // multiple error cases. + if (errors.ContainsError(cert_errors::kInternalError) || + errors.ContainsError(cert_errors::kChainIsEmpty)) { + return VerifyError(VerifyError::StatusCode::VERIFICATION_FAILURE, -1, + std::move(diagnostic)); + } + + // Similarly, for the deadline and limit cases, there will often be other + // errors that we probably do not care about, since path building was + // aborted. Surface these errors instead of having them hidden in the multiple + // error case. + // + // Normally callers should check for these in the path builder result before + // calling this on a single path, but this is here in case they do not and + // these errors are actually present on this path. + if (errors.ContainsError(cert_errors::kDeadlineExceeded)) { + return VerifyError(VerifyError::StatusCode::PATH_DEADLINE_EXCEEDED, -1, + std::move(diagnostic)); + } + if (errors.ContainsError(cert_errors::kIterationLimitExceeded)) { + return VerifyError(VerifyError::StatusCode::PATH_ITERATION_COUNT_EXCEEDED, + -1, std::move(diagnostic)); + } + if (errors.ContainsError(cert_errors::kDepthLimitExceeded)) { + return VerifyError(VerifyError::StatusCode::PATH_DEPTH_LIMIT_REACHED, -1, + std::move(diagnostic)); + } + + // If the chain has multiple high severity errors, indicate that. + ptrdiff_t depth = -1; + std::optional single_error = + errors.FindSingleHighSeverityError(depth); + if (!single_error.has_value()) { + return VerifyError(VerifyError::StatusCode::PATH_MULTIPLE_ERRORS, -1, + std::move(diagnostic)); + } + + // Otherwise it has a single error, map it appropriately at the + // depth it first occurs. + if (single_error.value() == cert_errors::kValidityFailedNotAfter) { + return VerifyError(VerifyError::StatusCode::CERTIFICATE_EXPIRED, depth, + std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kValidityFailedNotBefore) { + return VerifyError(VerifyError::StatusCode::CERTIFICATE_NOT_YET_VALID, + depth, std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kDistrustedByTrustStore || + single_error.value() == cert_errors::kCertIsNotTrustAnchor || + single_error.value() == cert_errors::kMaxPathLengthViolated || + single_error.value() == cert_errors::kSubjectDoesNotMatchIssuer || + single_error.value() == cert_errors::kNoIssuersFound) { + return VerifyError(VerifyError::StatusCode::PATH_NOT_FOUND, depth, + std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kVerifySignedDataFailed) { + return VerifyError(VerifyError::StatusCode::CERTIFICATE_INVALID_SIGNATURE, + depth, std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kUnacceptableSignatureAlgorithm) { + return VerifyError( + VerifyError::StatusCode::CERTIFICATE_UNSUPPORTED_SIGNATURE_ALGORITHM, + depth, std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kUnacceptablePublicKey) { + return VerifyError(VerifyError::StatusCode::CERTIFICATE_UNSUPPORTED_KEY, + depth, std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kEkuLacksServerAuth || + single_error.value() == cert_errors::kEkuLacksServerAuthButHasAnyEKU || + single_error.value() == cert_errors::kEkuLacksClientAuth || + single_error.value() == cert_errors::kEkuLacksClientAuthButHasAnyEKU || + single_error.value() == cert_errors::kEkuLacksClientAuthOrServerAuth) { + return VerifyError(VerifyError::StatusCode::CERTIFICATE_NO_MATCHING_EKU, + depth, std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kCertificateRevoked) { + return VerifyError(VerifyError::StatusCode::CERTIFICATE_REVOKED, depth, + std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kNoRevocationMechanism) { + return VerifyError( + VerifyError::StatusCode::CERTIFICATE_NO_REVOCATION_MECHANISM, depth, + std::move(diagnostic)); + } + if (single_error.value() == cert_errors::kUnableToCheckRevocation) { + return VerifyError( + VerifyError::StatusCode::CERTIFICATE_UNABLE_TO_CHECK_REVOCATION, depth, + std::move(diagnostic)); + } + // All other High severity errors map to CERTIFICATE_INVALID if associated + // to a certificate, or VERIFICATION_FAILURE if not associated to a + // certificate. + return VerifyError((depth < 0) ? VerifyError::StatusCode::VERIFICATION_FAILURE + : VerifyError::StatusCode::CERTIFICATE_INVALID, + depth, std::move(diagnostic)); +} + + CertPathBuilder::Result::Result() = default; CertPathBuilder::Result::Result(Result &&) = default; CertPathBuilder::Result::~Result() = default; @@ -730,6 +846,39 @@ bool CertPathBuilder::Result::AnyPathContainsError(CertErrorId error_id) const { return false; } +const VerifyError CertPathBuilder::Result::GetBestPathVerifyError() const { + if (HasValidPath()) { + return GetBestValidPath()->GetVerifyError(); + } + // We can only return one error. Returning the errors corresponding to the + // limits if they they appear on any path will make this error prominent even + // if there are other paths with different or multiple errors. + if (exceeded_iteration_limit) { + return VerifyError( + VerifyError::StatusCode::PATH_ITERATION_COUNT_EXCEEDED, -1, + "Iteration count exceeded, could not find a trusted path."); + } + if (exceeded_deadline) { + return VerifyError(VerifyError::StatusCode::PATH_DEADLINE_EXCEEDED, -1, + "Deadline exceeded. Could not find a trusted path."); + } + if (AnyPathContainsError(cert_errors::kDepthLimitExceeded)) { + return VerifyError(VerifyError::StatusCode::PATH_DEPTH_LIMIT_REACHED, -1, + "Depth limit reached. Could not find a trusted path."); + } + + // If there are no paths to report an error on, this probably indicates + // something is wrong with this path builder result. + if (paths.empty()) { + return VerifyError(VerifyError::StatusCode::VERIFICATION_FAILURE, -1, + "No paths in path builder result."); + } + + // If there are paths, report the VerifyError from the best path. + CertPathBuilderResultPath *path = paths[best_result_index].get(); + return path->GetVerifyError(); +} + const CertPathBuilderResultPath *CertPathBuilder::Result::GetBestValidPath() const { const CertPathBuilderResultPath *result_path = GetBestPathPossiblyInvalid(); diff --git a/yass/third_party/boringssl/src/pki/path_builder.h b/yass/third_party/boringssl/src/pki/path_builder.h index 05999b0e43..fd207f7c9e 100644 --- a/yass/third_party/boringssl/src/pki/path_builder.h +++ b/yass/third_party/boringssl/src/pki/path_builder.h @@ -9,6 +9,7 @@ #include #include +#include #include "cert_errors.h" #include "input.h" @@ -49,6 +50,9 @@ struct OPENSSL_EXPORT CertPathBuilderResultPath { // to it during certificate verification. bool IsValid() const; + // Public verify error result for this candidate path. + VerifyError GetVerifyError() const; + // Returns the chain's root certificate or nullptr if the chain doesn't // chain to a trust anchor. const ParsedCertificate *GetTrustedCert() const; @@ -139,6 +143,9 @@ class OPENSSL_EXPORT CertPathBuilder { // Returns true if any of the attempted paths contain |error_id|. bool AnyPathContainsError(CertErrorId error_id) const; + // Returns the best single error from result, using the best path found. + const VerifyError GetBestPathVerifyError() const; + // Returns the CertPathBuilderResultPath for the best valid path, or nullptr // if there was none. const CertPathBuilderResultPath *GetBestValidPath() const; diff --git a/yass/third_party/boringssl/src/pki/path_builder_unittest.cc b/yass/third_party/boringssl/src/pki/path_builder_unittest.cc index 624c0bc250..05d9619651 100644 --- a/yass/third_party/boringssl/src/pki/path_builder_unittest.cc +++ b/yass/third_party/boringssl/src/pki/path_builder_unittest.cc @@ -230,6 +230,9 @@ TEST_F(PathBuilderMultiRootTest, TargetHasNameAndSpkiOfTrustAnchor) { auto result = path_builder.Run(); ASSERT_TRUE(result.HasValidPath()); + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); const auto &path = *result.GetBestValidPath(); ASSERT_EQ(2U, path.certs.size()); EXPECT_EQ(a_by_b_, path.certs[0]); @@ -252,6 +255,9 @@ TEST_F(PathBuilderMultiRootTest, TargetWithSameNameAsTrustAnchorFails) { EXPECT_FALSE(result.HasValidPath()); EXPECT_EQ(1U, result.max_depth_seen); + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); } // Test a failed path building when the trust anchor is provided as a @@ -291,6 +297,10 @@ TEST_F(PathBuilderMultiRootTest, SelfSignedTrustAnchorSupplementalCert) { EXPECT_EQ(b_by_c_, path0.certs[0]); EXPECT_EQ(c_by_d_, path0.certs[1]); EXPECT_EQ(d_by_d_, path0.certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::CERTIFICATE_NOT_YET_VALID) + << error.DiagnosticString(); } // Test verifying a certificate that is a trust anchor. @@ -315,6 +325,10 @@ TEST_F(PathBuilderMultiRootTest, TargetIsSelfSignedTrustAnchor) { ASSERT_EQ(2U, path.certs.size()); EXPECT_EQ(e_by_e_, path.certs[0]); EXPECT_EQ(e_by_e_, path.certs[1]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // If the target cert is directly issued by a trust anchor, it should verify @@ -335,6 +349,10 @@ TEST_F(PathBuilderMultiRootTest, TargetDirectlySignedByTrustAnchor) { ASSERT_EQ(2U, path.certs.size()); EXPECT_EQ(a_by_b_, path.certs[0]); EXPECT_EQ(b_by_f_, path.certs[1]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Test that async cert queries are not made if the path can be successfully @@ -362,6 +380,10 @@ TEST_F(PathBuilderMultiRootTest, TriesSyncFirst) { EXPECT_TRUE(result.HasValidPath()); EXPECT_EQ(0, async_certs.num_async_gets()); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // If async queries are needed, all async sources will be queried @@ -393,6 +415,10 @@ TEST_F(PathBuilderMultiRootTest, TestAsyncSimultaneous) { EXPECT_TRUE(result.HasValidPath()); EXPECT_EQ(1, async_certs1.num_async_gets()); EXPECT_EQ(1, async_certs2.num_async_gets()); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Test that PathBuilder does not generate longer paths than necessary if one of @@ -421,6 +447,10 @@ TEST_F(PathBuilderMultiRootTest, TestLongChain) { // The result path should be A(B) <- B(C) <- C(D) // not the longer but also valid A(B) <- B(C) <- C(D) <- D(D) EXPECT_EQ(3U, result.GetBestValidPath()->certs.size()); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Test that PathBuilder will backtrack and try a different path if the first @@ -471,6 +501,10 @@ TEST_F(PathBuilderMultiRootTest, TestBacktracking) { EXPECT_EQ(b_by_c_, path.certs[1]); EXPECT_EQ(c_by_d_, path.certs[2]); EXPECT_EQ(d_by_d_, path.certs[3]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Test that if no path to a trust anchor was found, the partial path is @@ -502,6 +536,10 @@ TEST_F(PathBuilderMultiRootTest, TestOnlyPartialPathResult) { EXPECT_EQ(a_by_b_, result.paths[0]->certs[0]); EXPECT_EQ(b_by_f_, result.paths[0]->certs[1]); EXPECT_EQ(f_by_e_, result.paths[0]->certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); } // Test that if two partial paths are returned, the first is marked as the best @@ -547,6 +585,10 @@ TEST_F(PathBuilderMultiRootTest, TestTwoPartialPathResults) { EXPECT_EQ(a_by_b_, result.paths[1]->certs[0]); EXPECT_EQ(b_by_c_, result.paths[1]->certs[1]); EXPECT_EQ(c_by_d_, result.paths[1]->certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); } // Test that if no valid path is found, and the first invalid path is a partial @@ -598,6 +640,10 @@ TEST_F(PathBuilderMultiRootTest, TestDistrustedPathPreferredOverPartialPath) { EXPECT_EQ(b_by_c_, path.certs[1]); EXPECT_EQ(c_by_d_, path.certs[2]); EXPECT_EQ(d_by_d_, path.certs[3]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); } // Test that whichever order CertIssuerSource returns the issuers, the path @@ -639,6 +685,10 @@ TEST_F(PathBuilderMultiRootTest, TestCertIssuerOrdering) { EXPECT_EQ(b_by_c_, path.certs[1]); EXPECT_EQ(c_by_d_, path.certs[2]); EXPECT_EQ(d_by_d_, path.certs[3]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } } @@ -682,10 +732,16 @@ TEST_F(PathBuilderMultiRootTest, TestIterationLimit) { EXPECT_EQ(!insufficient_limit, result.HasValidPath()); EXPECT_EQ(insufficient_limit, result.exceeded_iteration_limit); + VerifyError error = result.GetBestPathVerifyError(); if (insufficient_limit) { EXPECT_EQ(2U, result.iteration_count); + ASSERT_EQ(error.Code(), + VerifyError::StatusCode::PATH_ITERATION_COUNT_EXCEEDED) + << error.DiagnosticString(); } else { EXPECT_EQ(3U, result.iteration_count); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } } } @@ -777,6 +833,10 @@ TEST_F(PathBuilderMultiRootTest, TestVerifyCache) { EXPECT_EQ(delegate_.GetMockVerifyCache()->CacheHits(), i * 2); EXPECT_EQ(delegate_.GetMockVerifyCache()->CacheMisses(), 2U); EXPECT_EQ(delegate_.GetMockVerifyCache()->CacheStores(), 2U); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } } @@ -819,6 +879,10 @@ TEST_F(PathBuilderMultiRootTest, TestDeadline) { EXPECT_EQ(c_by_d_, result.paths[0]->certs[2]); EXPECT_TRUE( result.paths[0]->errors.ContainsError(cert_errors::kDeadlineExceeded)); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_DEADLINE_EXCEEDED) + << error.DiagnosticString(); } TEST_F(PathBuilderMultiRootTest, TestDepthLimit) { @@ -854,16 +918,21 @@ TEST_F(PathBuilderMultiRootTest, TestDepthLimit) { EXPECT_EQ(!insufficient_limit, result.HasValidPath()); EXPECT_EQ(insufficient_limit, result.AnyPathContainsError(cert_errors::kDepthLimitExceeded)); + VerifyError error = result.GetBestPathVerifyError(); if (insufficient_limit) { EXPECT_EQ(2U, result.max_depth_seen); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_DEPTH_LIMIT_REACHED) + << error.DiagnosticString(); } else { EXPECT_EQ(4U, result.max_depth_seen); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } } } TEST_F(PathBuilderMultiRootTest, TestDepthLimitMultiplePaths) { - // This case tests path building backracking due to reaching the path depth + // This case tests path building backtracking due to reaching the path depth // limit. Given the root and issuer certificates below, there can be two paths // from between the leaf to a trusted root, one has length of 3 and the other // has length of 4. These certificates are specifically chosen because path @@ -914,6 +983,10 @@ TEST_F(PathBuilderMultiRootTest, TestDepthLimitMultiplePaths) { EXPECT_EQ(a_by_b_, valid_path->certs[0]); EXPECT_EQ(b_by_c_, valid_path->certs[1]); EXPECT_EQ(c_by_d_, valid_path->certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } TEST_F(PathBuilderMultiRootTest, TestPreCertificate) { @@ -942,6 +1015,10 @@ TEST_F(PathBuilderMultiRootTest, TestPreCertificate) { ASSERT_EQ(1U, result.paths.size()); ASSERT_FALSE(result.paths[0]->IsValid()) << result.paths[0]->errors.ToDebugString(result.paths[0]->certs); + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::CERTIFICATE_INVALID) + << error.DiagnosticString(); + // PreCertificate should be accepted if configured. delegate_.AllowPrecert(); @@ -954,6 +1031,9 @@ TEST_F(PathBuilderMultiRootTest, TestPreCertificate) { ASSERT_EQ(1U, result2.paths.size()); ASSERT_TRUE(result2.paths[0]->IsValid()) << result2.paths[0]->errors.ToDebugString(result.paths[0]->certs); + VerifyError error2 = result2.GetBestPathVerifyError(); + ASSERT_EQ(error2.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error2.DiagnosticString(); } class PathBuilderKeyRolloverTest : public ::testing::Test { @@ -1059,6 +1139,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestRolloverOnlyOldRootTrusted) { EXPECT_EQ(newintermediate_, path0.certs[1]); EXPECT_EQ(newrootrollover_, path0.certs[2]); EXPECT_EQ(oldroot_, path0.certs[3]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Tests that if both old and new roots are trusted it builds a path through @@ -1094,6 +1178,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestRolloverBothRootsTrusted) { // path building. EXPECT_EQ(newintermediate_, path.certs[1]); EXPECT_EQ(newroot_, path.certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // If trust anchor query returned no results, and there are no issuer @@ -1116,6 +1204,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestAnchorsNoMatchAndNoIssuerSources) { EXPECT_FALSE(result.paths[0]->IsValid()); ASSERT_EQ(1U, path.certs.size()); EXPECT_EQ(target_, path.certs[0]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); } // If a path to a trust anchor could not be found, and the last issuer(s) in @@ -1177,6 +1269,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestReturnsPartialPathEndedByLoopChecker) { EXPECT_EQ(newrootrollover_, path.certs[2]); EXPECT_TRUE(path.errors.ContainsError(cert_errors::kNoIssuersFound)); } + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); } // Tests that multiple trust root matches on a single path will be considered. @@ -1219,6 +1315,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestMultipleRootMatchesOnlyOneWorks) { EXPECT_EQ(target_, path.certs[0]); EXPECT_EQ(oldintermediate_, path.certs[1]); EXPECT_EQ(oldroot_, path.certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Tests that the path builder doesn't build longer than necessary paths, @@ -1285,6 +1385,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestRolloverLongChain) { EXPECT_EQ(newintermediate_, path2.certs[1]); EXPECT_EQ(newrootrollover_, path2.certs[2]); EXPECT_EQ(oldroot_, path2.certs[3]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Tests that when SetExploreAllPaths is combined with SetIterationLimit the @@ -1343,6 +1447,15 @@ TEST_F(PathBuilderKeyRolloverTest, ExploreAllPathsWithIterationLimit) { auto result = path_builder.Run(); EXPECT_EQ(expectation.expected_num_paths > 0, result.HasValidPath()); + VerifyError error = result.GetBestPathVerifyError(); + if (expectation.expected_num_paths > 0) { + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); + } else { + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_ITERATION_COUNT_EXCEEDED) + << error.DiagnosticString(); + } + if (expectation.partial_path.empty()) { ASSERT_EQ(expectation.expected_num_paths, result.paths.size()); } else { @@ -1485,12 +1598,16 @@ TEST_F(PathBuilderKeyRolloverTest, ExplorePathsWithPathLimit) { EXPECT_EQ(newroot_, path3.certs[2]); EXPECT_EQ(3U, result.max_depth_seen); } + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } } // If the target cert is a trust anchor, however is not itself *signed* by a // trust anchor, then it is not considered valid (the SPKI and name of the -// trust anchor matches the SPKI and subject of the targe certificate, but the +// trust anchor matches the SPKI and subject of the target certificate, but the // rest of the certificate cannot be verified). TEST_F(PathBuilderKeyRolloverTest, TestEndEntityIsTrustRoot) { // Trust newintermediate. @@ -1506,6 +1623,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestEndEntityIsTrustRoot) { auto result = path_builder.Run(); EXPECT_FALSE(result.HasValidPath()); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); } // If target has same Name+SAN+SPKI as a necessary intermediate, test if a path @@ -1534,6 +1655,11 @@ TEST_F(PathBuilderKeyRolloverTest, // This could actually be OK, but CertPathBuilder does not build the // newroot <- newrootrollover <- oldroot path. EXPECT_FALSE(result.HasValidPath()); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), + VerifyError::StatusCode::CERTIFICATE_INVALID_SIGNATURE) + << error.DiagnosticString(); } // If target has same Name+SAN+SPKI as the trust root, test that a (trivial) @@ -1562,6 +1688,10 @@ TEST_F(PathBuilderKeyRolloverTest, ASSERT_EQ(2U, best_result->certs.size()); EXPECT_EQ(newroot_, best_result->certs[0]); EXPECT_EQ(newrootrollover_, best_result->certs[1]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Test that PathBuilder will not try the same path twice if multiple @@ -1628,6 +1758,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestDuplicateIntermediates) { EXPECT_EQ(target_, path1.certs[0]); EXPECT_EQ(newintermediate_, path1.certs[1]); EXPECT_EQ(newroot_, path1.certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Test when PathBuilder is given a cert via CertIssuerSources that has the same @@ -1671,6 +1805,11 @@ TEST_F(PathBuilderKeyRolloverTest, TestDuplicateIntermediateAndRoot) { // Compare the DER instead of ParsedCertificate pointer, don't care which copy // of newroot was used in the path. EXPECT_EQ(newroot_->der_cert(), path.certs[2]->der_cert()); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), + VerifyError::StatusCode::CERTIFICATE_INVALID_SIGNATURE) + << error.DiagnosticString(); } class MockCertIssuerSourceRequest : public CertIssuerSource::Request { @@ -1796,6 +1935,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestMultipleAsyncIssuersFromSingleSource) { EXPECT_EQ(target_, path1.certs[0]); EXPECT_EQ(newintermediate_, path1.certs[1]); EXPECT_EQ(newroot_, path1.certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } // Test that PathBuilder will not try the same path twice if CertIssuerSources @@ -1885,6 +2028,10 @@ TEST_F(PathBuilderKeyRolloverTest, TestDuplicateAsyncIntermediates) { EXPECT_EQ(target_, path1.certs[0]); EXPECT_EQ(newintermediate_, path1.certs[1]); EXPECT_EQ(newroot_, path1.certs[2]); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } class PathBuilderSimpleChainTest : public ::testing::Test { @@ -2061,8 +2208,130 @@ TEST_F(PathBuilderCheckPathAfterVerificationTest, AddsWarningToValidPath) { ASSERT_TRUE(cert1_errors); EXPECT_TRUE(cert1_errors->ContainsErrorWithSeverity( kWarningFromDelegate, CertError::SEVERITY_WARNING)); + + // The warning should not affect the VerifyError + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); } +TEST_F(PathBuilderCheckPathAfterVerificationTest, TestVerifyErrorMapping) { + struct error_mapping { + CertErrorId internal_error; + VerifyError::StatusCode code; + }; + struct error_mapping AllErrors[] = { + {cert_errors::kInternalError, + VerifyError::StatusCode::VERIFICATION_FAILURE}, + {cert_errors::kValidityFailedNotAfter, + VerifyError::StatusCode::CERTIFICATE_EXPIRED}, + {cert_errors::kValidityFailedNotBefore, + VerifyError::StatusCode::CERTIFICATE_NOT_YET_VALID}, + {cert_errors::kDistrustedByTrustStore, + VerifyError::StatusCode::PATH_NOT_FOUND}, + {cert_errors::kSignatureAlgorithmMismatch, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kChainIsEmpty, + VerifyError::StatusCode::VERIFICATION_FAILURE}, + {cert_errors::kUnconsumedCriticalExtension, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kKeyCertSignBitNotSet, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kMaxPathLengthViolated, + VerifyError::StatusCode::PATH_NOT_FOUND}, + {cert_errors::kBasicConstraintsIndicatesNotCa, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kTargetCertShouldNotBeCa, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kMissingBasicConstraints, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kNotPermittedByNameConstraints, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kTooManyNameConstraintChecks, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kSubjectDoesNotMatchIssuer, + VerifyError::StatusCode::PATH_NOT_FOUND}, + {cert_errors::kVerifySignedDataFailed, + VerifyError::StatusCode::CERTIFICATE_INVALID_SIGNATURE}, + {cert_errors::kSignatureAlgorithmsDifferentEncoding, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kEkuLacksServerAuth, + VerifyError::StatusCode::CERTIFICATE_NO_MATCHING_EKU}, + {cert_errors::kEkuLacksServerAuthButHasAnyEKU, + VerifyError::StatusCode::CERTIFICATE_NO_MATCHING_EKU}, + {cert_errors::kEkuLacksClientAuth, + VerifyError::StatusCode::CERTIFICATE_NO_MATCHING_EKU}, + {cert_errors::kEkuLacksClientAuthButHasAnyEKU, + VerifyError::StatusCode::CERTIFICATE_NO_MATCHING_EKU}, + {cert_errors::kEkuLacksClientAuthOrServerAuth, + VerifyError::StatusCode::CERTIFICATE_NO_MATCHING_EKU}, + {cert_errors::kEkuHasProhibitedOCSPSigning, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kEkuHasProhibitedTimeStamping, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kEkuHasProhibitedCodeSigning, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kEkuNotPresent, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kCertIsNotTrustAnchor, + VerifyError::StatusCode::PATH_NOT_FOUND}, + {cert_errors::kNoValidPolicy, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kPolicyMappingAnyPolicy, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kFailedParsingSpki, + VerifyError::StatusCode::CERTIFICATE_INVALID}, + {cert_errors::kUnacceptableSignatureAlgorithm, + VerifyError::StatusCode::CERTIFICATE_UNSUPPORTED_SIGNATURE_ALGORITHM}, + {cert_errors::kUnacceptablePublicKey, + VerifyError::StatusCode::CERTIFICATE_UNSUPPORTED_KEY}, + {cert_errors::kCertificateRevoked, + VerifyError::StatusCode::CERTIFICATE_REVOKED}, + {cert_errors::kNoRevocationMechanism, + VerifyError::StatusCode::CERTIFICATE_NO_REVOCATION_MECHANISM}, + {cert_errors::kUnableToCheckRevocation, + VerifyError::StatusCode::CERTIFICATE_UNABLE_TO_CHECK_REVOCATION}, + {cert_errors::kNoIssuersFound, VerifyError::StatusCode::PATH_NOT_FOUND}, + {cert_errors::kDeadlineExceeded, + VerifyError::StatusCode::PATH_DEADLINE_EXCEEDED}, + {cert_errors::kIterationLimitExceeded, + VerifyError::StatusCode::PATH_ITERATION_COUNT_EXCEEDED}, + {cert_errors::kDepthLimitExceeded, + VerifyError::StatusCode::PATH_DEPTH_LIMIT_REACHED}, + }; + + for (struct error_mapping mapping : AllErrors) { + AddWarningPathBuilderDelegate delegate; + CertPathBuilder::Result result = RunPathBuilder(nullptr, &delegate); + ASSERT_TRUE(result.HasValidPath()); + + CertErrors *errors = + (CertErrors *)result.GetBestValidPath()->errors.GetErrorsForCert(1); + errors->AddError(mapping.internal_error, nullptr); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), mapping.code) + << error.DiagnosticString(); + } +} + +TEST_F(PathBuilderCheckPathAfterVerificationTest, + TestVerifyErrorMulipleMapping) { + AddWarningPathBuilderDelegate delegate; + CertPathBuilder::Result result = RunPathBuilder(nullptr, &delegate); + ASSERT_TRUE(result.HasValidPath()); + + CertErrors *errors = + (CertErrors *)result.GetBestValidPath()->errors.GetErrorsForCert(1); + errors->AddError(cert_errors::kEkuNotPresent, nullptr); + errors->AddError(cert_errors::kNoValidPolicy, nullptr); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_MULTIPLE_ERRORS) + << error.DiagnosticString(); +} + + DEFINE_CERT_ERROR_ID(kErrorFromDelegate, "Error from delegate"); class AddErrorPathBuilderDelegate : public CertPathBuilderDelegateBase { @@ -2089,8 +2358,47 @@ TEST_F(PathBuilderCheckPathAfterVerificationTest, AddsErrorToValidPath) { const CertErrors *cert2_errors = failed_path->errors.GetErrorsForCert(2); ASSERT_TRUE(cert2_errors); EXPECT_TRUE(cert2_errors->ContainsError(kErrorFromDelegate)); + + // The newly defined delegate error should map to CERTIFICATE_INVALID + // since it is associated with a certificate (at index 2) + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::CERTIFICATE_INVALID) + << error.DiagnosticString(); } +class AddOtherErrorPathBuilderDelegate : public CertPathBuilderDelegateBase { + public: + void CheckPathAfterVerification(const CertPathBuilder &path_builder, + CertPathBuilderResultPath *path) override { + path->errors.GetOtherErrors()->AddError(kErrorFromDelegate, nullptr); + } +}; + +TEST_F(PathBuilderCheckPathAfterVerificationTest, AddsErrorToOtherErrors) { + AddOtherErrorPathBuilderDelegate delegate; + CertPathBuilder::Result result = RunPathBuilder(nullptr, &delegate); + + // Verification failed. + ASSERT_FALSE(result.HasValidPath()); + + ASSERT_LT(result.best_result_index, result.paths.size()); + const CertPathBuilderResultPath *failed_path = + result.paths[result.best_result_index].get(); + ASSERT_TRUE(failed_path); + + // An error should have been added to other errors + const CertErrors *other_errors = failed_path->errors.GetOtherErrors(); + ASSERT_TRUE(other_errors); + EXPECT_TRUE(other_errors->ContainsError(kErrorFromDelegate)); + + // The newly defined delegate error should map to VERIFICATION_FAILURE + // since the error is not associated to a certificate. + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::VERIFICATION_FAILURE) + << error.DiagnosticString(); +} + + TEST_F(PathBuilderCheckPathAfterVerificationTest, NoopToAlreadyInvalidPath) { StrictMock delegate; // Just verify that the hook is called (on an invalid path). @@ -2099,6 +2407,10 @@ TEST_F(PathBuilderCheckPathAfterVerificationTest, NoopToAlreadyInvalidPath) { // Run the pathbuilder with certificate at index 1 actively distrusted. CertPathBuilder::Result result = RunPathBuilder(test_.chain[1], &delegate); EXPECT_FALSE(result.HasValidPath()); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); } struct DelegateData : public CertPathBuilderDelegateData { @@ -2181,6 +2493,11 @@ TEST(PathBuilderPrioritizationTest, DatePrioritization) { CertPathBuilder::Result result = path_builder.Run(); EXPECT_FALSE(result.HasValidPath()); + + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); + ASSERT_EQ(4U, result.paths.size()); // Path builder should have attempted paths using the intermediates in @@ -2523,6 +2840,9 @@ TEST(PathBuilderPrioritizationTest, KeyIdNameAndSerialPrioritization) { CertPathBuilder::Result result = path_builder.Run(); EXPECT_FALSE(result.HasValidPath()); ASSERT_EQ(3U, result.paths.size()); + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_NOT_FOUND) + << error.DiagnosticString(); // The serial & issuer method is not used in prioritization, so the certs // should have been prioritized based on dates. The test certs have the @@ -2579,6 +2899,9 @@ TEST(PathBuilderPrioritizationTest, SelfIssuedPrioritization) { CertPathBuilder::Result result = path_builder.Run(); EXPECT_TRUE(result.HasValidPath()); + VerifyError error = result.GetBestPathVerifyError(); + ASSERT_EQ(error.Code(), VerifyError::StatusCode::PATH_VERIFIED) + << error.DiagnosticString(); // Path builder should have built paths to both trusted roots. ASSERT_EQ(2U, result.paths.size()); diff --git a/yass/third_party/boringssl/src/pki/signature_algorithm.h b/yass/third_party/boringssl/src/pki/signature_algorithm.h index 2d65be2e24..be654fc01b 100644 --- a/yass/third_party/boringssl/src/pki/signature_algorithm.h +++ b/yass/third_party/boringssl/src/pki/signature_algorithm.h @@ -71,7 +71,7 @@ enum class SignatureAlgorithm { [[nodiscard]] bool ParseHashAlgorithm(der::Input input, DigestAlgorithm *out); // Parses an AlgorithmIdentifier into a signature algorithm and returns it, or -// returns `std::nullopt` if `algorithm_identifer` either cannot be parsed or +// returns `std::nullopt` if `algorithm_identifier` either cannot be parsed or // is not a recognized signature algorithm. OPENSSL_EXPORT std::optional ParseSignatureAlgorithm( der::Input algorithm_identifier); diff --git a/yass/third_party/boringssl/src/pki/test_helpers.cc b/yass/third_party/boringssl/src/pki/test_helpers.cc index b6712af515..490fba5aee 100644 --- a/yass/third_party/boringssl/src/pki/test_helpers.cc +++ b/yass/third_party/boringssl/src/pki/test_helpers.cc @@ -12,9 +12,12 @@ #include #include + #include #include #include + +#include "../crypto/test/test_data.h" #include "cert_error_params.h" #include "cert_errors.h" #include "parser.h" @@ -90,41 +93,6 @@ std::vector SplitString(std::string_view str) { return out; } -bool ReadFileToString(const std::string &path, std::string *out) { - std::ifstream file(path, std::ios::binary); - file.unsetf(std::ios::skipws); - - file.seekg(0, std::ios::end); - if (file.tellg() == -1) { - return false; - } - out->reserve(file.tellg()); - file.seekg(0, std::ios::beg); - - out->assign(std::istreambuf_iterator(file), - std::istreambuf_iterator()); - - return true; -} - -std::string AppendComponent(const std::string &path, - const std::string &component) { - // Append a path component to a path. Use the \ separator if this appears to - // be a Windows path, otherwise the Unix one. - if (path.find(":\\") != std::string::npos) { - return path + "\\" + component; - } - return path + "/" + component; -} - -std::string GetTestRoot(void) { - // We expect our test data to live in "pki" underneath a - // test root directory, or in the current directry. - char *root_from_env = getenv("BORINGSSL_TEST_DATA_ROOT"); - std::string root = root_from_env ? root_from_env : "."; - return AppendComponent(root, "pki"); -} - } // namespace namespace der { @@ -450,18 +418,7 @@ bool ReadVerifyCertChainTestFromFile(const std::string &file_path_ascii, } std::string ReadTestFileToString(const std::string &file_path_ascii) { - // Compute the full path, relative to the src/ directory. - std::string src_root = GetTestRoot(); - std::string filepath = AppendComponent(src_root, file_path_ascii); - - // Read the full contents of the file. - std::string file_data; - if (!ReadFileToString(filepath, &file_data)) { - ADD_FAILURE() << "Couldn't read file: " << filepath; - return std::string(); - } - - return file_data; + return GetTestData(("pki/" + file_path_ascii).c_str()); } void VerifyCertPathErrors(const std::string &expected_errors_str, diff --git a/yass/third_party/boringssl/src/pki/testdata/nist-pkits/BUILD.gn b/yass/third_party/boringssl/src/pki/testdata/nist-pkits/BUILD.gn deleted file mode 100644 index c64c7dd79a..0000000000 --- a/yass/third_party/boringssl/src/pki/testdata/nist-pkits/BUILD.gn +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2016 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -if (is_ios) { - import("//build/config/ios/bundle_data_from_filelist.gni") - - bundle_data_from_filelist("test_bundle_data") { - testonly = true - filelist_name = "test_bundle_data.filelist" - } -} - -# Depend on this to get the data deps necessary to run the PKITS tests in the -# test environment. -group("nist-pkits") { - data = [ - "certs/", - "crls/", - ] -} diff --git a/yass/third_party/boringssl/src/pki/testdata/nist-pkits/test_bundle_data.filelist b/yass/third_party/boringssl/src/pki/testdata/nist-pkits/test_bundle_data.filelist deleted file mode 100644 index a4aea263ef..0000000000 --- a/yass/third_party/boringssl/src/pki/testdata/nist-pkits/test_bundle_data.filelist +++ /dev/null @@ -1,584 +0,0 @@ -# Copyright 2023 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -# NOTE: this file is generated by build/ios/update_bundle_filelist.py -# If it requires updating, you should get a presubmit error with -# instructions on how to regenerate. Otherwise, do not edit. -certs/AllCertificatesNoPoliciesTest2EE.crt -certs/AllCertificatesSamePoliciesTest10EE.crt -certs/AllCertificatesSamePoliciesTest13EE.crt -certs/AllCertificatesanyPolicyTest11EE.crt -certs/AnyPolicyTest14EE.crt -certs/BadCRLIssuerNameCACert.crt -certs/BadCRLSignatureCACert.crt -certs/BadSignedCACert.crt -certs/BadnotAfterDateCACert.crt -certs/BadnotBeforeDateCACert.crt -certs/BasicSelfIssuedCRLSigningKeyCACert.crt -certs/BasicSelfIssuedCRLSigningKeyCRLCert.crt -certs/BasicSelfIssuedNewKeyCACert.crt -certs/BasicSelfIssuedNewKeyOldWithNewCACert.crt -certs/BasicSelfIssuedOldKeyCACert.crt -certs/BasicSelfIssuedOldKeyNewWithOldCACert.crt -certs/CPSPointerQualifierTest20EE.crt -certs/DSACACert.crt -certs/DSAParametersInheritedCACert.crt -certs/DifferentPoliciesTest12EE.crt -certs/DifferentPoliciesTest3EE.crt -certs/DifferentPoliciesTest4EE.crt -certs/DifferentPoliciesTest5EE.crt -certs/DifferentPoliciesTest7EE.crt -certs/DifferentPoliciesTest8EE.crt -certs/DifferentPoliciesTest9EE.crt -certs/GeneralizedTimeCRLnextUpdateCACert.crt -certs/GoodCACert.crt -certs/GoodsubCACert.crt -certs/GoodsubCAPanyPolicyMapping1to2CACert.crt -certs/InvalidBadCRLIssuerNameTest5EE.crt -certs/InvalidBadCRLSignatureTest4EE.crt -certs/InvalidBasicSelfIssuedCRLSigningKeyTest7EE.crt -certs/InvalidBasicSelfIssuedCRLSigningKeyTest8EE.crt -certs/InvalidBasicSelfIssuedNewWithOldTest5EE.crt -certs/InvalidBasicSelfIssuedOldWithNewTest2EE.crt -certs/InvalidCASignatureTest2EE.crt -certs/InvalidCAnotAfterDateTest5EE.crt -certs/InvalidCAnotBeforeDateTest1EE.crt -certs/InvalidDNSnameConstraintsTest31EE.crt -certs/InvalidDNSnameConstraintsTest33EE.crt -certs/InvalidDNSnameConstraintsTest38EE.crt -certs/InvalidDNandRFC822nameConstraintsTest28EE.crt -certs/InvalidDNandRFC822nameConstraintsTest29EE.crt -certs/InvalidDNnameConstraintsTest10EE.crt -certs/InvalidDNnameConstraintsTest12EE.crt -certs/InvalidDNnameConstraintsTest13EE.crt -certs/InvalidDNnameConstraintsTest15EE.crt -certs/InvalidDNnameConstraintsTest16EE.crt -certs/InvalidDNnameConstraintsTest17EE.crt -certs/InvalidDNnameConstraintsTest20EE.crt -certs/InvalidDNnameConstraintsTest2EE.crt -certs/InvalidDNnameConstraintsTest3EE.crt -certs/InvalidDNnameConstraintsTest7EE.crt -certs/InvalidDNnameConstraintsTest8EE.crt -certs/InvalidDNnameConstraintsTest9EE.crt -certs/InvalidDSASignatureTest6EE.crt -certs/InvalidEESignatureTest3EE.crt -certs/InvalidEEnotAfterDateTest6EE.crt -certs/InvalidEEnotBeforeDateTest2EE.crt -certs/InvalidIDPwithindirectCRLTest23EE.crt -certs/InvalidIDPwithindirectCRLTest26EE.crt -certs/InvalidLongSerialNumberTest18EE.crt -certs/InvalidMappingFromanyPolicyTest7EE.crt -certs/InvalidMappingToanyPolicyTest8EE.crt -certs/InvalidMissingCRLTest1EE.crt -certs/InvalidMissingbasicConstraintsTest1EE.crt -certs/InvalidNameChainingOrderTest2EE.crt -certs/InvalidNameChainingTest1EE.crt -certs/InvalidNegativeSerialNumberTest15EE.crt -certs/InvalidOldCRLnextUpdateTest11EE.crt -certs/InvalidPolicyMappingTest10EE.crt -certs/InvalidPolicyMappingTest2EE.crt -certs/InvalidPolicyMappingTest4EE.crt -certs/InvalidRFC822nameConstraintsTest22EE.crt -certs/InvalidRFC822nameConstraintsTest24EE.crt -certs/InvalidRFC822nameConstraintsTest26EE.crt -certs/InvalidRevokedCATest2EE.crt -certs/InvalidRevokedEETest3EE.crt -certs/InvalidSelfIssuedinhibitAnyPolicyTest10EE.crt -certs/InvalidSelfIssuedinhibitAnyPolicyTest8EE.crt -certs/InvalidSelfIssuedinhibitPolicyMappingTest10EE.crt -certs/InvalidSelfIssuedinhibitPolicyMappingTest11EE.crt -certs/InvalidSelfIssuedinhibitPolicyMappingTest8EE.crt -certs/InvalidSelfIssuedinhibitPolicyMappingTest9EE.crt -certs/InvalidSelfIssuedpathLenConstraintTest16EE.crt -certs/InvalidSelfIssuedrequireExplicitPolicyTest7EE.crt -certs/InvalidSelfIssuedrequireExplicitPolicyTest8EE.crt -certs/InvalidSeparateCertificateandCRLKeysTest20EE.crt -certs/InvalidSeparateCertificateandCRLKeysTest21EE.crt -certs/InvalidURInameConstraintsTest35EE.crt -certs/InvalidURInameConstraintsTest37EE.crt -certs/InvalidUnknownCRLEntryExtensionTest8EE.crt -certs/InvalidUnknownCRLExtensionTest10EE.crt -certs/InvalidUnknownCRLExtensionTest9EE.crt -certs/InvalidUnknownCriticalCertificateExtensionTest2EE.crt -certs/InvalidWrongCRLTest6EE.crt -certs/InvalidcAFalseTest2EE.crt -certs/InvalidcAFalseTest3EE.crt -certs/InvalidcRLIssuerTest27EE.crt -certs/InvalidcRLIssuerTest31EE.crt -certs/InvalidcRLIssuerTest32EE.crt -certs/InvalidcRLIssuerTest34EE.crt -certs/InvalidcRLIssuerTest35EE.crt -certs/InvaliddeltaCRLIndicatorNoBaseTest1EE.crt -certs/InvaliddeltaCRLTest10EE.crt -certs/InvaliddeltaCRLTest3EE.crt -certs/InvaliddeltaCRLTest4EE.crt -certs/InvaliddeltaCRLTest6EE.crt -certs/InvaliddeltaCRLTest9EE.crt -certs/InvaliddistributionPointTest2EE.crt -certs/InvaliddistributionPointTest3EE.crt -certs/InvaliddistributionPointTest6EE.crt -certs/InvaliddistributionPointTest8EE.crt -certs/InvaliddistributionPointTest9EE.crt -certs/InvalidinhibitAnyPolicyTest1EE.crt -certs/InvalidinhibitAnyPolicyTest4EE.crt -certs/InvalidinhibitAnyPolicyTest5EE.crt -certs/InvalidinhibitAnyPolicyTest6EE.crt -certs/InvalidinhibitPolicyMappingTest1EE.crt -certs/InvalidinhibitPolicyMappingTest3EE.crt -certs/InvalidinhibitPolicyMappingTest5EE.crt -certs/InvalidinhibitPolicyMappingTest6EE.crt -certs/InvalidkeyUsageCriticalcRLSignFalseTest4EE.crt -certs/InvalidkeyUsageCriticalkeyCertSignFalseTest1EE.crt -certs/InvalidkeyUsageNotCriticalcRLSignFalseTest5EE.crt -certs/InvalidkeyUsageNotCriticalkeyCertSignFalseTest2EE.crt -certs/InvalidonlyContainsAttributeCertsTest14EE.crt -certs/InvalidonlyContainsCACertsTest12EE.crt -certs/InvalidonlyContainsUserCertsTest11EE.crt -certs/InvalidonlySomeReasonsTest15EE.crt -certs/InvalidonlySomeReasonsTest16EE.crt -certs/InvalidonlySomeReasonsTest17EE.crt -certs/InvalidonlySomeReasonsTest20EE.crt -certs/InvalidonlySomeReasonsTest21EE.crt -certs/InvalidpathLenConstraintTest10EE.crt -certs/InvalidpathLenConstraintTest11EE.crt -certs/InvalidpathLenConstraintTest12EE.crt -certs/InvalidpathLenConstraintTest5EE.crt -certs/InvalidpathLenConstraintTest6EE.crt -certs/InvalidpathLenConstraintTest9EE.crt -certs/Invalidpre2000CRLnextUpdateTest12EE.crt -certs/Invalidpre2000UTCEEnotAfterDateTest7EE.crt -certs/InvalidrequireExplicitPolicyTest3EE.crt -certs/InvalidrequireExplicitPolicyTest5EE.crt -certs/LongSerialNumberCACert.crt -certs/Mapping1to2CACert.crt -certs/MappingFromanyPolicyCACert.crt -certs/MappingToanyPolicyCACert.crt -certs/MissingbasicConstraintsCACert.crt -certs/NameOrderingCACert.crt -certs/NegativeSerialNumberCACert.crt -certs/NoCRLCACert.crt -certs/NoPoliciesCACert.crt -certs/NoissuingDistributionPointCACert.crt -certs/OldCRLnextUpdateCACert.crt -certs/OverlappingPoliciesTest6EE.crt -certs/P12Mapping1to3CACert.crt -certs/P12Mapping1to3subCACert.crt -certs/P12Mapping1to3subsubCACert.crt -certs/P1Mapping1to234CACert.crt -certs/P1Mapping1to234subCACert.crt -certs/P1anyPolicyMapping1to2CACert.crt -certs/PanyPolicyMapping1to2CACert.crt -certs/PoliciesP1234CACert.crt -certs/PoliciesP1234subCAP123Cert.crt -certs/PoliciesP1234subsubCAP123P12Cert.crt -certs/PoliciesP123CACert.crt -certs/PoliciesP123subCAP12Cert.crt -certs/PoliciesP123subsubCAP12P1Cert.crt -certs/PoliciesP123subsubCAP12P2Cert.crt -certs/PoliciesP123subsubsubCAP12P2P1Cert.crt -certs/PoliciesP12CACert.crt -certs/PoliciesP12subCAP1Cert.crt -certs/PoliciesP12subsubCAP1P2Cert.crt -certs/PoliciesP2subCA2Cert.crt -certs/PoliciesP2subCACert.crt -certs/PoliciesP3CACert.crt -certs/RFC3280MandatoryAttributeTypesCACert.crt -certs/RFC3280OptionalAttributeTypesCACert.crt -certs/RevokedsubCACert.crt -certs/RolloverfromPrintableStringtoUTF8StringCACert.crt -certs/SeparateCertificateandCRLKeysCA2CRLSigningCert.crt -certs/SeparateCertificateandCRLKeysCA2CertificateSigningCACert.crt -certs/SeparateCertificateandCRLKeysCRLSigningCert.crt -certs/SeparateCertificateandCRLKeysCertificateSigningCACert.crt -certs/TrustAnchorRootCertificate.crt -certs/TwoCRLsCACert.crt -certs/UIDCACert.crt -certs/UTF8StringCaseInsensitiveMatchCACert.crt -certs/UTF8StringEncodedNamesCACert.crt -certs/UnknownCRLEntryExtensionCACert.crt -certs/UnknownCRLExtensionCACert.crt -certs/UserNoticeQualifierTest15EE.crt -certs/UserNoticeQualifierTest16EE.crt -certs/UserNoticeQualifierTest17EE.crt -certs/UserNoticeQualifierTest18EE.crt -certs/UserNoticeQualifierTest19EE.crt -certs/ValidBasicSelfIssuedCRLSigningKeyTest6EE.crt -certs/ValidBasicSelfIssuedNewWithOldTest3EE.crt -certs/ValidBasicSelfIssuedNewWithOldTest4EE.crt -certs/ValidBasicSelfIssuedOldWithNewTest1EE.crt -certs/ValidCertificatePathTest1EE.crt -certs/ValidDNSnameConstraintsTest30EE.crt -certs/ValidDNSnameConstraintsTest32EE.crt -certs/ValidDNandRFC822nameConstraintsTest27EE.crt -certs/ValidDNnameConstraintsTest11EE.crt -certs/ValidDNnameConstraintsTest14EE.crt -certs/ValidDNnameConstraintsTest18EE.crt -certs/ValidDNnameConstraintsTest19EE.crt -certs/ValidDNnameConstraintsTest1EE.crt -certs/ValidDNnameConstraintsTest4EE.crt -certs/ValidDNnameConstraintsTest5EE.crt -certs/ValidDNnameConstraintsTest6EE.crt -certs/ValidDSAParameterInheritanceTest5EE.crt -certs/ValidDSASignaturesTest4EE.crt -certs/ValidGeneralizedTimeCRLnextUpdateTest13EE.crt -certs/ValidGeneralizedTimenotAfterDateTest8EE.crt -certs/ValidGeneralizedTimenotBeforeDateTest4EE.crt -certs/ValidIDPwithindirectCRLTest22EE.crt -certs/ValidIDPwithindirectCRLTest24EE.crt -certs/ValidIDPwithindirectCRLTest25EE.crt -certs/ValidLongSerialNumberTest16EE.crt -certs/ValidLongSerialNumberTest17EE.crt -certs/ValidNameChainingCapitalizationTest5EE.crt -certs/ValidNameChainingWhitespaceTest3EE.crt -certs/ValidNameChainingWhitespaceTest4EE.crt -certs/ValidNameUIDsTest6EE.crt -certs/ValidNegativeSerialNumberTest14EE.crt -certs/ValidNoissuingDistributionPointTest10EE.crt -certs/ValidPolicyMappingTest11EE.crt -certs/ValidPolicyMappingTest12EE.crt -certs/ValidPolicyMappingTest13EE.crt -certs/ValidPolicyMappingTest14EE.crt -certs/ValidPolicyMappingTest1EE.crt -certs/ValidPolicyMappingTest3EE.crt -certs/ValidPolicyMappingTest5EE.crt -certs/ValidPolicyMappingTest6EE.crt -certs/ValidPolicyMappingTest9EE.crt -certs/ValidRFC3280MandatoryAttributeTypesTest7EE.crt -certs/ValidRFC3280OptionalAttributeTypesTest8EE.crt -certs/ValidRFC822nameConstraintsTest21EE.crt -certs/ValidRFC822nameConstraintsTest23EE.crt -certs/ValidRFC822nameConstraintsTest25EE.crt -certs/ValidRolloverfromPrintableStringtoUTF8StringTest10EE.crt -certs/ValidSelfIssuedinhibitAnyPolicyTest7EE.crt -certs/ValidSelfIssuedinhibitAnyPolicyTest9EE.crt -certs/ValidSelfIssuedinhibitPolicyMappingTest7EE.crt -certs/ValidSelfIssuedpathLenConstraintTest15EE.crt -certs/ValidSelfIssuedpathLenConstraintTest17EE.crt -certs/ValidSelfIssuedrequireExplicitPolicyTest6EE.crt -certs/ValidSeparateCertificateandCRLKeysTest19EE.crt -certs/ValidTwoCRLsTest7EE.crt -certs/ValidURInameConstraintsTest34EE.crt -certs/ValidURInameConstraintsTest36EE.crt -certs/ValidUTF8StringCaseInsensitiveMatchTest11EE.crt -certs/ValidUTF8StringEncodedNamesTest9EE.crt -certs/ValidUnknownNotCriticalCertificateExtensionTest1EE.crt -certs/ValidbasicConstraintsNotCriticalTest4EE.crt -certs/ValidcRLIssuerTest28EE.crt -certs/ValidcRLIssuerTest29EE.crt -certs/ValidcRLIssuerTest30EE.crt -certs/ValidcRLIssuerTest33EE.crt -certs/ValiddeltaCRLTest2EE.crt -certs/ValiddeltaCRLTest5EE.crt -certs/ValiddeltaCRLTest7EE.crt -certs/ValiddeltaCRLTest8EE.crt -certs/ValiddistributionPointTest1EE.crt -certs/ValiddistributionPointTest4EE.crt -certs/ValiddistributionPointTest5EE.crt -certs/ValiddistributionPointTest7EE.crt -certs/ValidinhibitAnyPolicyTest2EE.crt -certs/ValidinhibitPolicyMappingTest2EE.crt -certs/ValidinhibitPolicyMappingTest4EE.crt -certs/ValidkeyUsageNotCriticalTest3EE.crt -certs/ValidonlyContainsCACertsTest13EE.crt -certs/ValidonlySomeReasonsTest18EE.crt -certs/ValidonlySomeReasonsTest19EE.crt -certs/ValidpathLenConstraintTest13EE.crt -certs/ValidpathLenConstraintTest14EE.crt -certs/ValidpathLenConstraintTest7EE.crt -certs/ValidpathLenConstraintTest8EE.crt -certs/Validpre2000UTCnotBeforeDateTest3EE.crt -certs/ValidrequireExplicitPolicyTest1EE.crt -certs/ValidrequireExplicitPolicyTest2EE.crt -certs/ValidrequireExplicitPolicyTest4EE.crt -certs/WrongCRLCACert.crt -certs/anyPolicyCACert.crt -certs/basicConstraintsCriticalcAFalseCACert.crt -certs/basicConstraintsNotCriticalCACert.crt -certs/basicConstraintsNotCriticalcAFalseCACert.crt -certs/deltaCRLCA1Cert.crt -certs/deltaCRLCA2Cert.crt -certs/deltaCRLCA3Cert.crt -certs/deltaCRLIndicatorNoBaseCACert.crt -certs/distributionPoint1CACert.crt -certs/distributionPoint2CACert.crt -certs/indirectCRLCA1Cert.crt -certs/indirectCRLCA2Cert.crt -certs/indirectCRLCA3Cert.crt -certs/indirectCRLCA3cRLIssuerCert.crt -certs/indirectCRLCA4Cert.crt -certs/indirectCRLCA4cRLIssuerCert.crt -certs/indirectCRLCA5Cert.crt -certs/indirectCRLCA6Cert.crt -certs/inhibitAnyPolicy0CACert.crt -certs/inhibitAnyPolicy1CACert.crt -certs/inhibitAnyPolicy1SelfIssuedCACert.crt -certs/inhibitAnyPolicy1SelfIssuedsubCA2Cert.crt -certs/inhibitAnyPolicy1subCA1Cert.crt -certs/inhibitAnyPolicy1subCA2Cert.crt -certs/inhibitAnyPolicy1subCAIAP5Cert.crt -certs/inhibitAnyPolicy1subsubCA2Cert.crt -certs/inhibitAnyPolicy5CACert.crt -certs/inhibitAnyPolicy5subCACert.crt -certs/inhibitAnyPolicy5subsubCACert.crt -certs/inhibitAnyPolicyTest3EE.crt -certs/inhibitPolicyMapping0CACert.crt -certs/inhibitPolicyMapping0subCACert.crt -certs/inhibitPolicyMapping1P12CACert.crt -certs/inhibitPolicyMapping1P12subCACert.crt -certs/inhibitPolicyMapping1P12subCAIPM5Cert.crt -certs/inhibitPolicyMapping1P12subsubCACert.crt -certs/inhibitPolicyMapping1P12subsubCAIPM5Cert.crt -certs/inhibitPolicyMapping1P1CACert.crt -certs/inhibitPolicyMapping1P1SelfIssuedCACert.crt -certs/inhibitPolicyMapping1P1SelfIssuedsubCACert.crt -certs/inhibitPolicyMapping1P1subCACert.crt -certs/inhibitPolicyMapping1P1subsubCACert.crt -certs/inhibitPolicyMapping5CACert.crt -certs/inhibitPolicyMapping5subCACert.crt -certs/inhibitPolicyMapping5subsubCACert.crt -certs/inhibitPolicyMapping5subsubsubCACert.crt -certs/keyUsageCriticalcRLSignFalseCACert.crt -certs/keyUsageCriticalkeyCertSignFalseCACert.crt -certs/keyUsageNotCriticalCACert.crt -certs/keyUsageNotCriticalcRLSignFalseCACert.crt -certs/keyUsageNotCriticalkeyCertSignFalseCACert.crt -certs/nameConstraintsDN1CACert.crt -certs/nameConstraintsDN1SelfIssuedCACert.crt -certs/nameConstraintsDN1subCA1Cert.crt -certs/nameConstraintsDN1subCA2Cert.crt -certs/nameConstraintsDN1subCA3Cert.crt -certs/nameConstraintsDN2CACert.crt -certs/nameConstraintsDN3CACert.crt -certs/nameConstraintsDN3subCA1Cert.crt -certs/nameConstraintsDN3subCA2Cert.crt -certs/nameConstraintsDN4CACert.crt -certs/nameConstraintsDN5CACert.crt -certs/nameConstraintsDNS1CACert.crt -certs/nameConstraintsDNS2CACert.crt -certs/nameConstraintsRFC822CA1Cert.crt -certs/nameConstraintsRFC822CA2Cert.crt -certs/nameConstraintsRFC822CA3Cert.crt -certs/nameConstraintsURI1CACert.crt -certs/nameConstraintsURI2CACert.crt -certs/onlyContainsAttributeCertsCACert.crt -certs/onlyContainsCACertsCACert.crt -certs/onlyContainsUserCertsCACert.crt -certs/onlySomeReasonsCA1Cert.crt -certs/onlySomeReasonsCA2Cert.crt -certs/onlySomeReasonsCA3Cert.crt -certs/onlySomeReasonsCA4Cert.crt -certs/pathLenConstraint0CACert.crt -certs/pathLenConstraint0SelfIssuedCACert.crt -certs/pathLenConstraint0subCA2Cert.crt -certs/pathLenConstraint0subCACert.crt -certs/pathLenConstraint1CACert.crt -certs/pathLenConstraint1SelfIssuedCACert.crt -certs/pathLenConstraint1SelfIssuedsubCACert.crt -certs/pathLenConstraint1subCACert.crt -certs/pathLenConstraint6CACert.crt -certs/pathLenConstraint6subCA0Cert.crt -certs/pathLenConstraint6subCA1Cert.crt -certs/pathLenConstraint6subCA4Cert.crt -certs/pathLenConstraint6subsubCA00Cert.crt -certs/pathLenConstraint6subsubCA11Cert.crt -certs/pathLenConstraint6subsubCA41Cert.crt -certs/pathLenConstraint6subsubsubCA11XCert.crt -certs/pathLenConstraint6subsubsubCA41XCert.crt -certs/pre2000CRLnextUpdateCACert.crt -certs/requireExplicitPolicy0CACert.crt -certs/requireExplicitPolicy0subCACert.crt -certs/requireExplicitPolicy0subsubCACert.crt -certs/requireExplicitPolicy0subsubsubCACert.crt -certs/requireExplicitPolicy10CACert.crt -certs/requireExplicitPolicy10subCACert.crt -certs/requireExplicitPolicy10subsubCACert.crt -certs/requireExplicitPolicy10subsubsubCACert.crt -certs/requireExplicitPolicy2CACert.crt -certs/requireExplicitPolicy2SelfIssuedCACert.crt -certs/requireExplicitPolicy2SelfIssuedsubCACert.crt -certs/requireExplicitPolicy2subCACert.crt -certs/requireExplicitPolicy4CACert.crt -certs/requireExplicitPolicy4subCACert.crt -certs/requireExplicitPolicy4subsubCACert.crt -certs/requireExplicitPolicy4subsubsubCACert.crt -certs/requireExplicitPolicy5CACert.crt -certs/requireExplicitPolicy5subCACert.crt -certs/requireExplicitPolicy5subsubCACert.crt -certs/requireExplicitPolicy5subsubsubCACert.crt -certs/requireExplicitPolicy7CACert.crt -certs/requireExplicitPolicy7subCARE2Cert.crt -certs/requireExplicitPolicy7subsubCARE2RE4Cert.crt -certs/requireExplicitPolicy7subsubsubCARE2RE4Cert.crt -crls/BadCRLIssuerNameCACRL.crl -crls/BadCRLSignatureCACRL.crl -crls/BadSignedCACRL.crl -crls/BadnotAfterDateCACRL.crl -crls/BadnotBeforeDateCACRL.crl -crls/BasicSelfIssuedCRLSigningKeyCACRL.crl -crls/BasicSelfIssuedCRLSigningKeyCRLCertCRL.crl -crls/BasicSelfIssuedNewKeyCACRL.crl -crls/BasicSelfIssuedOldKeyCACRL.crl -crls/BasicSelfIssuedOldKeySelfIssuedCertCRL.crl -crls/DSACACRL.crl -crls/DSAParametersInheritedCACRL.crl -crls/GeneralizedTimeCRLnextUpdateCACRL.crl -crls/GoodCACRL.crl -crls/GoodsubCACRL.crl -crls/GoodsubCAPanyPolicyMapping1to2CACRL.crl -crls/LongSerialNumberCACRL.crl -crls/Mapping1to2CACRL.crl -crls/MappingFromanyPolicyCACRL.crl -crls/MappingToanyPolicyCACRL.crl -crls/MissingbasicConstraintsCACRL.crl -crls/NameOrderCACRL.crl -crls/NegativeSerialNumberCACRL.crl -crls/NoPoliciesCACRL.crl -crls/NoissuingDistributionPointCACRL.crl -crls/OldCRLnextUpdateCACRL.crl -crls/P12Mapping1to3CACRL.crl -crls/P12Mapping1to3subCACRL.crl -crls/P12Mapping1to3subsubCACRL.crl -crls/P1Mapping1to234CACRL.crl -crls/P1Mapping1to234subCACRL.crl -crls/P1anyPolicyMapping1to2CACRL.crl -crls/PanyPolicyMapping1to2CACRL.crl -crls/PoliciesP1234CACRL.crl -crls/PoliciesP1234subCAP123CRL.crl -crls/PoliciesP1234subsubCAP123P12CRL.crl -crls/PoliciesP123CACRL.crl -crls/PoliciesP123subCAP12CRL.crl -crls/PoliciesP123subsubCAP12P1CRL.crl -crls/PoliciesP123subsubCAP2P2CRL.crl -crls/PoliciesP123subsubsubCAP12P2P1CRL.crl -crls/PoliciesP12CACRL.crl -crls/PoliciesP12subCAP1CRL.crl -crls/PoliciesP12subsubCAP1P2CRL.crl -crls/PoliciesP2subCA2CRL.crl -crls/PoliciesP2subCACRL.crl -crls/PoliciesP3CACRL.crl -crls/RFC3280MandatoryAttributeTypesCACRL.crl -crls/RFC3280OptionalAttributeTypesCACRL.crl -crls/RevokedsubCACRL.crl -crls/RolloverfromPrintableStringtoUTF8StringCACRL.crl -crls/SeparateCertificateandCRLKeysCA2CRL.crl -crls/SeparateCertificateandCRLKeysCRL.crl -crls/TrustAnchorRootCRL.crl -crls/TwoCRLsCABadCRL.crl -crls/TwoCRLsCAGoodCRL.crl -crls/UIDCACRL.crl -crls/UTF8StringCaseInsensitiveMatchCACRL.crl -crls/UTF8StringEncodedNamesCACRL.crl -crls/UnknownCRLEntryExtensionCACRL.crl -crls/UnknownCRLExtensionCACRL.crl -crls/WrongCRLCACRL.crl -crls/anyPolicyCACRL.crl -crls/basicConstraintsCriticalcAFalseCACRL.crl -crls/basicConstraintsNotCriticalCACRL.crl -crls/basicConstraintsNotCriticalcAFalseCACRL.crl -crls/deltaCRLCA1CRL.crl -crls/deltaCRLCA1deltaCRL.crl -crls/deltaCRLCA2CRL.crl -crls/deltaCRLCA2deltaCRL.crl -crls/deltaCRLCA3CRL.crl -crls/deltaCRLCA3deltaCRL.crl -crls/deltaCRLIndicatorNoBaseCACRL.crl -crls/distributionPoint1CACRL.crl -crls/distributionPoint2CACRL.crl -crls/indirectCRLCA1CRL.crl -crls/indirectCRLCA3CRL.crl -crls/indirectCRLCA3cRLIssuerCRL.crl -crls/indirectCRLCA4cRLIssuerCRL.crl -crls/indirectCRLCA5CRL.crl -crls/inhibitAnyPolicy0CACRL.crl -crls/inhibitAnyPolicy1CACRL.crl -crls/inhibitAnyPolicy1subCA1CRL.crl -crls/inhibitAnyPolicy1subCA2CRL.crl -crls/inhibitAnyPolicy1subCAIAP5CRL.crl -crls/inhibitAnyPolicy1subsubCA2CRL.crl -crls/inhibitAnyPolicy5CACRL.crl -crls/inhibitAnyPolicy5subCACRL.crl -crls/inhibitAnyPolicy5subsubCACRL.crl -crls/inhibitPolicyMapping0CACRL.crl -crls/inhibitPolicyMapping0subCACRL.crl -crls/inhibitPolicyMapping1P12CACRL.crl -crls/inhibitPolicyMapping1P12subCACRL.crl -crls/inhibitPolicyMapping1P12subCAIPM5CRL.crl -crls/inhibitPolicyMapping1P12subsubCACRL.crl -crls/inhibitPolicyMapping1P12subsubCAIPM5CRL.crl -crls/inhibitPolicyMapping1P1CACRL.crl -crls/inhibitPolicyMapping1P1subCACRL.crl -crls/inhibitPolicyMapping1P1subsubCACRL.crl -crls/inhibitPolicyMapping5CACRL.crl -crls/inhibitPolicyMapping5subCACRL.crl -crls/inhibitPolicyMapping5subsubCACRL.crl -crls/inhibitPolicyMapping5subsubsubCACRL.crl -crls/keyUsageCriticalcRLSignFalseCACRL.crl -crls/keyUsageCriticalkeyCertSignFalseCACRL.crl -crls/keyUsageNotCriticalCACRL.crl -crls/keyUsageNotCriticalcRLSignFalseCACRL.crl -crls/keyUsageNotCriticalkeyCertSignFalseCACRL.crl -crls/nameConstraintsDN1CACRL.crl -crls/nameConstraintsDN1subCA1CRL.crl -crls/nameConstraintsDN1subCA2CRL.crl -crls/nameConstraintsDN1subCA3CRL.crl -crls/nameConstraintsDN2CACRL.crl -crls/nameConstraintsDN3CACRL.crl -crls/nameConstraintsDN3subCA1CRL.crl -crls/nameConstraintsDN3subCA2CRL.crl -crls/nameConstraintsDN4CACRL.crl -crls/nameConstraintsDN5CACRL.crl -crls/nameConstraintsDNS1CACRL.crl -crls/nameConstraintsDNS2CACRL.crl -crls/nameConstraintsRFC822CA1CRL.crl -crls/nameConstraintsRFC822CA2CRL.crl -crls/nameConstraintsRFC822CA3CRL.crl -crls/nameConstraintsURI1CACRL.crl -crls/nameConstraintsURI2CACRL.crl -crls/onlyContainsAttributeCertsCACRL.crl -crls/onlyContainsCACertsCACRL.crl -crls/onlyContainsUserCertsCACRL.crl -crls/onlySomeReasonsCA1compromiseCRL.crl -crls/onlySomeReasonsCA1otherreasonsCRL.crl -crls/onlySomeReasonsCA2CRL1.crl -crls/onlySomeReasonsCA2CRL2.crl -crls/onlySomeReasonsCA3compromiseCRL.crl -crls/onlySomeReasonsCA3otherreasonsCRL.crl -crls/onlySomeReasonsCA4compromiseCRL.crl -crls/onlySomeReasonsCA4otherreasonsCRL.crl -crls/pathLenConstraint0CACRL.crl -crls/pathLenConstraint0subCA2CRL.crl -crls/pathLenConstraint0subCACRL.crl -crls/pathLenConstraint1CACRL.crl -crls/pathLenConstraint1subCACRL.crl -crls/pathLenConstraint6CACRL.crl -crls/pathLenConstraint6subCA0CRL.crl -crls/pathLenConstraint6subCA1CRL.crl -crls/pathLenConstraint6subCA4CRL.crl -crls/pathLenConstraint6subsubCA00CRL.crl -crls/pathLenConstraint6subsubCA11CRL.crl -crls/pathLenConstraint6subsubCA41CRL.crl -crls/pathLenConstraint6subsubsubCA11XCRL.crl -crls/pathLenConstraint6subsubsubCA41XCRL.crl -crls/pre2000CRLnextUpdateCACRL.crl -crls/requireExplicitPolicy0CACRL.crl -crls/requireExplicitPolicy0subCACRL.crl -crls/requireExplicitPolicy0subsubCACRL.crl -crls/requireExplicitPolicy0subsubsubCACRL.crl -crls/requireExplicitPolicy10CACRL.crl -crls/requireExplicitPolicy10subCACRL.crl -crls/requireExplicitPolicy10subsubCACRL.crl -crls/requireExplicitPolicy10subsubsubCACRL.crl -crls/requireExplicitPolicy2CACRL.crl -crls/requireExplicitPolicy2subCACRL.crl -crls/requireExplicitPolicy4CACRL.crl -crls/requireExplicitPolicy4subCACRL.crl -crls/requireExplicitPolicy4subsubCACRL.crl -crls/requireExplicitPolicy4subsubsubCACRL.crl -crls/requireExplicitPolicy5CACRL.crl -crls/requireExplicitPolicy5subCACRL.crl -crls/requireExplicitPolicy5subsubCACRL.crl -crls/requireExplicitPolicy5subsubsubCACRL.crl -crls/requireExplicitPolicy7CACRL.crl -crls/requireExplicitPolicy7subCARE2CRL.crl -crls/requireExplicitPolicy7subsubCARE2RE4CRL.crl -crls/requireExplicitPolicy7subsubsubCARE2RE4CRL.crl diff --git a/yass/third_party/boringssl/src/pki/testdata/nist-pkits/test_bundle_data.globlist b/yass/third_party/boringssl/src/pki/testdata/nist-pkits/test_bundle_data.globlist deleted file mode 100644 index e74c3d71ea..0000000000 --- a/yass/third_party/boringssl/src/pki/testdata/nist-pkits/test_bundle_data.globlist +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2023 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -# -# See build/ios/update_bundle_filelist.py for details on how .globlist -# files are used to update their .filelist counterparts. - -certs/*.crt -crls/*.crl diff --git a/yass/third_party/boringssl/src/pki/verify_error.cc b/yass/third_party/boringssl/src/pki/verify_error.cc new file mode 100644 index 0000000000..da61236fbc --- /dev/null +++ b/yass/third_party/boringssl/src/pki/verify_error.cc @@ -0,0 +1,16 @@ +#include +#include + +namespace bssl { + +VerifyError::VerifyError(StatusCode code, ptrdiff_t offset, + std::string diagnostic) + : offset_(offset), code_(code), diagnostic_(std::move(diagnostic)) {} + +const std::string &VerifyError::DiagnosticString() const { return diagnostic_; } + +ptrdiff_t VerifyError::Index() const { return offset_; } + +VerifyError::StatusCode VerifyError::Code() const { return code_; } + +} // namespacee bssl diff --git a/yass/third_party/boringssl/src/rust/bssl-sys/build.rs b/yass/third_party/boringssl/src/rust/bssl-sys/build.rs index 91a9f8a8e7..ede6411025 100644 --- a/yass/third_party/boringssl/src/rust/bssl-sys/build.rs +++ b/yass/third_party/boringssl/src/rust/bssl-sys/build.rs @@ -78,6 +78,23 @@ fn get_bssl_build_dir() -> PathBuf { return Path::new(&crate_dir).join("../../build"); } +fn get_cpp_runtime_lib() -> Option { + println!("cargo:rerun-if-env-changed=BORINGSSL_RUST_CPPLIB"); + + if let Ok(cpp_lib) = env::var("BORINGSSL_RUST_CPPLIB") { + return Some(cpp_lib); + } + + if env::var_os("CARGO_CFG_UNIX").is_some() { + match env::var("CARGO_CFG_TARGET_OS").unwrap().as_ref() { + "macos" => Some("c++".into()), + _ => Some("stdc++".into()), + } + } else { + None + } +} + fn main() { let bssl_build_dir = get_bssl_build_dir(); let bssl_sys_build_dir = bssl_build_dir.join("rust/bssl-sys"); @@ -106,5 +123,9 @@ fn main() { ); println!("cargo:rustc-link-lib=static=rust_wrapper"); + if let Some(cpp_lib) = get_cpp_runtime_lib() { + println!("cargo:rustc-link-lib={}", cpp_lib); + } + println!("cargo:conf={}", OSSL_CONF_DEFINES.join(",")); } diff --git a/yass/third_party/boringssl/src/ssl/CMakeLists.txt b/yass/third_party/boringssl/src/ssl/CMakeLists.txt deleted file mode 100644 index 8b1db486a0..0000000000 --- a/yass/third_party/boringssl/src/ssl/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -add_library( - ssl - - bio_ssl.cc - d1_both.cc - d1_lib.cc - d1_pkt.cc - d1_srtp.cc - dtls_method.cc - dtls_record.cc - encrypted_client_hello.cc - extensions.cc - handoff.cc - handshake.cc - handshake_client.cc - handshake_server.cc - s3_both.cc - s3_lib.cc - s3_pkt.cc - ssl_aead_ctx.cc - ssl_asn1.cc - ssl_buffer.cc - ssl_cert.cc - ssl_cipher.cc - ssl_credential.cc - ssl_file.cc - ssl_key_share.cc - ssl_lib.cc - ssl_privkey.cc - ssl_session.cc - ssl_stat.cc - ssl_transcript.cc - ssl_versions.cc - ssl_x509.cc - t1_enc.cc - tls_method.cc - tls_record.cc - tls13_both.cc - tls13_client.cc - tls13_enc.cc - tls13_server.cc -) -# Although libssl also provides headers that require an include directory, the -# flag is already specified by libcrypto, so we omit target_include_directories -# here. -if (BORINGSSL_BUILD_TESTS) -install_if_enabled(TARGETS ssl EXPORT OpenSSLTargets ${INSTALL_DESTINATION_DEFAULT}) -endif() -set_property(TARGET ssl PROPERTY EXPORT_NAME SSL) -target_link_libraries(ssl crypto) diff --git a/yass/third_party/boringssl/src/ssl/dtls_record.cc b/yass/third_party/boringssl/src/ssl/dtls_record.cc index eb3df69627..068864faea 100644 --- a/yass/third_party/boringssl/src/ssl/dtls_record.cc +++ b/yass/third_party/boringssl/src/ssl/dtls_record.cc @@ -127,26 +127,26 @@ BSSL_NAMESPACE_BEGIN // |bitmap| or is stale. Otherwise it returns zero. static bool dtls1_bitmap_should_discard(DTLS1_BITMAP *bitmap, uint64_t seq_num) { - const unsigned kWindowSize = sizeof(bitmap->map) * 8; + const size_t kWindowSize = bitmap->map.size(); if (seq_num > bitmap->max_seq_num) { return false; } uint64_t idx = bitmap->max_seq_num - seq_num; - return idx >= kWindowSize || (bitmap->map & (((uint64_t)1) << idx)); + return idx >= kWindowSize || bitmap->map[idx]; } // dtls1_bitmap_record updates |bitmap| to record receipt of sequence number // |seq_num|. It slides the window forward if needed. It is an error to call // this function on a stale sequence number. static void dtls1_bitmap_record(DTLS1_BITMAP *bitmap, uint64_t seq_num) { - const unsigned kWindowSize = sizeof(bitmap->map) * 8; + const size_t kWindowSize = bitmap->map.size(); // Shift the window if necessary. if (seq_num > bitmap->max_seq_num) { uint64_t shift = seq_num - bitmap->max_seq_num; if (shift >= kWindowSize) { - bitmap->map = 0; + bitmap->map.reset(); } else { bitmap->map <<= shift; } @@ -155,7 +155,7 @@ static void dtls1_bitmap_record(DTLS1_BITMAP *bitmap, uint64_t seq_num) { uint64_t idx = bitmap->max_seq_num - seq_num; if (idx < kWindowSize) { - bitmap->map |= ((uint64_t)1) << idx; + bitmap->map[idx] = true; } } diff --git a/yass/third_party/boringssl/src/ssl/internal.h b/yass/third_party/boringssl/src/ssl/internal.h index 0c2c2f86dc..669cb77756 100644 --- a/yass/third_party/boringssl/src/ssl/internal.h +++ b/yass/third_party/boringssl/src/ssl/internal.h @@ -147,6 +147,7 @@ #include #include +#include #include #include #include @@ -961,9 +962,9 @@ class SSLAEADContext { // DTLS1_BITMAP maintains a sliding window of 64 sequence numbers to detect // replayed packets. It should be initialized by zeroing every field. struct DTLS1_BITMAP { - // map is a bit mask of the last 64 sequence numbers. Bit - // |1< map; // max_seq_num is the largest sequence number seen so far as a 64-bit // integer. uint64_t max_seq_num = 0; diff --git a/yass/third_party/boringssl/src/ssl/test/runner/common.go b/yass/third_party/boringssl/src/ssl/test/runner/common.go index f85a81a893..082efceab1 100644 --- a/yass/third_party/boringssl/src/ssl/test/runner/common.go +++ b/yass/third_party/boringssl/src/ssl/test/runner/common.go @@ -1180,10 +1180,10 @@ type ProtocolBugs struct { // RSA_EXPORT) in the plain RSA key exchange. RSAEphemeralKey bool - // SRTPMasterKeyIdentifer, if not empty, is the SRTP MKI value that the + // SRTPMasterKeyIdentifier, if not empty, is the SRTP MKI value that the // client offers when negotiating SRTP. MKI support is still missing so // the peer must still send none. - SRTPMasterKeyIdentifer string + SRTPMasterKeyIdentifier string // SendSRTPProtectionProfile, if non-zero, is the SRTP profile that the // server sends in the ServerHello instead of the negotiated one. diff --git a/yass/third_party/boringssl/src/ssl/test/runner/handshake_client.go b/yass/third_party/boringssl/src/ssl/test/runner/handshake_client.go index eecb114241..0c338d734a 100644 --- a/yass/third_party/boringssl/src/ssl/test/runner/handshake_client.go +++ b/yass/third_party/boringssl/src/ssl/test/runner/handshake_client.go @@ -521,7 +521,7 @@ func (hs *clientHandshakeState) createClientHello(innerHello *clientHelloMsg, ec channelIDSupported: c.config.ChannelID != nil, extendedMasterSecret: maxVersion >= VersionTLS10, srtpProtectionProfiles: c.config.SRTPProtectionProfiles, - srtpMasterKeyIdentifier: c.config.Bugs.SRTPMasterKeyIdentifer, + srtpMasterKeyIdentifier: c.config.Bugs.SRTPMasterKeyIdentifier, customExtension: c.config.Bugs.CustomExtension, omitExtensions: c.config.Bugs.OmitExtensions, emptyExtensions: c.config.Bugs.EmptyExtensions, diff --git a/yass/third_party/boringssl/src/ssl/test/runner/runner.go b/yass/third_party/boringssl/src/ssl/test/runner/runner.go index 8cc698bdc8..6f2befd1b2 100644 --- a/yass/third_party/boringssl/src/ssl/test/runner/runner.go +++ b/yass/third_party/boringssl/src/ssl/test/runner/runner.go @@ -8331,7 +8331,7 @@ func addExtensionTests() { MaxVersion: ver.version, SRTPProtectionProfiles: []uint16{SRTP_AES128_CM_HMAC_SHA1_80}, Bugs: ProtocolBugs{ - SRTPMasterKeyIdentifer: "bogus", + SRTPMasterKeyIdentifier: "bogus", }, }, flags: []string{ @@ -9897,7 +9897,7 @@ func addDTLSReplayTests() { config: Config{ Bugs: ProtocolBugs{ SequenceNumberMapping: func(in uint64) uint64 { - return in * 127 + return in * 1023 }, }, }, @@ -9912,11 +9912,15 @@ func addDTLSReplayTests() { config: Config{ Bugs: ProtocolBugs{ SequenceNumberMapping: func(in uint64) uint64 { - return in ^ 31 + // This mapping has numbers counting backwards in groups + // of 256, and then jumping forwards 511 numbers. + return in ^ 255 }, }, }, - messageCount: 200, + // This messageCount is large enough to make sure that the SequenceNumberMapping + // will reach the point where it jumps forwards after stepping backwards. + messageCount: 500, replayWrites: true, }) } diff --git a/yass/third_party/boringssl/src/tool/speed.cc b/yass/third_party/boringssl/src/tool/speed.cc index d5244558f4..d3cc87e726 100644 --- a/yass/third_party/boringssl/src/tool/speed.cc +++ b/yass/third_party/boringssl/src/tool/speed.cc @@ -25,6 +25,8 @@ #include #include +#define OPENSSL_I_UNDERSTAND_EXPERIMENTAL_FUNCTION_RISK + #include #include #include @@ -38,6 +40,7 @@ #include #include #include +#define OPENSSL_UNSTABLE_EXPERIMENTAL_SPX #include #include #include @@ -1136,7 +1139,7 @@ static bool SpeedSpx(const std::string &selected) { TimeResults results; if (!TimeFunctionParallel(&results, []() -> bool { uint8_t public_key[32], private_key[64]; - spx_generate_key(public_key, private_key); + SPX_generate_key(public_key, private_key); return true; })) { return false; @@ -1145,12 +1148,12 @@ static bool SpeedSpx(const std::string &selected) { results.Print("SPHINCS+-SHA2-128s key generation"); uint8_t public_key[32], private_key[64]; - spx_generate_key(public_key, private_key); + SPX_generate_key(public_key, private_key); static const uint8_t kMessage[] = {0, 1, 2, 3, 4, 5}; if (!TimeFunctionParallel(&results, [&private_key]() -> bool { uint8_t out[SPX_SIGNATURE_BYTES]; - spx_sign(out, private_key, kMessage, sizeof(kMessage), true); + SPX_sign(out, private_key, kMessage, sizeof(kMessage), true); return true; })) { return false; @@ -1159,10 +1162,10 @@ static bool SpeedSpx(const std::string &selected) { results.Print("SPHINCS+-SHA2-128s signing"); uint8_t signature[SPX_SIGNATURE_BYTES]; - spx_sign(signature, private_key, kMessage, sizeof(kMessage), true); + SPX_sign(signature, private_key, kMessage, sizeof(kMessage), true); if (!TimeFunctionParallel(&results, [&public_key, &signature]() -> bool { - return spx_verify(signature, public_key, kMessage, sizeof(kMessage)) == + return SPX_verify(signature, public_key, kMessage, sizeof(kMessage)) == 1; })) { fprintf(stderr, "SPHINCS+-SHA2-128s verify failed.\n"); diff --git a/yass/third_party/boringssl/src/util/all_tests.go b/yass/third_party/boringssl/src/util/all_tests.go index b2f0b17be5..dad4d4a362 100644 --- a/yass/third_party/boringssl/src/util/all_tests.go +++ b/yass/third_party/boringssl/src/util/all_tests.go @@ -431,5 +431,5 @@ func main() { os.Exit(1) } - fmt.Printf("\nAll tests passed!\n") + fmt.Printf("All unit tests passed!\n") } diff --git a/yass/third_party/boringssl/src/util/bot/DEPS b/yass/third_party/boringssl/src/util/bot/DEPS index 9f004bcea1..b060ca4560 100644 --- a/yass/third_party/boringssl/src/util/bot/DEPS +++ b/yass/third_party/boringssl/src/util/bot/DEPS @@ -26,9 +26,9 @@ vars = { # cipd describe PACKAGE_NAME -version latest # infra/3pp/tools/cmake/linux-amd64 - 'cmake_version': 'version:2@3.28.1.chromium.8', + 'cmake_version': 'version:2@3.28.4.chromium.8', # infra/3pp/tools/go/linux-amd64 - 'go_version': 'version:2@1.21.5', + 'go_version': 'version:2@1.22.1', # infra/3pp/tools/perl/windows-amd64 'perl_version': 'version:2@5.32.1.1', @@ -36,8 +36,8 @@ vars = { # https://chromium.googlesource.com/chromium/src/+/main/DEPS 'android_sdk_platform-tools_version': 'HWVsGs2HCKgSVv41FsOcsfJbNcB0UFiNrF6Tc4yRArYC', 'libfuzzer_revision': '758bd21f103a501b362b1ca46fa8fcb692eaa303', - 'libcxx_revision': 'd0429f0628dd5c2c83a197012b2153b979aff674', - 'libcxxabi_revision': '8806fb8bb26e20206241ea2dfcee4fd2d4157b83', + 'libcxx_revision': '80307e66e74bae927fb8709a549859e777e3bf0b', + 'libcxxabi_revision': 'ec88f0ab262c5f2426dbf138b92ee9ae5961e431', 'ninja_version': 'version:2@1.11.1.chromium.6', # The Android NDK cannot be updated on CI for two reasons: diff --git a/yass/third_party/boringssl/src/util/bot/libcxx-config/__assertion_handler b/yass/third_party/boringssl/src/util/bot/libcxx-config/__assertion_handler new file mode 100644 index 0000000000..22de65ea03 --- /dev/null +++ b/yass/third_party/boringssl/src/util/bot/libcxx-config/__assertion_handler @@ -0,0 +1,9 @@ +#ifndef BORINGSSL_LIBCXX_ASSERTION_HANDLER_ +#define BORINGSSL_LIBCXX_ASSERTION_HANDLER_ + +#include <__verbose_abort> + +// We use only bundle libc++ for testing, so we can use the verbose aborts. +#define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_ABORT("%s", message) + +#endif // BORINGSSL_LIBCXX_ASSERTION_HANDLER_ diff --git a/yass/third_party/boringssl/src/util/bot/update_clang.py b/yass/third_party/boringssl/src/util/bot/update_clang.py index b47c008ec5..b876921f21 100644 --- a/yass/third_party/boringssl/src/util/bot/update_clang.py +++ b/yass/third_party/boringssl/src/util/bot/update_clang.py @@ -29,8 +29,8 @@ except ImportError: # CLANG_REVISION and CLANG_SUB_REVISION determine the build of clang # to use. These should be synced with tools/clang/scripts/update.py in # Chromium. -CLANG_REVISION = 'llvmorg-18-init-14420-gea3a3b25' -CLANG_SUB_REVISION = 3 +CLANG_REVISION = 'llvmorg-19-init-2941-ga0b3dbaf' +CLANG_SUB_REVISION = 22 PACKAGE_VERSION = '%s-%s' % (CLANG_REVISION, CLANG_SUB_REVISION) diff --git a/yass/third_party/boringssl/src/util/bot/vs_toolchain.py b/yass/third_party/boringssl/src/util/bot/vs_toolchain.py index 9e19e22b36..c6dc4a4a10 100644 --- a/yass/third_party/boringssl/src/util/bot/vs_toolchain.py +++ b/yass/third_party/boringssl/src/util/bot/vs_toolchain.py @@ -12,6 +12,7 @@ import sys script_dir = os.path.dirname(os.path.realpath(__file__)) +toolchain_dir = os.path.join(script_dir, 'win_toolchain') json_data_file = os.path.join(script_dir, 'win_toolchain.json') @@ -72,8 +73,9 @@ def _GetDesiredVsToolchainHashes(version): # with ARM64 libraries and UWP support. return ['0b5ee4d2b1'] if version == '2022': - # VS 2022 17.4 with 10.0.22621.0 SDK with ARM64 libraries and UWP support. - return ['27370823e7'] + # VS 2022 17.9.2 with 10.0.22621.2428 SDK with ARM64 libraries and UWP + # support. + return ['7393122652'] raise Exception('Unsupported VS version %s' % version) @@ -90,6 +92,7 @@ def Update(version): 'win_toolchain', 'get_toolchain_if_necessary.py'), '--output-json', json_data_file, + '--toolchain-dir', toolchain_dir, ] + _GetDesiredVsToolchainHashes(version) subprocess.check_call(get_toolchain_args) return 0 diff --git a/yass/third_party/boringssl/src/util/build/build.go b/yass/third_party/boringssl/src/util/build/build.go new file mode 100644 index 0000000000..277bfdccce --- /dev/null +++ b/yass/third_party/boringssl/src/util/build/build.go @@ -0,0 +1,38 @@ +// Copyright (c) 2024, Google Inc. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +package build + +// A Target is a build target for consumption by the downstream build systems. +// All pre-generated files are baked input its source lists. +type Target struct { + // Srcs is the list of C or C++ files (determined by file extension) that are + // built into the target. + Srcs []string `json:"srcs,omitempty"` + // Hdrs is the list public headers that should be available to external + // projects using this target. + Hdrs []string `json:"hdrs,omitempty"` + // InternalHdrs is the list of internal headers that should be available to + // this target, as well as any internal targets using this target. + InternalHdrs []string `json:"internal_hdrs,omitempty"` + // Asm is the a list of assembly files to be passed to a gas-compatible + // assembler. + Asm []string `json:"asm,omitempty"` + // Nasm is the a list of assembly files to be passed to a nasm-compatible + // assembler. + Nasm []string `json:"nasm,omitempty"` + // Data is a list of test data files that should be available when the test is + // run. + Data []string `json:"data,omitempty"` +} diff --git a/yass/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/modulewrapper.cc b/yass/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/modulewrapper.cc index b66f2ac114..dd17f563f0 100644 --- a/yass/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/modulewrapper.cc +++ b/yass/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/modulewrapper.cc @@ -550,7 +550,6 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl "P-521" ], "hashAlg": [ - "SHA-1", "SHA2-224", "SHA2-256", "SHA2-384", @@ -562,7 +561,7 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl { "algorithm": "RSA", "mode": "keyGen", - "revision": "FIPS186-4", + "revision": "FIPS186-5", "infoGeneratedByServer": true, "pubExpMode": "fixed", "fixedPubExp": "010001", @@ -590,7 +589,7 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl { "algorithm": "RSA", "mode": "sigGen", - "revision": "FIPS186-4", + "revision": "FIPS186-5", "capabilities": [{ "sigType": "pkcs1v1.5", "properties": [{ @@ -701,26 +700,10 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl { "algorithm": "RSA", "mode": "sigVer", - "revision": "FIPS186-4", + "revision": "FIPS186-5", "pubExpMode": "fixed", "fixedPubExp": "010001", "capabilities": [{ - "sigType": "pkcs1v1.5", - "properties": [{ - "modulo": 1024, - "hashPair": [{ - "hashAlg": "SHA2-224" - }, { - "hashAlg": "SHA2-256" - }, { - "hashAlg": "SHA2-384" - }, { - "hashAlg": "SHA2-512" - }, { - "hashAlg": "SHA-1" - }] - }] - },{ "sigType": "pkcs1v1.5", "properties": [{ "modulo": 2048, @@ -732,8 +715,6 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl "hashAlg": "SHA2-384" }, { "hashAlg": "SHA2-512" - }, { - "hashAlg": "SHA-1" }] }] },{ @@ -748,8 +729,6 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl "hashAlg": "SHA2-384" }, { "hashAlg": "SHA2-512" - }, { - "hashAlg": "SHA-1" }] }] },{ @@ -764,29 +743,6 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl "hashAlg": "SHA2-384" }, { "hashAlg": "SHA2-512" - }, { - "hashAlg": "SHA-1" - }] - }] - },{ - "sigType": "pss", - "properties": [{ - "modulo": 1024, - "hashPair": [{ - "hashAlg": "SHA2-224", - "saltLen": 28 - }, { - "hashAlg": "SHA2-256", - "saltLen": 32 - }, { - "hashAlg": "SHA2-384", - "saltLen": 48 - }, { - "hashAlg": "SHA2-512/256", - "saltLen": 32 - }, { - "hashAlg": "SHA-1", - "saltLen": 20 }] }] },{ @@ -808,9 +764,6 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl }, { "hashAlg": "SHA2-512/256", "saltLen": 32 - }, { - "hashAlg": "SHA-1", - "saltLen": 20 }] }] },{ @@ -832,9 +785,6 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl }, { "hashAlg": "SHA2-512/256", "saltLen": 32 - }, { - "hashAlg": "SHA-1", - "saltLen": 20 }] }] },{ @@ -856,9 +806,6 @@ static bool GetConfig(const Span args[], ReplyCallback write_repl }, { "hashAlg": "SHA2-512/256", "saltLen": 32 - }, { - "hashAlg": "SHA-1", - "saltLen": 20 }] }] }] diff --git a/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.go b/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.go index cd08bed07b..063d6ac6e7 100644 --- a/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.go +++ b/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.go @@ -35,7 +35,7 @@ import ( // inputFile represents a textual assembly file. type inputFile struct { path string - // index is a unique identifer given to this file. It's used for + // index is a unique identifier given to this file. It's used for // mapping local symbols. index int // isArchive indicates that the input should be processed as an ar diff --git a/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.peg b/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.peg index 3381cae695..3ce01edf26 100644 --- a/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.peg +++ b/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.peg @@ -83,13 +83,16 @@ RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) / ARMRegister) ![fb:(+\-] ARMConstantTweak <- ((([us] "xt" [xwhb]) / "lsl" / "lsr" / "ror" / "asr") (WS '#' Offset)?) / - "mul vl" # multiply offset by the hardware's vector length + "mul vl" / # multiply offset by the hardware's vector length + "mul #" [0-9] ARMRegister <- "sp" / ([xwdqshb] [0-9] [0-9]?) / - "xzr" / "wzr" / "NZCV" / ARMVectorRegister / SVE2PredicateRegister / + "xzr" / "wzr" / "NZCV" / SVE2PredicateRegister / ARMVectorRegister / SVE2SpecialValue / ('{' WS? ARMVectorRegister (',' WS? ARMVectorRegister)* WS? '}' ('[' [0-9] [0-9]? ']')? ) -ARMVectorRegister <- [vz] [0-9] [0-9]? ('.' [0-9]* [bsdhq] ('[' [0-9] [0-9]? ']')? )? -SVE2PredicateRegister <- "p" [0-9] [0-9]? "/" [mMzZ] +ARMVectorRegister <- [pvz] [0-9] [0-9]? ![[0-9a-z_]] ('.' [0-9]* [bsdhq] ('[' [0-9] [0-9]? ']')? )? +SVE2PredicateRegister <- "p" [0-9] [0-9]? "/" [[mz]] +# https://developer.arm.com/documentation/ddi0596/2020-12/SVE-Instructions/INCD--INCH--INCW--vector---Increment-vector-by-multiple-of-predicate-constraint-element-count- +SVE2SpecialValue <- ("pow2" / ("vl" [12345678] ![0-9] ) / "vl16" / "vl32" / "vl64" / "vl128" / "vl256" / "mul3" / "mul4" / "all") ![[0-9a-z_]] # Compilers only output a very limited number of expression forms. Rather than # implement a full expression parser, this enumerate those forms plus a few # that appear in our hand-written assembly. diff --git a/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.peg.go b/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.peg.go index f0963cc930..856518d9dc 100644 --- a/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.peg.go +++ b/yass/third_party/boringssl/src/util/fipstools/delocate/delocate.peg.go @@ -65,6 +65,7 @@ const ( ruleARMRegister ruleARMVectorRegister ruleSVE2PredicateRegister + ruleSVE2SpecialValue ruleMemoryRef ruleSymbolRef ruleLow12BitsSymbolRef @@ -127,6 +128,7 @@ var rul3s = [...]string{ "ARMRegister", "ARMVectorRegister", "SVE2PredicateRegister", + "SVE2SpecialValue", "MemoryRef", "SymbolRef", "Low12BitsSymbolRef", @@ -252,7 +254,7 @@ func (t *tokens32) Tokens() []token32 { type Asm struct { Buffer string buffer []rune - rules [59]func() bool + rules [60]func() bool parse func(rule ...int) error reset func() Pretty bool @@ -4770,7 +4772,7 @@ func (p *Asm) Init(options ...func(*Asm) error) error { position, tokenIndex = position532, tokenIndex532 return false }, - /* 43 ARMConstantTweak <- <((((('u' / 's') (('x' / 'X') ('t' / 'T')) ('x' / 'w' / 'h' / 'b')) / (('l' / 'L') ('s' / 'S') ('l' / 'L')) / (('l' / 'L') ('s' / 'S') ('r' / 'R')) / (('r' / 'R') ('o' / 'O') ('r' / 'R')) / (('a' / 'A') ('s' / 'S') ('r' / 'R'))) (WS '#' Offset)?) / (('m' / 'M') ('u' / 'U') ('l' / 'L') ' ' ('v' / 'V') ('l' / 'L')))> */ + /* 43 ARMConstantTweak <- <((((('u' / 's') (('x' / 'X') ('t' / 'T')) ('x' / 'w' / 'h' / 'b')) / (('l' / 'L') ('s' / 'S') ('l' / 'L')) / (('l' / 'L') ('s' / 'S') ('r' / 'R')) / (('r' / 'R') ('o' / 'O') ('r' / 'R')) / (('a' / 'A') ('s' / 'S') ('r' / 'R'))) (WS '#' Offset)?) / (('m' / 'M') ('u' / 'U') ('l' / 'L') ' ' ('v' / 'V') ('l' / 'L')) / (('m' / 'M') ('u' / 'U') ('l' / 'L') ' ' '#' [0-9]))> */ func() bool { position573, tokenIndex573 := position, tokenIndex { @@ -5068,84 +5070,144 @@ func (p *Asm) Init(options ...func(*Asm) error) error { l576: position, tokenIndex = position575, tokenIndex575 { - position618, tokenIndex618 := position, tokenIndex + position619, tokenIndex619 := position, tokenIndex if buffer[position] != rune('m') { - goto l619 + goto l620 } position++ + goto l619 + l620: + position, tokenIndex = position619, tokenIndex619 + if buffer[position] != rune('M') { + goto l618 + } + position++ + } + l619: + { + position621, tokenIndex621 := position, tokenIndex + if buffer[position] != rune('u') { + goto l622 + } + position++ + goto l621 + l622: + position, tokenIndex = position621, tokenIndex621 + if buffer[position] != rune('U') { + goto l618 + } + position++ + } + l621: + { + position623, tokenIndex623 := position, tokenIndex + if buffer[position] != rune('l') { + goto l624 + } + position++ + goto l623 + l624: + position, tokenIndex = position623, tokenIndex623 + if buffer[position] != rune('L') { + goto l618 + } + position++ + } + l623: + if buffer[position] != rune(' ') { goto l618 - l619: - position, tokenIndex = position618, tokenIndex618 + } + position++ + { + position625, tokenIndex625 := position, tokenIndex + if buffer[position] != rune('v') { + goto l626 + } + position++ + goto l625 + l626: + position, tokenIndex = position625, tokenIndex625 + if buffer[position] != rune('V') { + goto l618 + } + position++ + } + l625: + { + position627, tokenIndex627 := position, tokenIndex + if buffer[position] != rune('l') { + goto l628 + } + position++ + goto l627 + l628: + position, tokenIndex = position627, tokenIndex627 + if buffer[position] != rune('L') { + goto l618 + } + position++ + } + l627: + goto l575 + l618: + position, tokenIndex = position575, tokenIndex575 + { + position629, tokenIndex629 := position, tokenIndex + if buffer[position] != rune('m') { + goto l630 + } + position++ + goto l629 + l630: + position, tokenIndex = position629, tokenIndex629 if buffer[position] != rune('M') { goto l573 } position++ } - l618: + l629: { - position620, tokenIndex620 := position, tokenIndex + position631, tokenIndex631 := position, tokenIndex if buffer[position] != rune('u') { - goto l621 + goto l632 } position++ - goto l620 - l621: - position, tokenIndex = position620, tokenIndex620 + goto l631 + l632: + position, tokenIndex = position631, tokenIndex631 if buffer[position] != rune('U') { goto l573 } position++ } - l620: + l631: { - position622, tokenIndex622 := position, tokenIndex + position633, tokenIndex633 := position, tokenIndex if buffer[position] != rune('l') { - goto l623 + goto l634 } position++ - goto l622 - l623: - position, tokenIndex = position622, tokenIndex622 + goto l633 + l634: + position, tokenIndex = position633, tokenIndex633 if buffer[position] != rune('L') { goto l573 } position++ } - l622: + l633: if buffer[position] != rune(' ') { goto l573 } position++ - { - position624, tokenIndex624 := position, tokenIndex - if buffer[position] != rune('v') { - goto l625 - } - position++ - goto l624 - l625: - position, tokenIndex = position624, tokenIndex624 - if buffer[position] != rune('V') { - goto l573 - } - position++ + if buffer[position] != rune('#') { + goto l573 } - l624: - { - position626, tokenIndex626 := position, tokenIndex - if buffer[position] != rune('l') { - goto l627 - } - position++ - goto l626 - l627: - position, tokenIndex = position626, tokenIndex626 - if buffer[position] != rune('L') { - goto l573 - } - position++ + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l573 } - l626: + position++ } l575: add(ruleARMConstantTweak, position574) @@ -5155,172 +5217,124 @@ func (p *Asm) Init(options ...func(*Asm) error) error { position, tokenIndex = position573, tokenIndex573 return false }, - /* 44 ARMRegister <- <((('s' / 'S') ('p' / 'P')) / (('x' / 'w' / 'd' / 'q' / 's' / 'h' / 'b') [0-9] [0-9]?) / (('x' / 'X') ('z' / 'Z') ('r' / 'R')) / (('w' / 'W') ('z' / 'Z') ('r' / 'R')) / (('n' / 'N') ('z' / 'Z') ('c' / 'C') ('v' / 'V')) / ARMVectorRegister / SVE2PredicateRegister / ('{' WS? ARMVectorRegister (',' WS? ARMVectorRegister)* WS? '}' ('[' [0-9] [0-9]? ']')?))> */ + /* 44 ARMRegister <- <((('s' / 'S') ('p' / 'P')) / (('x' / 'w' / 'd' / 'q' / 's' / 'h' / 'b') [0-9] [0-9]?) / (('x' / 'X') ('z' / 'Z') ('r' / 'R')) / (('w' / 'W') ('z' / 'Z') ('r' / 'R')) / (('n' / 'N') ('z' / 'Z') ('c' / 'C') ('v' / 'V')) / SVE2PredicateRegister / ARMVectorRegister / SVE2SpecialValue / ('{' WS? ARMVectorRegister (',' WS? ARMVectorRegister)* WS? '}' ('[' [0-9] [0-9]? ']')?))> */ func() bool { - position628, tokenIndex628 := position, tokenIndex + position635, tokenIndex635 := position, tokenIndex { - position629 := position + position636 := position { - position630, tokenIndex630 := position, tokenIndex + position637, tokenIndex637 := position, tokenIndex { - position632, tokenIndex632 := position, tokenIndex + position639, tokenIndex639 := position, tokenIndex if buffer[position] != rune('s') { - goto l633 - } - position++ - goto l632 - l633: - position, tokenIndex = position632, tokenIndex632 - if buffer[position] != rune('S') { - goto l631 - } - position++ - } - l632: - { - position634, tokenIndex634 := position, tokenIndex - if buffer[position] != rune('p') { - goto l635 - } - position++ - goto l634 - l635: - position, tokenIndex = position634, tokenIndex634 - if buffer[position] != rune('P') { - goto l631 - } - position++ - } - l634: - goto l630 - l631: - position, tokenIndex = position630, tokenIndex630 - { - position637, tokenIndex637 := position, tokenIndex - if buffer[position] != rune('x') { - goto l638 - } - position++ - goto l637 - l638: - position, tokenIndex = position637, tokenIndex637 - if buffer[position] != rune('w') { - goto l639 - } - position++ - goto l637 - l639: - position, tokenIndex = position637, tokenIndex637 - if buffer[position] != rune('d') { goto l640 } position++ - goto l637 + goto l639 l640: - position, tokenIndex = position637, tokenIndex637 - if buffer[position] != rune('q') { - goto l641 + position, tokenIndex = position639, tokenIndex639 + if buffer[position] != rune('S') { + goto l638 } position++ - goto l637 - l641: - position, tokenIndex = position637, tokenIndex637 - if buffer[position] != rune('s') { + } + l639: + { + position641, tokenIndex641 := position, tokenIndex + if buffer[position] != rune('p') { goto l642 } position++ - goto l637 + goto l641 l642: - position, tokenIndex = position637, tokenIndex637 - if buffer[position] != rune('h') { - goto l643 - } - position++ - goto l637 - l643: - position, tokenIndex = position637, tokenIndex637 - if buffer[position] != rune('b') { - goto l636 + position, tokenIndex = position641, tokenIndex641 + if buffer[position] != rune('P') { + goto l638 } position++ } - l637: - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l636 - } - position++ + l641: + goto l637 + l638: + position, tokenIndex = position637, tokenIndex637 { position644, tokenIndex644 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l644 + if buffer[position] != rune('x') { + goto l645 } position++ - goto l645 - l644: + goto l644 + l645: position, tokenIndex = position644, tokenIndex644 - } - l645: - goto l630 - l636: - position, tokenIndex = position630, tokenIndex630 - { - position647, tokenIndex647 := position, tokenIndex - if buffer[position] != rune('x') { + if buffer[position] != rune('w') { + goto l646 + } + position++ + goto l644 + l646: + position, tokenIndex = position644, tokenIndex644 + if buffer[position] != rune('d') { + goto l647 + } + position++ + goto l644 + l647: + position, tokenIndex = position644, tokenIndex644 + if buffer[position] != rune('q') { goto l648 } position++ - goto l647 + goto l644 l648: - position, tokenIndex = position647, tokenIndex647 - if buffer[position] != rune('X') { - goto l646 + position, tokenIndex = position644, tokenIndex644 + if buffer[position] != rune('s') { + goto l649 } position++ - } - l647: - { - position649, tokenIndex649 := position, tokenIndex - if buffer[position] != rune('z') { + goto l644 + l649: + position, tokenIndex = position644, tokenIndex644 + if buffer[position] != rune('h') { goto l650 } position++ - goto l649 + goto l644 l650: - position, tokenIndex = position649, tokenIndex649 - if buffer[position] != rune('Z') { - goto l646 + position, tokenIndex = position644, tokenIndex644 + if buffer[position] != rune('b') { + goto l643 } position++ } - l649: + l644: + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l643 + } + position++ { position651, tokenIndex651 := position, tokenIndex - if buffer[position] != rune('r') { - goto l652 + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l651 } position++ - goto l651 - l652: + goto l652 + l651: position, tokenIndex = position651, tokenIndex651 - if buffer[position] != rune('R') { - goto l646 - } - position++ } - l651: - goto l630 - l646: - position, tokenIndex = position630, tokenIndex630 + l652: + goto l637 + l643: + position, tokenIndex = position637, tokenIndex637 { position654, tokenIndex654 := position, tokenIndex - if buffer[position] != rune('w') { + if buffer[position] != rune('x') { goto l655 } position++ goto l654 l655: position, tokenIndex = position654, tokenIndex654 - if buffer[position] != rune('W') { + if buffer[position] != rune('X') { goto l653 } position++ @@ -5356,19 +5370,19 @@ func (p *Asm) Init(options ...func(*Asm) error) error { position++ } l658: - goto l630 + goto l637 l653: - position, tokenIndex = position630, tokenIndex630 + position, tokenIndex = position637, tokenIndex637 { position661, tokenIndex661 := position, tokenIndex - if buffer[position] != rune('n') { + if buffer[position] != rune('w') { goto l662 } position++ goto l661 l662: position, tokenIndex = position661, tokenIndex661 - if buffer[position] != rune('N') { + if buffer[position] != rune('W') { goto l660 } position++ @@ -5391,1375 +5405,2062 @@ func (p *Asm) Init(options ...func(*Asm) error) error { l663: { position665, tokenIndex665 := position, tokenIndex - if buffer[position] != rune('c') { + if buffer[position] != rune('r') { goto l666 } position++ goto l665 l666: position, tokenIndex = position665, tokenIndex665 - if buffer[position] != rune('C') { + if buffer[position] != rune('R') { goto l660 } position++ } l665: - { - position667, tokenIndex667 := position, tokenIndex - if buffer[position] != rune('v') { - goto l668 - } - position++ - goto l667 - l668: - position, tokenIndex = position667, tokenIndex667 - if buffer[position] != rune('V') { - goto l660 - } - position++ - } - l667: - goto l630 + goto l637 l660: - position, tokenIndex = position630, tokenIndex630 - if !_rules[ruleARMVectorRegister]() { - goto l669 - } - goto l630 - l669: - position, tokenIndex = position630, tokenIndex630 - if !_rules[ruleSVE2PredicateRegister]() { - goto l670 - } - goto l630 - l670: - position, tokenIndex = position630, tokenIndex630 - if buffer[position] != rune('{') { - goto l628 - } - position++ + position, tokenIndex = position637, tokenIndex637 { - position671, tokenIndex671 := position, tokenIndex - if !_rules[ruleWS]() { + position668, tokenIndex668 := position, tokenIndex + if buffer[position] != rune('n') { + goto l669 + } + position++ + goto l668 + l669: + position, tokenIndex = position668, tokenIndex668 + if buffer[position] != rune('N') { + goto l667 + } + position++ + } + l668: + { + position670, tokenIndex670 := position, tokenIndex + if buffer[position] != rune('z') { goto l671 } - goto l672 + position++ + goto l670 l671: - position, tokenIndex = position671, tokenIndex671 - } - l672: - if !_rules[ruleARMVectorRegister]() { - goto l628 - } - l673: - { - position674, tokenIndex674 := position, tokenIndex - if buffer[position] != rune(',') { - goto l674 + position, tokenIndex = position670, tokenIndex670 + if buffer[position] != rune('Z') { + goto l667 } position++ - { - position675, tokenIndex675 := position, tokenIndex - if !_rules[ruleWS]() { - goto l675 - } - goto l676 - l675: - position, tokenIndex = position675, tokenIndex675 - } - l676: - if !_rules[ruleARMVectorRegister]() { - goto l674 - } - goto l673 - l674: - position, tokenIndex = position674, tokenIndex674 } + l670: { - position677, tokenIndex677 := position, tokenIndex - if !_rules[ruleWS]() { - goto l677 + position672, tokenIndex672 := position, tokenIndex + if buffer[position] != rune('c') { + goto l673 } - goto l678 - l677: - position, tokenIndex = position677, tokenIndex677 + position++ + goto l672 + l673: + position, tokenIndex = position672, tokenIndex672 + if buffer[position] != rune('C') { + goto l667 + } + position++ } + l672: + { + position674, tokenIndex674 := position, tokenIndex + if buffer[position] != rune('v') { + goto l675 + } + position++ + goto l674 + l675: + position, tokenIndex = position674, tokenIndex674 + if buffer[position] != rune('V') { + goto l667 + } + position++ + } + l674: + goto l637 + l667: + position, tokenIndex = position637, tokenIndex637 + if !_rules[ruleSVE2PredicateRegister]() { + goto l676 + } + goto l637 + l676: + position, tokenIndex = position637, tokenIndex637 + if !_rules[ruleARMVectorRegister]() { + goto l677 + } + goto l637 + l677: + position, tokenIndex = position637, tokenIndex637 + if !_rules[ruleSVE2SpecialValue]() { + goto l678 + } + goto l637 l678: - if buffer[position] != rune('}') { - goto l628 + position, tokenIndex = position637, tokenIndex637 + if buffer[position] != rune('{') { + goto l635 } position++ { position679, tokenIndex679 := position, tokenIndex - if buffer[position] != rune('[') { + if !_rules[ruleWS]() { goto l679 } - position++ - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l679 - } - position++ - { - position681, tokenIndex681 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l681 - } - position++ - goto l682 - l681: - position, tokenIndex = position681, tokenIndex681 - } - l682: - if buffer[position] != rune(']') { - goto l679 - } - position++ goto l680 l679: position, tokenIndex = position679, tokenIndex679 } l680: - } - l630: - add(ruleARMRegister, position629) - } - return true - l628: - position, tokenIndex = position628, tokenIndex628 - return false - }, - /* 45 ARMVectorRegister <- <(('v' / 'z') [0-9] [0-9]? ('.' [0-9]* ('b' / 's' / 'd' / 'h' / 'q') ('[' [0-9] [0-9]? ']')?)?)> */ - func() bool { - position683, tokenIndex683 := position, tokenIndex - { - position684 := position - { - position685, tokenIndex685 := position, tokenIndex - if buffer[position] != rune('v') { - goto l686 + if !_rules[ruleARMVectorRegister]() { + goto l635 } - position++ - goto l685 - l686: - position, tokenIndex = position685, tokenIndex685 - if buffer[position] != rune('z') { - goto l683 - } - position++ - } - l685: - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l683 - } - position++ - { - position687, tokenIndex687 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l687 - } - position++ - goto l688 - l687: - position, tokenIndex = position687, tokenIndex687 - } - l688: - { - position689, tokenIndex689 := position, tokenIndex - if buffer[position] != rune('.') { - goto l689 - } - position++ - l691: + l681: { - position692, tokenIndex692 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l692 - } - position++ - goto l691 - l692: - position, tokenIndex = position692, tokenIndex692 - } - { - position693, tokenIndex693 := position, tokenIndex - if buffer[position] != rune('b') { - goto l694 - } - position++ - goto l693 - l694: - position, tokenIndex = position693, tokenIndex693 - if buffer[position] != rune('s') { - goto l695 - } - position++ - goto l693 - l695: - position, tokenIndex = position693, tokenIndex693 - if buffer[position] != rune('d') { - goto l696 - } - position++ - goto l693 - l696: - position, tokenIndex = position693, tokenIndex693 - if buffer[position] != rune('h') { - goto l697 - } - position++ - goto l693 - l697: - position, tokenIndex = position693, tokenIndex693 - if buffer[position] != rune('q') { - goto l689 - } - position++ - } - l693: - { - position698, tokenIndex698 := position, tokenIndex - if buffer[position] != rune('[') { - goto l698 - } - position++ - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l698 + position682, tokenIndex682 := position, tokenIndex + if buffer[position] != rune(',') { + goto l682 } position++ { - position700, tokenIndex700 := position, tokenIndex + position683, tokenIndex683 := position, tokenIndex + if !_rules[ruleWS]() { + goto l683 + } + goto l684 + l683: + position, tokenIndex = position683, tokenIndex683 + } + l684: + if !_rules[ruleARMVectorRegister]() { + goto l682 + } + goto l681 + l682: + position, tokenIndex = position682, tokenIndex682 + } + { + position685, tokenIndex685 := position, tokenIndex + if !_rules[ruleWS]() { + goto l685 + } + goto l686 + l685: + position, tokenIndex = position685, tokenIndex685 + } + l686: + if buffer[position] != rune('}') { + goto l635 + } + position++ + { + position687, tokenIndex687 := position, tokenIndex + if buffer[position] != rune('[') { + goto l687 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l687 + } + position++ + { + position689, tokenIndex689 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l700 + goto l689 } position++ - goto l701 - l700: - position, tokenIndex = position700, tokenIndex700 + goto l690 + l689: + position, tokenIndex = position689, tokenIndex689 } - l701: + l690: if buffer[position] != rune(']') { - goto l698 + goto l687 + } + position++ + goto l688 + l687: + position, tokenIndex = position687, tokenIndex687 + } + l688: + } + l637: + add(ruleARMRegister, position636) + } + return true + l635: + position, tokenIndex = position635, tokenIndex635 + return false + }, + /* 45 ARMVectorRegister <- <(('p' / 'v' / 'z') [0-9] [0-9]? !([0-9] / [0-9] / ([a-z] / [A-Z]) / '_') ('.' [0-9]* ('b' / 's' / 'd' / 'h' / 'q') ('[' [0-9] [0-9]? ']')?)?)> */ + func() bool { + position691, tokenIndex691 := position, tokenIndex + { + position692 := position + { + position693, tokenIndex693 := position, tokenIndex + if buffer[position] != rune('p') { + goto l694 + } + position++ + goto l693 + l694: + position, tokenIndex = position693, tokenIndex693 + if buffer[position] != rune('v') { + goto l695 + } + position++ + goto l693 + l695: + position, tokenIndex = position693, tokenIndex693 + if buffer[position] != rune('z') { + goto l691 + } + position++ + } + l693: + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l691 + } + position++ + { + position696, tokenIndex696 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l696 + } + position++ + goto l697 + l696: + position, tokenIndex = position696, tokenIndex696 + } + l697: + { + position698, tokenIndex698 := position, tokenIndex + { + position699, tokenIndex699 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l700 } position++ goto l699 - l698: - position, tokenIndex = position698, tokenIndex698 + l700: + position, tokenIndex = position699, tokenIndex699 + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l701 + } + position++ + goto l699 + l701: + position, tokenIndex = position699, tokenIndex699 + { + position703, tokenIndex703 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { + goto l704 + } + position++ + goto l703 + l704: + position, tokenIndex = position703, tokenIndex703 + if c := buffer[position]; c < rune('A') || c > rune('Z') { + goto l702 + } + position++ + } + l703: + goto l699 + l702: + position, tokenIndex = position699, tokenIndex699 + if buffer[position] != rune('_') { + goto l698 + } + position++ } l699: - goto l690 - l689: - position, tokenIndex = position689, tokenIndex689 + goto l691 + l698: + position, tokenIndex = position698, tokenIndex698 } - l690: - add(ruleARMVectorRegister, position684) - } - return true - l683: - position, tokenIndex = position683, tokenIndex683 - return false - }, - /* 46 SVE2PredicateRegister <- <(('p' / 'P') [0-9] [0-9]? '/' ('m' / 'M' / 'z' / 'Z'))> */ - func() bool { - position702, tokenIndex702 := position, tokenIndex - { - position703 := position { - position704, tokenIndex704 := position, tokenIndex - if buffer[position] != rune('p') { + position705, tokenIndex705 := position, tokenIndex + if buffer[position] != rune('.') { goto l705 } position++ - goto l704 - l705: - position, tokenIndex = position704, tokenIndex704 - if buffer[position] != rune('P') { - goto l702 + l707: + { + position708, tokenIndex708 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l708 + } + position++ + goto l707 + l708: + position, tokenIndex = position708, tokenIndex708 } - position++ - } - l704: - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l702 - } - position++ - { - position706, tokenIndex706 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l706 - } - position++ - goto l707 - l706: - position, tokenIndex = position706, tokenIndex706 - } - l707: - if buffer[position] != rune('/') { - goto l702 - } - position++ - { - position708, tokenIndex708 := position, tokenIndex - if buffer[position] != rune('m') { + { + position709, tokenIndex709 := position, tokenIndex + if buffer[position] != rune('b') { + goto l710 + } + position++ goto l709 + l710: + position, tokenIndex = position709, tokenIndex709 + if buffer[position] != rune('s') { + goto l711 + } + position++ + goto l709 + l711: + position, tokenIndex = position709, tokenIndex709 + if buffer[position] != rune('d') { + goto l712 + } + position++ + goto l709 + l712: + position, tokenIndex = position709, tokenIndex709 + if buffer[position] != rune('h') { + goto l713 + } + position++ + goto l709 + l713: + position, tokenIndex = position709, tokenIndex709 + if buffer[position] != rune('q') { + goto l705 + } + position++ } - position++ - goto l708 l709: - position, tokenIndex = position708, tokenIndex708 - if buffer[position] != rune('M') { - goto l710 + { + position714, tokenIndex714 := position, tokenIndex + if buffer[position] != rune('[') { + goto l714 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l714 + } + position++ + { + position716, tokenIndex716 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l716 + } + position++ + goto l717 + l716: + position, tokenIndex = position716, tokenIndex716 + } + l717: + if buffer[position] != rune(']') { + goto l714 + } + position++ + goto l715 + l714: + position, tokenIndex = position714, tokenIndex714 } - position++ - goto l708 - l710: - position, tokenIndex = position708, tokenIndex708 - if buffer[position] != rune('z') { - goto l711 - } - position++ - goto l708 - l711: - position, tokenIndex = position708, tokenIndex708 - if buffer[position] != rune('Z') { - goto l702 - } - position++ + l715: + goto l706 + l705: + position, tokenIndex = position705, tokenIndex705 } - l708: - add(ruleSVE2PredicateRegister, position703) + l706: + add(ruleARMVectorRegister, position692) } return true - l702: - position, tokenIndex = position702, tokenIndex702 + l691: + position, tokenIndex = position691, tokenIndex691 return false }, - /* 47 MemoryRef <- <((SymbolRef BaseIndexScale) / SymbolRef / Low12BitsSymbolRef / (Offset* BaseIndexScale) / (SegmentRegister Offset BaseIndexScale) / (SegmentRegister BaseIndexScale) / (SegmentRegister Offset) / ARMBaseIndexScale / BaseIndexScale)> */ + /* 46 SVE2PredicateRegister <- <(('p' / 'P') [0-9] [0-9]? '/' ('m' / 'M' / ('z' / 'Z')))> */ func() bool { - position712, tokenIndex712 := position, tokenIndex + position718, tokenIndex718 := position, tokenIndex { - position713 := position + position719 := position { - position714, tokenIndex714 := position, tokenIndex - if !_rules[ruleSymbolRef]() { - goto l715 + position720, tokenIndex720 := position, tokenIndex + if buffer[position] != rune('p') { + goto l721 } - if !_rules[ruleBaseIndexScale]() { - goto l715 - } - goto l714 - l715: - position, tokenIndex = position714, tokenIndex714 - if !_rules[ruleSymbolRef]() { - goto l716 - } - goto l714 - l716: - position, tokenIndex = position714, tokenIndex714 - if !_rules[ruleLow12BitsSymbolRef]() { - goto l717 - } - goto l714 - l717: - position, tokenIndex = position714, tokenIndex714 - l719: - { - position720, tokenIndex720 := position, tokenIndex - if !_rules[ruleOffset]() { - goto l720 - } - goto l719 - l720: - position, tokenIndex = position720, tokenIndex720 - } - if !_rules[ruleBaseIndexScale]() { + position++ + goto l720 + l721: + position, tokenIndex = position720, tokenIndex720 + if buffer[position] != rune('P') { goto l718 } - goto l714 - l718: - position, tokenIndex = position714, tokenIndex714 - if !_rules[ruleSegmentRegister]() { - goto l721 - } - if !_rules[ruleOffset]() { - goto l721 - } - if !_rules[ruleBaseIndexScale]() { - goto l721 - } - goto l714 - l721: - position, tokenIndex = position714, tokenIndex714 - if !_rules[ruleSegmentRegister]() { - goto l722 - } - if !_rules[ruleBaseIndexScale]() { - goto l722 - } - goto l714 - l722: - position, tokenIndex = position714, tokenIndex714 - if !_rules[ruleSegmentRegister]() { - goto l723 - } - if !_rules[ruleOffset]() { - goto l723 - } - goto l714 - l723: - position, tokenIndex = position714, tokenIndex714 - if !_rules[ruleARMBaseIndexScale]() { - goto l724 - } - goto l714 - l724: - position, tokenIndex = position714, tokenIndex714 - if !_rules[ruleBaseIndexScale]() { - goto l712 - } + position++ } - l714: - add(ruleMemoryRef, position713) - } - return true - l712: - position, tokenIndex = position712, tokenIndex712 - return false - }, - /* 48 SymbolRef <- <((Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?)> */ - func() bool { - position725, tokenIndex725 := position, tokenIndex - { - position726 := position + l720: + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l718 + } + position++ { - position727, tokenIndex727 := position, tokenIndex - l729: - { - position730, tokenIndex730 := position, tokenIndex - if !_rules[ruleOffset]() { - goto l730 - } - goto l729 - l730: - position, tokenIndex = position730, tokenIndex730 - } - if buffer[position] != rune('+') { - goto l727 + position722, tokenIndex722 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l722 } position++ - goto l728 - l727: - position, tokenIndex = position727, tokenIndex727 + goto l723 + l722: + position, tokenIndex = position722, tokenIndex722 } - l728: + l723: + if buffer[position] != rune('/') { + goto l718 + } + position++ + { + position724, tokenIndex724 := position, tokenIndex + if buffer[position] != rune('m') { + goto l725 + } + position++ + goto l724 + l725: + position, tokenIndex = position724, tokenIndex724 + if buffer[position] != rune('M') { + goto l726 + } + position++ + goto l724 + l726: + position, tokenIndex = position724, tokenIndex724 + { + position727, tokenIndex727 := position, tokenIndex + if buffer[position] != rune('z') { + goto l728 + } + position++ + goto l727 + l728: + position, tokenIndex = position727, tokenIndex727 + if buffer[position] != rune('Z') { + goto l718 + } + position++ + } + l727: + } + l724: + add(ruleSVE2PredicateRegister, position719) + } + return true + l718: + position, tokenIndex = position718, tokenIndex718 + return false + }, + /* 47 SVE2SpecialValue <- <(((('p' / 'P') ('o' / 'O') ('w' / 'W') '2') / (('v' / 'V') ('l' / 'L') ('1' / '2' / '3' / '4' / '5' / '6' / '7' / '8') ![0-9]) / (('v' / 'V') ('l' / 'L') '1' '6') / (('v' / 'V') ('l' / 'L') '3' '2') / (('v' / 'V') ('l' / 'L') '6' '4') / (('v' / 'V') ('l' / 'L') '1' '2' '8') / (('v' / 'V') ('l' / 'L') '2' '5' '6') / (('m' / 'M') ('u' / 'U') ('l' / 'L') '3') / (('m' / 'M') ('u' / 'U') ('l' / 'L') '4') / (('a' / 'A') ('l' / 'L') ('l' / 'L'))) !([0-9] / [0-9] / ([a-z] / [A-Z]) / '_'))> */ + func() bool { + position729, tokenIndex729 := position, tokenIndex + { + position730 := position { position731, tokenIndex731 := position, tokenIndex - if !_rules[ruleLocalSymbol]() { + { + position733, tokenIndex733 := position, tokenIndex + if buffer[position] != rune('p') { + goto l734 + } + position++ + goto l733 + l734: + position, tokenIndex = position733, tokenIndex733 + if buffer[position] != rune('P') { + goto l732 + } + position++ + } + l733: + { + position735, tokenIndex735 := position, tokenIndex + if buffer[position] != rune('o') { + goto l736 + } + position++ + goto l735 + l736: + position, tokenIndex = position735, tokenIndex735 + if buffer[position] != rune('O') { + goto l732 + } + position++ + } + l735: + { + position737, tokenIndex737 := position, tokenIndex + if buffer[position] != rune('w') { + goto l738 + } + position++ + goto l737 + l738: + position, tokenIndex = position737, tokenIndex737 + if buffer[position] != rune('W') { + goto l732 + } + position++ + } + l737: + if buffer[position] != rune('2') { goto l732 } + position++ goto l731 l732: position, tokenIndex = position731, tokenIndex731 - if !_rules[ruleSymbolName]() { - goto l725 - } - } - l731: - l733: - { - position734, tokenIndex734 := position, tokenIndex - if !_rules[ruleOffset]() { - goto l734 - } - goto l733 - l734: - position, tokenIndex = position734, tokenIndex734 - } - { - position735, tokenIndex735 := position, tokenIndex - if buffer[position] != rune('@') { - goto l735 - } - position++ - if !_rules[ruleSection]() { - goto l735 - } - l737: { - position738, tokenIndex738 := position, tokenIndex - if !_rules[ruleOffset]() { - goto l738 + position740, tokenIndex740 := position, tokenIndex + if buffer[position] != rune('v') { + goto l741 } - goto l737 - l738: - position, tokenIndex = position738, tokenIndex738 + position++ + goto l740 + l741: + position, tokenIndex = position740, tokenIndex740 + if buffer[position] != rune('V') { + goto l739 + } + position++ } - goto l736 - l735: - position, tokenIndex = position735, tokenIndex735 - } - l736: - add(ruleSymbolRef, position726) - } - return true - l725: - position, tokenIndex = position725, tokenIndex725 - return false - }, - /* 49 Low12BitsSymbolRef <- <(':' ('l' / 'L') ('o' / 'O') '1' '2' ':' (LocalSymbol / SymbolName) Offset?)> */ - func() bool { - position739, tokenIndex739 := position, tokenIndex - { - position740 := position - if buffer[position] != rune(':') { - goto l739 - } - position++ - { - position741, tokenIndex741 := position, tokenIndex - if buffer[position] != rune('l') { - goto l742 - } - position++ - goto l741 - l742: - position, tokenIndex = position741, tokenIndex741 - if buffer[position] != rune('L') { - goto l739 - } - position++ - } - l741: - { - position743, tokenIndex743 := position, tokenIndex - if buffer[position] != rune('o') { - goto l744 - } - position++ - goto l743 - l744: - position, tokenIndex = position743, tokenIndex743 - if buffer[position] != rune('O') { - goto l739 - } - position++ - } - l743: - if buffer[position] != rune('1') { - goto l739 - } - position++ - if buffer[position] != rune('2') { - goto l739 - } - position++ - if buffer[position] != rune(':') { - goto l739 - } - position++ - { - position745, tokenIndex745 := position, tokenIndex - if !_rules[ruleLocalSymbol]() { - goto l746 - } - goto l745 - l746: - position, tokenIndex = position745, tokenIndex745 - if !_rules[ruleSymbolName]() { - goto l739 - } - } - l745: - { - position747, tokenIndex747 := position, tokenIndex - if !_rules[ruleOffset]() { - goto l747 - } - goto l748 - l747: - position, tokenIndex = position747, tokenIndex747 - } - l748: - add(ruleLow12BitsSymbolRef, position740) - } - return true - l739: - position, tokenIndex = position739, tokenIndex739 - return false - }, - /* 50 ARMBaseIndexScale <- <('[' ARMRegister (',' WS? (('#' Offset (('*' [0-9]+) / ('*' '(' [0-9]+ Operator [0-9]+ ')') / ('+' [0-9]+)*)?) / ARMGOTLow12 / Low12BitsSymbolRef / ARMRegister) (',' WS? ARMConstantTweak)?)? ']' ARMPostincrement?)> */ - func() bool { - position749, tokenIndex749 := position, tokenIndex - { - position750 := position - if buffer[position] != rune('[') { - goto l749 - } - position++ - if !_rules[ruleARMRegister]() { - goto l749 - } - { - position751, tokenIndex751 := position, tokenIndex - if buffer[position] != rune(',') { - goto l751 - } - position++ + l740: { - position753, tokenIndex753 := position, tokenIndex - if !_rules[ruleWS]() { + position742, tokenIndex742 := position, tokenIndex + if buffer[position] != rune('l') { + goto l743 + } + position++ + goto l742 + l743: + position, tokenIndex = position742, tokenIndex742 + if buffer[position] != rune('L') { + goto l739 + } + position++ + } + l742: + { + position744, tokenIndex744 := position, tokenIndex + if buffer[position] != rune('1') { + goto l745 + } + position++ + goto l744 + l745: + position, tokenIndex = position744, tokenIndex744 + if buffer[position] != rune('2') { + goto l746 + } + position++ + goto l744 + l746: + position, tokenIndex = position744, tokenIndex744 + if buffer[position] != rune('3') { + goto l747 + } + position++ + goto l744 + l747: + position, tokenIndex = position744, tokenIndex744 + if buffer[position] != rune('4') { + goto l748 + } + position++ + goto l744 + l748: + position, tokenIndex = position744, tokenIndex744 + if buffer[position] != rune('5') { + goto l749 + } + position++ + goto l744 + l749: + position, tokenIndex = position744, tokenIndex744 + if buffer[position] != rune('6') { + goto l750 + } + position++ + goto l744 + l750: + position, tokenIndex = position744, tokenIndex744 + if buffer[position] != rune('7') { + goto l751 + } + position++ + goto l744 + l751: + position, tokenIndex = position744, tokenIndex744 + if buffer[position] != rune('8') { + goto l739 + } + position++ + } + l744: + { + position752, tokenIndex752 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l752 + } + position++ + goto l739 + l752: + position, tokenIndex = position752, tokenIndex752 + } + goto l731 + l739: + position, tokenIndex = position731, tokenIndex731 + { + position754, tokenIndex754 := position, tokenIndex + if buffer[position] != rune('v') { + goto l755 + } + position++ + goto l754 + l755: + position, tokenIndex = position754, tokenIndex754 + if buffer[position] != rune('V') { goto l753 } - goto l754 - l753: - position, tokenIndex = position753, tokenIndex753 + position++ } l754: { - position755, tokenIndex755 := position, tokenIndex - if buffer[position] != rune('#') { - goto l756 + position756, tokenIndex756 := position, tokenIndex + if buffer[position] != rune('l') { + goto l757 } position++ - if !_rules[ruleOffset]() { - goto l756 + goto l756 + l757: + position, tokenIndex = position756, tokenIndex756 + if buffer[position] != rune('L') { + goto l753 } - { - position757, tokenIndex757 := position, tokenIndex - { - position759, tokenIndex759 := position, tokenIndex - if buffer[position] != rune('*') { - goto l760 - } - position++ - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l760 - } - position++ - l761: - { - position762, tokenIndex762 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l762 - } - position++ - goto l761 - l762: - position, tokenIndex = position762, tokenIndex762 - } - goto l759 - l760: - position, tokenIndex = position759, tokenIndex759 - if buffer[position] != rune('*') { - goto l763 - } - position++ - if buffer[position] != rune('(') { - goto l763 - } - position++ - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l763 - } - position++ - l764: - { - position765, tokenIndex765 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l765 - } - position++ - goto l764 - l765: - position, tokenIndex = position765, tokenIndex765 - } - if !_rules[ruleOperator]() { - goto l763 - } - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l763 - } - position++ - l766: - { - position767, tokenIndex767 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l767 - } - position++ - goto l766 - l767: - position, tokenIndex = position767, tokenIndex767 - } - if buffer[position] != rune(')') { - goto l763 - } - position++ - goto l759 - l763: - position, tokenIndex = position759, tokenIndex759 - l768: - { - position769, tokenIndex769 := position, tokenIndex - if buffer[position] != rune('+') { - goto l769 - } - position++ - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l769 - } - position++ - l770: - { - position771, tokenIndex771 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l771 - } - position++ - goto l770 - l771: - position, tokenIndex = position771, tokenIndex771 - } - goto l768 - l769: - position, tokenIndex = position769, tokenIndex769 - } - } - l759: + position++ + } + l756: + if buffer[position] != rune('1') { + goto l753 + } + position++ + if buffer[position] != rune('6') { + goto l753 + } + position++ + goto l731 + l753: + position, tokenIndex = position731, tokenIndex731 + { + position759, tokenIndex759 := position, tokenIndex + if buffer[position] != rune('v') { + goto l760 + } + position++ + goto l759 + l760: + position, tokenIndex = position759, tokenIndex759 + if buffer[position] != rune('V') { goto l758 - - position, tokenIndex = position757, tokenIndex757 } - l758: - goto l755 - l756: - position, tokenIndex = position755, tokenIndex755 - if !_rules[ruleARMGOTLow12]() { + position++ + } + l759: + { + position761, tokenIndex761 := position, tokenIndex + if buffer[position] != rune('l') { + goto l762 + } + position++ + goto l761 + l762: + position, tokenIndex = position761, tokenIndex761 + if buffer[position] != rune('L') { + goto l758 + } + position++ + } + l761: + if buffer[position] != rune('3') { + goto l758 + } + position++ + if buffer[position] != rune('2') { + goto l758 + } + position++ + goto l731 + l758: + position, tokenIndex = position731, tokenIndex731 + { + position764, tokenIndex764 := position, tokenIndex + if buffer[position] != rune('v') { + goto l765 + } + position++ + goto l764 + l765: + position, tokenIndex = position764, tokenIndex764 + if buffer[position] != rune('V') { + goto l763 + } + position++ + } + l764: + { + position766, tokenIndex766 := position, tokenIndex + if buffer[position] != rune('l') { + goto l767 + } + position++ + goto l766 + l767: + position, tokenIndex = position766, tokenIndex766 + if buffer[position] != rune('L') { + goto l763 + } + position++ + } + l766: + if buffer[position] != rune('6') { + goto l763 + } + position++ + if buffer[position] != rune('4') { + goto l763 + } + position++ + goto l731 + l763: + position, tokenIndex = position731, tokenIndex731 + { + position769, tokenIndex769 := position, tokenIndex + if buffer[position] != rune('v') { + goto l770 + } + position++ + goto l769 + l770: + position, tokenIndex = position769, tokenIndex769 + if buffer[position] != rune('V') { + goto l768 + } + position++ + } + l769: + { + position771, tokenIndex771 := position, tokenIndex + if buffer[position] != rune('l') { goto l772 } - goto l755 + position++ + goto l771 l772: - position, tokenIndex = position755, tokenIndex755 - if !_rules[ruleLow12BitsSymbolRef]() { - goto l773 - } - goto l755 - l773: - position, tokenIndex = position755, tokenIndex755 - if !_rules[ruleARMRegister]() { - goto l751 - } - } - l755: - { - position774, tokenIndex774 := position, tokenIndex - if buffer[position] != rune(',') { - goto l774 + position, tokenIndex = position771, tokenIndex771 + if buffer[position] != rune('L') { + goto l768 } position++ - { - position776, tokenIndex776 := position, tokenIndex - if !_rules[ruleWS]() { - goto l776 - } - goto l777 - l776: - position, tokenIndex = position776, tokenIndex776 - } - l777: - if !_rules[ruleARMConstantTweak]() { - goto l774 - } - goto l775 - l774: - position, tokenIndex = position774, tokenIndex774 } - l775: - goto l752 - l751: - position, tokenIndex = position751, tokenIndex751 - } - l752: - if buffer[position] != rune(']') { - goto l749 - } - position++ - { - position778, tokenIndex778 := position, tokenIndex - if !_rules[ruleARMPostincrement]() { + l771: + if buffer[position] != rune('1') { + goto l768 + } + position++ + if buffer[position] != rune('2') { + goto l768 + } + position++ + if buffer[position] != rune('8') { + goto l768 + } + position++ + goto l731 + l768: + position, tokenIndex = position731, tokenIndex731 + { + position774, tokenIndex774 := position, tokenIndex + if buffer[position] != rune('v') { + goto l775 + } + position++ + goto l774 + l775: + position, tokenIndex = position774, tokenIndex774 + if buffer[position] != rune('V') { + goto l773 + } + position++ + } + l774: + { + position776, tokenIndex776 := position, tokenIndex + if buffer[position] != rune('l') { + goto l777 + } + position++ + goto l776 + l777: + position, tokenIndex = position776, tokenIndex776 + if buffer[position] != rune('L') { + goto l773 + } + position++ + } + l776: + if buffer[position] != rune('2') { + goto l773 + } + position++ + if buffer[position] != rune('5') { + goto l773 + } + position++ + if buffer[position] != rune('6') { + goto l773 + } + position++ + goto l731 + l773: + position, tokenIndex = position731, tokenIndex731 + { + position779, tokenIndex779 := position, tokenIndex + if buffer[position] != rune('m') { + goto l780 + } + position++ + goto l779 + l780: + position, tokenIndex = position779, tokenIndex779 + if buffer[position] != rune('M') { + goto l778 + } + position++ + } + l779: + { + position781, tokenIndex781 := position, tokenIndex + if buffer[position] != rune('u') { + goto l782 + } + position++ + goto l781 + l782: + position, tokenIndex = position781, tokenIndex781 + if buffer[position] != rune('U') { + goto l778 + } + position++ + } + l781: + { + position783, tokenIndex783 := position, tokenIndex + if buffer[position] != rune('l') { + goto l784 + } + position++ + goto l783 + l784: + position, tokenIndex = position783, tokenIndex783 + if buffer[position] != rune('L') { + goto l778 + } + position++ + } + l783: + if buffer[position] != rune('3') { goto l778 } - goto l779 + position++ + goto l731 l778: - position, tokenIndex = position778, tokenIndex778 - } - l779: - add(ruleARMBaseIndexScale, position750) - } - return true - l749: - position, tokenIndex = position749, tokenIndex749 - return false - }, - /* 51 ARMGOTLow12 <- <(':' ('g' / 'G') ('o' / 'O') ('t' / 'T') '_' ('l' / 'L') ('o' / 'O') '1' '2' ':' SymbolName)> */ - func() bool { - position780, tokenIndex780 := position, tokenIndex - { - position781 := position - if buffer[position] != rune(':') { - goto l780 - } - position++ - { - position782, tokenIndex782 := position, tokenIndex - if buffer[position] != rune('g') { - goto l783 + position, tokenIndex = position731, tokenIndex731 + { + position786, tokenIndex786 := position, tokenIndex + if buffer[position] != rune('m') { + goto l787 + } + position++ + goto l786 + l787: + position, tokenIndex = position786, tokenIndex786 + if buffer[position] != rune('M') { + goto l785 + } + position++ } - position++ - goto l782 - l783: - position, tokenIndex = position782, tokenIndex782 - if buffer[position] != rune('G') { - goto l780 + l786: + { + position788, tokenIndex788 := position, tokenIndex + if buffer[position] != rune('u') { + goto l789 + } + position++ + goto l788 + l789: + position, tokenIndex = position788, tokenIndex788 + if buffer[position] != rune('U') { + goto l785 + } + position++ } - position++ - } - l782: - { - position784, tokenIndex784 := position, tokenIndex - if buffer[position] != rune('o') { + l788: + { + position790, tokenIndex790 := position, tokenIndex + if buffer[position] != rune('l') { + goto l791 + } + position++ + goto l790 + l791: + position, tokenIndex = position790, tokenIndex790 + if buffer[position] != rune('L') { + goto l785 + } + position++ + } + l790: + if buffer[position] != rune('4') { goto l785 } position++ - goto l784 + goto l731 l785: - position, tokenIndex = position784, tokenIndex784 - if buffer[position] != rune('O') { - goto l780 + position, tokenIndex = position731, tokenIndex731 + { + position792, tokenIndex792 := position, tokenIndex + if buffer[position] != rune('a') { + goto l793 + } + position++ + goto l792 + l793: + position, tokenIndex = position792, tokenIndex792 + if buffer[position] != rune('A') { + goto l729 + } + position++ } - position++ - } - l784: - { - position786, tokenIndex786 := position, tokenIndex - if buffer[position] != rune('t') { - goto l787 + l792: + { + position794, tokenIndex794 := position, tokenIndex + if buffer[position] != rune('l') { + goto l795 + } + position++ + goto l794 + l795: + position, tokenIndex = position794, tokenIndex794 + if buffer[position] != rune('L') { + goto l729 + } + position++ } - position++ - goto l786 - l787: - position, tokenIndex = position786, tokenIndex786 - if buffer[position] != rune('T') { - goto l780 - } - position++ - } - l786: - if buffer[position] != rune('_') { - goto l780 - } - position++ - { - position788, tokenIndex788 := position, tokenIndex - if buffer[position] != rune('l') { - goto l789 - } - position++ - goto l788 - l789: - position, tokenIndex = position788, tokenIndex788 - if buffer[position] != rune('L') { - goto l780 - } - position++ - } - l788: - { - position790, tokenIndex790 := position, tokenIndex - if buffer[position] != rune('o') { - goto l791 - } - position++ - goto l790 - l791: - position, tokenIndex = position790, tokenIndex790 - if buffer[position] != rune('O') { - goto l780 - } - position++ - } - l790: - if buffer[position] != rune('1') { - goto l780 - } - position++ - if buffer[position] != rune('2') { - goto l780 - } - position++ - if buffer[position] != rune(':') { - goto l780 - } - position++ - if !_rules[ruleSymbolName]() { - goto l780 - } - add(ruleARMGOTLow12, position781) - } - return true - l780: - position, tokenIndex = position780, tokenIndex780 - return false - }, - /* 52 ARMPostincrement <- <'!'> */ - func() bool { - position792, tokenIndex792 := position, tokenIndex - { - position793 := position - if buffer[position] != rune('!') { - goto l792 - } - position++ - add(ruleARMPostincrement, position793) - } - return true - l792: - position, tokenIndex = position792, tokenIndex792 - return false - }, - /* 53 BaseIndexScale <- <('(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)?)? ')')> */ - func() bool { - position794, tokenIndex794 := position, tokenIndex - { - position795 := position - if buffer[position] != rune('(') { - goto l794 - } - position++ - { - position796, tokenIndex796 := position, tokenIndex - if !_rules[ruleRegisterOrConstant]() { + l794: + { + position796, tokenIndex796 := position, tokenIndex + if buffer[position] != rune('l') { + goto l797 + } + position++ goto l796 + l797: + position, tokenIndex = position796, tokenIndex796 + if buffer[position] != rune('L') { + goto l729 + } + position++ } - goto l797 l796: - position, tokenIndex = position796, tokenIndex796 } - l797: + l731: { position798, tokenIndex798 := position, tokenIndex - if !_rules[ruleWS]() { - goto l798 + { + position799, tokenIndex799 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l800 + } + position++ + goto l799 + l800: + position, tokenIndex = position799, tokenIndex799 + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l801 + } + position++ + goto l799 + l801: + position, tokenIndex = position799, tokenIndex799 + { + position803, tokenIndex803 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { + goto l804 + } + position++ + goto l803 + l804: + position, tokenIndex = position803, tokenIndex803 + if c := buffer[position]; c < rune('A') || c > rune('Z') { + goto l802 + } + position++ + } + l803: + goto l799 + l802: + position, tokenIndex = position799, tokenIndex799 + if buffer[position] != rune('_') { + goto l798 + } + position++ } - goto l799 + l799: + goto l729 l798: position, tokenIndex = position798, tokenIndex798 } - l799: - { - position800, tokenIndex800 := position, tokenIndex - if buffer[position] != rune(',') { - goto l800 - } - position++ - { - position802, tokenIndex802 := position, tokenIndex - if !_rules[ruleWS]() { - goto l802 - } - goto l803 - l802: - position, tokenIndex = position802, tokenIndex802 - } - l803: - if !_rules[ruleRegisterOrConstant]() { - goto l800 - } - { - position804, tokenIndex804 := position, tokenIndex - if !_rules[ruleWS]() { - goto l804 - } - goto l805 - l804: - position, tokenIndex = position804, tokenIndex804 - } - l805: - { - position806, tokenIndex806 := position, tokenIndex - if buffer[position] != rune(',') { - goto l806 - } - position++ - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l806 - } - position++ - l808: - { - position809, tokenIndex809 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l809 - } - position++ - goto l808 - l809: - position, tokenIndex = position809, tokenIndex809 - } - goto l807 - l806: - position, tokenIndex = position806, tokenIndex806 - } - l807: - goto l801 - l800: - position, tokenIndex = position800, tokenIndex800 - } - l801: - if buffer[position] != rune(')') { - goto l794 - } - position++ - add(ruleBaseIndexScale, position795) + add(ruleSVE2SpecialValue, position730) } return true - l794: - position, tokenIndex = position794, tokenIndex794 + l729: + position, tokenIndex = position729, tokenIndex729 return false }, - /* 54 Operator <- <('+' / '-')> */ + /* 48 MemoryRef <- <((SymbolRef BaseIndexScale) / SymbolRef / Low12BitsSymbolRef / (Offset* BaseIndexScale) / (SegmentRegister Offset BaseIndexScale) / (SegmentRegister BaseIndexScale) / (SegmentRegister Offset) / ARMBaseIndexScale / BaseIndexScale)> */ func() bool { - position810, tokenIndex810 := position, tokenIndex + position805, tokenIndex805 := position, tokenIndex { - position811 := position + position806 := position { - position812, tokenIndex812 := position, tokenIndex - if buffer[position] != rune('+') { - goto l813 + position807, tokenIndex807 := position, tokenIndex + if !_rules[ruleSymbolRef]() { + goto l808 } - position++ - goto l812 - l813: - position, tokenIndex = position812, tokenIndex812 - if buffer[position] != rune('-') { + if !_rules[ruleBaseIndexScale]() { + goto l808 + } + goto l807 + l808: + position, tokenIndex = position807, tokenIndex807 + if !_rules[ruleSymbolRef]() { + goto l809 + } + goto l807 + l809: + position, tokenIndex = position807, tokenIndex807 + if !_rules[ruleLow12BitsSymbolRef]() { goto l810 } - position++ - } - l812: - add(ruleOperator, position811) - } - return true - l810: - position, tokenIndex = position810, tokenIndex810 - return false - }, - /* 55 Offset <- <('+'? '-'? (('0' ('b' / 'B') ('0' / '1')+) / ('0' ('x' / 'X') ([0-9] / [0-9] / ([a-f] / [A-F]))+) / [0-9]+))> */ - func() bool { - position814, tokenIndex814 := position, tokenIndex - { - position815 := position - { - position816, tokenIndex816 := position, tokenIndex - if buffer[position] != rune('+') { - goto l816 - } - position++ - goto l817 - l816: - position, tokenIndex = position816, tokenIndex816 - } - l817: - { - position818, tokenIndex818 := position, tokenIndex - if buffer[position] != rune('-') { - goto l818 - } - position++ - goto l819 - l818: - position, tokenIndex = position818, tokenIndex818 - } - l819: - { - position820, tokenIndex820 := position, tokenIndex - if buffer[position] != rune('0') { - goto l821 - } - position++ + goto l807 + l810: + position, tokenIndex = position807, tokenIndex807 + l812: { - position822, tokenIndex822 := position, tokenIndex - if buffer[position] != rune('b') { - goto l823 + position813, tokenIndex813 := position, tokenIndex + if !_rules[ruleOffset]() { + goto l813 } - position++ - goto l822 - l823: - position, tokenIndex = position822, tokenIndex822 - if buffer[position] != rune('B') { - goto l821 - } - position++ + goto l812 + l813: + position, tokenIndex = position813, tokenIndex813 } - l822: - { - position826, tokenIndex826 := position, tokenIndex - if buffer[position] != rune('0') { - goto l827 - } - position++ - goto l826 - l827: - position, tokenIndex = position826, tokenIndex826 - if buffer[position] != rune('1') { - goto l821 - } - position++ + if !_rules[ruleBaseIndexScale]() { + goto l811 } - l826: - l824: - { - position825, tokenIndex825 := position, tokenIndex - { - position828, tokenIndex828 := position, tokenIndex - if buffer[position] != rune('0') { - goto l829 - } - position++ - goto l828 - l829: - position, tokenIndex = position828, tokenIndex828 - if buffer[position] != rune('1') { - goto l825 - } - position++ - } - l828: - goto l824 - l825: - position, tokenIndex = position825, tokenIndex825 - } - goto l820 - l821: - position, tokenIndex = position820, tokenIndex820 - if buffer[position] != rune('0') { - goto l830 - } - position++ - { - position831, tokenIndex831 := position, tokenIndex - if buffer[position] != rune('x') { - goto l832 - } - position++ - goto l831 - l832: - position, tokenIndex = position831, tokenIndex831 - if buffer[position] != rune('X') { - goto l830 - } - position++ - } - l831: - { - position835, tokenIndex835 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l836 - } - position++ - goto l835 - l836: - position, tokenIndex = position835, tokenIndex835 - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l837 - } - position++ - goto l835 - l837: - position, tokenIndex = position835, tokenIndex835 - { - position838, tokenIndex838 := position, tokenIndex - if c := buffer[position]; c < rune('a') || c > rune('f') { - goto l839 - } - position++ - goto l838 - l839: - position, tokenIndex = position838, tokenIndex838 - if c := buffer[position]; c < rune('A') || c > rune('F') { - goto l830 - } - position++ - } - l838: - } - l835: - l833: - { - position834, tokenIndex834 := position, tokenIndex - { - position840, tokenIndex840 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l841 - } - position++ - goto l840 - l841: - position, tokenIndex = position840, tokenIndex840 - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l842 - } - position++ - goto l840 - l842: - position, tokenIndex = position840, tokenIndex840 - { - position843, tokenIndex843 := position, tokenIndex - if c := buffer[position]; c < rune('a') || c > rune('f') { - goto l844 - } - position++ - goto l843 - l844: - position, tokenIndex = position843, tokenIndex843 - if c := buffer[position]; c < rune('A') || c > rune('F') { - goto l834 - } - position++ - } - l843: - } - l840: - goto l833 - l834: - position, tokenIndex = position834, tokenIndex834 - } - goto l820 - l830: - position, tokenIndex = position820, tokenIndex820 - if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l807 + l811: + position, tokenIndex = position807, tokenIndex807 + if !_rules[ruleSegmentRegister]() { goto l814 } - position++ - l845: - { - position846, tokenIndex846 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l846 - } - position++ - goto l845 - l846: - position, tokenIndex = position846, tokenIndex846 + if !_rules[ruleOffset]() { + goto l814 + } + if !_rules[ruleBaseIndexScale]() { + goto l814 + } + goto l807 + l814: + position, tokenIndex = position807, tokenIndex807 + if !_rules[ruleSegmentRegister]() { + goto l815 + } + if !_rules[ruleBaseIndexScale]() { + goto l815 + } + goto l807 + l815: + position, tokenIndex = position807, tokenIndex807 + if !_rules[ruleSegmentRegister]() { + goto l816 + } + if !_rules[ruleOffset]() { + goto l816 + } + goto l807 + l816: + position, tokenIndex = position807, tokenIndex807 + if !_rules[ruleARMBaseIndexScale]() { + goto l817 + } + goto l807 + l817: + position, tokenIndex = position807, tokenIndex807 + if !_rules[ruleBaseIndexScale]() { + goto l805 } } - l820: - add(ruleOffset, position815) + l807: + add(ruleMemoryRef, position806) } return true - l814: - position, tokenIndex = position814, tokenIndex814 + l805: + position, tokenIndex = position805, tokenIndex805 return false }, - /* 56 Section <- <([a-z] / [A-Z] / '@')+> */ + /* 49 SymbolRef <- <((Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?)> */ func() bool { - position847, tokenIndex847 := position, tokenIndex + position818, tokenIndex818 := position, tokenIndex { - position848 := position + position819 := position { - position851, tokenIndex851 := position, tokenIndex - if c := buffer[position]; c < rune('a') || c > rune('z') { - goto l852 + position820, tokenIndex820 := position, tokenIndex + l822: + { + position823, tokenIndex823 := position, tokenIndex + if !_rules[ruleOffset]() { + goto l823 + } + goto l822 + l823: + position, tokenIndex = position823, tokenIndex823 + } + if buffer[position] != rune('+') { + goto l820 } position++ - goto l851 - l852: - position, tokenIndex = position851, tokenIndex851 - if c := buffer[position]; c < rune('A') || c > rune('Z') { - goto l853 + goto l821 + l820: + position, tokenIndex = position820, tokenIndex820 + } + l821: + { + position824, tokenIndex824 := position, tokenIndex + if !_rules[ruleLocalSymbol]() { + goto l825 } - position++ - goto l851 - l853: - position, tokenIndex = position851, tokenIndex851 + goto l824 + l825: + position, tokenIndex = position824, tokenIndex824 + if !_rules[ruleSymbolName]() { + goto l818 + } + } + l824: + l826: + { + position827, tokenIndex827 := position, tokenIndex + if !_rules[ruleOffset]() { + goto l827 + } + goto l826 + l827: + position, tokenIndex = position827, tokenIndex827 + } + { + position828, tokenIndex828 := position, tokenIndex if buffer[position] != rune('@') { - goto l847 + goto l828 } position++ - } - l851: - l849: - { - position850, tokenIndex850 := position, tokenIndex - { - position854, tokenIndex854 := position, tokenIndex - if c := buffer[position]; c < rune('a') || c > rune('z') { - goto l855 - } - position++ - goto l854 - l855: - position, tokenIndex = position854, tokenIndex854 - if c := buffer[position]; c < rune('A') || c > rune('Z') { - goto l856 - } - position++ - goto l854 - l856: - position, tokenIndex = position854, tokenIndex854 - if buffer[position] != rune('@') { - goto l850 - } - position++ + if !_rules[ruleSection]() { + goto l828 } - l854: - goto l849 - l850: - position, tokenIndex = position850, tokenIndex850 + l830: + { + position831, tokenIndex831 := position, tokenIndex + if !_rules[ruleOffset]() { + goto l831 + } + goto l830 + l831: + position, tokenIndex = position831, tokenIndex831 + } + goto l829 + l828: + position, tokenIndex = position828, tokenIndex828 } - add(ruleSection, position848) + l829: + add(ruleSymbolRef, position819) } return true - l847: - position, tokenIndex = position847, tokenIndex847 + l818: + position, tokenIndex = position818, tokenIndex818 return false }, - /* 57 SegmentRegister <- <('%' ([c-g] / 's') ('s' ':'))> */ + /* 50 Low12BitsSymbolRef <- <(':' ('l' / 'L') ('o' / 'O') '1' '2' ':' (LocalSymbol / SymbolName) Offset?)> */ func() bool { - position857, tokenIndex857 := position, tokenIndex + position832, tokenIndex832 := position, tokenIndex { - position858 := position - if buffer[position] != rune('%') { - goto l857 + position833 := position + if buffer[position] != rune(':') { + goto l832 } position++ { - position859, tokenIndex859 := position, tokenIndex - if c := buffer[position]; c < rune('c') || c > rune('g') { - goto l860 + position834, tokenIndex834 := position, tokenIndex + if buffer[position] != rune('l') { + goto l835 } position++ - goto l859 - l860: - position, tokenIndex = position859, tokenIndex859 - if buffer[position] != rune('s') { - goto l857 + goto l834 + l835: + position, tokenIndex = position834, tokenIndex834 + if buffer[position] != rune('L') { + goto l832 } position++ } - l859: - if buffer[position] != rune('s') { - goto l857 + l834: + { + position836, tokenIndex836 := position, tokenIndex + if buffer[position] != rune('o') { + goto l837 + } + position++ + goto l836 + l837: + position, tokenIndex = position836, tokenIndex836 + if buffer[position] != rune('O') { + goto l832 + } + position++ + } + l836: + if buffer[position] != rune('1') { + goto l832 + } + position++ + if buffer[position] != rune('2') { + goto l832 } position++ if buffer[position] != rune(':') { - goto l857 + goto l832 } position++ - add(ruleSegmentRegister, position858) + { + position838, tokenIndex838 := position, tokenIndex + if !_rules[ruleLocalSymbol]() { + goto l839 + } + goto l838 + l839: + position, tokenIndex = position838, tokenIndex838 + if !_rules[ruleSymbolName]() { + goto l832 + } + } + l838: + { + position840, tokenIndex840 := position, tokenIndex + if !_rules[ruleOffset]() { + goto l840 + } + goto l841 + l840: + position, tokenIndex = position840, tokenIndex840 + } + l841: + add(ruleLow12BitsSymbolRef, position833) } return true - l857: - position, tokenIndex = position857, tokenIndex857 + l832: + position, tokenIndex = position832, tokenIndex832 + return false + }, + /* 51 ARMBaseIndexScale <- <('[' ARMRegister (',' WS? (('#' Offset (('*' [0-9]+) / ('*' '(' [0-9]+ Operator [0-9]+ ')') / ('+' [0-9]+)*)?) / ARMGOTLow12 / Low12BitsSymbolRef / ARMRegister) (',' WS? ARMConstantTweak)?)? ']' ARMPostincrement?)> */ + func() bool { + position842, tokenIndex842 := position, tokenIndex + { + position843 := position + if buffer[position] != rune('[') { + goto l842 + } + position++ + if !_rules[ruleARMRegister]() { + goto l842 + } + { + position844, tokenIndex844 := position, tokenIndex + if buffer[position] != rune(',') { + goto l844 + } + position++ + { + position846, tokenIndex846 := position, tokenIndex + if !_rules[ruleWS]() { + goto l846 + } + goto l847 + l846: + position, tokenIndex = position846, tokenIndex846 + } + l847: + { + position848, tokenIndex848 := position, tokenIndex + if buffer[position] != rune('#') { + goto l849 + } + position++ + if !_rules[ruleOffset]() { + goto l849 + } + { + position850, tokenIndex850 := position, tokenIndex + { + position852, tokenIndex852 := position, tokenIndex + if buffer[position] != rune('*') { + goto l853 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l853 + } + position++ + l854: + { + position855, tokenIndex855 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l855 + } + position++ + goto l854 + l855: + position, tokenIndex = position855, tokenIndex855 + } + goto l852 + l853: + position, tokenIndex = position852, tokenIndex852 + if buffer[position] != rune('*') { + goto l856 + } + position++ + if buffer[position] != rune('(') { + goto l856 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l856 + } + position++ + l857: + { + position858, tokenIndex858 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l858 + } + position++ + goto l857 + l858: + position, tokenIndex = position858, tokenIndex858 + } + if !_rules[ruleOperator]() { + goto l856 + } + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l856 + } + position++ + l859: + { + position860, tokenIndex860 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l860 + } + position++ + goto l859 + l860: + position, tokenIndex = position860, tokenIndex860 + } + if buffer[position] != rune(')') { + goto l856 + } + position++ + goto l852 + l856: + position, tokenIndex = position852, tokenIndex852 + l861: + { + position862, tokenIndex862 := position, tokenIndex + if buffer[position] != rune('+') { + goto l862 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l862 + } + position++ + l863: + { + position864, tokenIndex864 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l864 + } + position++ + goto l863 + l864: + position, tokenIndex = position864, tokenIndex864 + } + goto l861 + l862: + position, tokenIndex = position862, tokenIndex862 + } + } + l852: + goto l851 + + position, tokenIndex = position850, tokenIndex850 + } + l851: + goto l848 + l849: + position, tokenIndex = position848, tokenIndex848 + if !_rules[ruleARMGOTLow12]() { + goto l865 + } + goto l848 + l865: + position, tokenIndex = position848, tokenIndex848 + if !_rules[ruleLow12BitsSymbolRef]() { + goto l866 + } + goto l848 + l866: + position, tokenIndex = position848, tokenIndex848 + if !_rules[ruleARMRegister]() { + goto l844 + } + } + l848: + { + position867, tokenIndex867 := position, tokenIndex + if buffer[position] != rune(',') { + goto l867 + } + position++ + { + position869, tokenIndex869 := position, tokenIndex + if !_rules[ruleWS]() { + goto l869 + } + goto l870 + l869: + position, tokenIndex = position869, tokenIndex869 + } + l870: + if !_rules[ruleARMConstantTweak]() { + goto l867 + } + goto l868 + l867: + position, tokenIndex = position867, tokenIndex867 + } + l868: + goto l845 + l844: + position, tokenIndex = position844, tokenIndex844 + } + l845: + if buffer[position] != rune(']') { + goto l842 + } + position++ + { + position871, tokenIndex871 := position, tokenIndex + if !_rules[ruleARMPostincrement]() { + goto l871 + } + goto l872 + l871: + position, tokenIndex = position871, tokenIndex871 + } + l872: + add(ruleARMBaseIndexScale, position843) + } + return true + l842: + position, tokenIndex = position842, tokenIndex842 + return false + }, + /* 52 ARMGOTLow12 <- <(':' ('g' / 'G') ('o' / 'O') ('t' / 'T') '_' ('l' / 'L') ('o' / 'O') '1' '2' ':' SymbolName)> */ + func() bool { + position873, tokenIndex873 := position, tokenIndex + { + position874 := position + if buffer[position] != rune(':') { + goto l873 + } + position++ + { + position875, tokenIndex875 := position, tokenIndex + if buffer[position] != rune('g') { + goto l876 + } + position++ + goto l875 + l876: + position, tokenIndex = position875, tokenIndex875 + if buffer[position] != rune('G') { + goto l873 + } + position++ + } + l875: + { + position877, tokenIndex877 := position, tokenIndex + if buffer[position] != rune('o') { + goto l878 + } + position++ + goto l877 + l878: + position, tokenIndex = position877, tokenIndex877 + if buffer[position] != rune('O') { + goto l873 + } + position++ + } + l877: + { + position879, tokenIndex879 := position, tokenIndex + if buffer[position] != rune('t') { + goto l880 + } + position++ + goto l879 + l880: + position, tokenIndex = position879, tokenIndex879 + if buffer[position] != rune('T') { + goto l873 + } + position++ + } + l879: + if buffer[position] != rune('_') { + goto l873 + } + position++ + { + position881, tokenIndex881 := position, tokenIndex + if buffer[position] != rune('l') { + goto l882 + } + position++ + goto l881 + l882: + position, tokenIndex = position881, tokenIndex881 + if buffer[position] != rune('L') { + goto l873 + } + position++ + } + l881: + { + position883, tokenIndex883 := position, tokenIndex + if buffer[position] != rune('o') { + goto l884 + } + position++ + goto l883 + l884: + position, tokenIndex = position883, tokenIndex883 + if buffer[position] != rune('O') { + goto l873 + } + position++ + } + l883: + if buffer[position] != rune('1') { + goto l873 + } + position++ + if buffer[position] != rune('2') { + goto l873 + } + position++ + if buffer[position] != rune(':') { + goto l873 + } + position++ + if !_rules[ruleSymbolName]() { + goto l873 + } + add(ruleARMGOTLow12, position874) + } + return true + l873: + position, tokenIndex = position873, tokenIndex873 + return false + }, + /* 53 ARMPostincrement <- <'!'> */ + func() bool { + position885, tokenIndex885 := position, tokenIndex + { + position886 := position + if buffer[position] != rune('!') { + goto l885 + } + position++ + add(ruleARMPostincrement, position886) + } + return true + l885: + position, tokenIndex = position885, tokenIndex885 + return false + }, + /* 54 BaseIndexScale <- <('(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)?)? ')')> */ + func() bool { + position887, tokenIndex887 := position, tokenIndex + { + position888 := position + if buffer[position] != rune('(') { + goto l887 + } + position++ + { + position889, tokenIndex889 := position, tokenIndex + if !_rules[ruleRegisterOrConstant]() { + goto l889 + } + goto l890 + l889: + position, tokenIndex = position889, tokenIndex889 + } + l890: + { + position891, tokenIndex891 := position, tokenIndex + if !_rules[ruleWS]() { + goto l891 + } + goto l892 + l891: + position, tokenIndex = position891, tokenIndex891 + } + l892: + { + position893, tokenIndex893 := position, tokenIndex + if buffer[position] != rune(',') { + goto l893 + } + position++ + { + position895, tokenIndex895 := position, tokenIndex + if !_rules[ruleWS]() { + goto l895 + } + goto l896 + l895: + position, tokenIndex = position895, tokenIndex895 + } + l896: + if !_rules[ruleRegisterOrConstant]() { + goto l893 + } + { + position897, tokenIndex897 := position, tokenIndex + if !_rules[ruleWS]() { + goto l897 + } + goto l898 + l897: + position, tokenIndex = position897, tokenIndex897 + } + l898: + { + position899, tokenIndex899 := position, tokenIndex + if buffer[position] != rune(',') { + goto l899 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l899 + } + position++ + l901: + { + position902, tokenIndex902 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l902 + } + position++ + goto l901 + l902: + position, tokenIndex = position902, tokenIndex902 + } + goto l900 + l899: + position, tokenIndex = position899, tokenIndex899 + } + l900: + goto l894 + l893: + position, tokenIndex = position893, tokenIndex893 + } + l894: + if buffer[position] != rune(')') { + goto l887 + } + position++ + add(ruleBaseIndexScale, position888) + } + return true + l887: + position, tokenIndex = position887, tokenIndex887 + return false + }, + /* 55 Operator <- <('+' / '-')> */ + func() bool { + position903, tokenIndex903 := position, tokenIndex + { + position904 := position + { + position905, tokenIndex905 := position, tokenIndex + if buffer[position] != rune('+') { + goto l906 + } + position++ + goto l905 + l906: + position, tokenIndex = position905, tokenIndex905 + if buffer[position] != rune('-') { + goto l903 + } + position++ + } + l905: + add(ruleOperator, position904) + } + return true + l903: + position, tokenIndex = position903, tokenIndex903 + return false + }, + /* 56 Offset <- <('+'? '-'? (('0' ('b' / 'B') ('0' / '1')+) / ('0' ('x' / 'X') ([0-9] / [0-9] / ([a-f] / [A-F]))+) / [0-9]+))> */ + func() bool { + position907, tokenIndex907 := position, tokenIndex + { + position908 := position + { + position909, tokenIndex909 := position, tokenIndex + if buffer[position] != rune('+') { + goto l909 + } + position++ + goto l910 + l909: + position, tokenIndex = position909, tokenIndex909 + } + l910: + { + position911, tokenIndex911 := position, tokenIndex + if buffer[position] != rune('-') { + goto l911 + } + position++ + goto l912 + l911: + position, tokenIndex = position911, tokenIndex911 + } + l912: + { + position913, tokenIndex913 := position, tokenIndex + if buffer[position] != rune('0') { + goto l914 + } + position++ + { + position915, tokenIndex915 := position, tokenIndex + if buffer[position] != rune('b') { + goto l916 + } + position++ + goto l915 + l916: + position, tokenIndex = position915, tokenIndex915 + if buffer[position] != rune('B') { + goto l914 + } + position++ + } + l915: + { + position919, tokenIndex919 := position, tokenIndex + if buffer[position] != rune('0') { + goto l920 + } + position++ + goto l919 + l920: + position, tokenIndex = position919, tokenIndex919 + if buffer[position] != rune('1') { + goto l914 + } + position++ + } + l919: + l917: + { + position918, tokenIndex918 := position, tokenIndex + { + position921, tokenIndex921 := position, tokenIndex + if buffer[position] != rune('0') { + goto l922 + } + position++ + goto l921 + l922: + position, tokenIndex = position921, tokenIndex921 + if buffer[position] != rune('1') { + goto l918 + } + position++ + } + l921: + goto l917 + l918: + position, tokenIndex = position918, tokenIndex918 + } + goto l913 + l914: + position, tokenIndex = position913, tokenIndex913 + if buffer[position] != rune('0') { + goto l923 + } + position++ + { + position924, tokenIndex924 := position, tokenIndex + if buffer[position] != rune('x') { + goto l925 + } + position++ + goto l924 + l925: + position, tokenIndex = position924, tokenIndex924 + if buffer[position] != rune('X') { + goto l923 + } + position++ + } + l924: + { + position928, tokenIndex928 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l929 + } + position++ + goto l928 + l929: + position, tokenIndex = position928, tokenIndex928 + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l930 + } + position++ + goto l928 + l930: + position, tokenIndex = position928, tokenIndex928 + { + position931, tokenIndex931 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('f') { + goto l932 + } + position++ + goto l931 + l932: + position, tokenIndex = position931, tokenIndex931 + if c := buffer[position]; c < rune('A') || c > rune('F') { + goto l923 + } + position++ + } + l931: + } + l928: + l926: + { + position927, tokenIndex927 := position, tokenIndex + { + position933, tokenIndex933 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l934 + } + position++ + goto l933 + l934: + position, tokenIndex = position933, tokenIndex933 + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l935 + } + position++ + goto l933 + l935: + position, tokenIndex = position933, tokenIndex933 + { + position936, tokenIndex936 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('f') { + goto l937 + } + position++ + goto l936 + l937: + position, tokenIndex = position936, tokenIndex936 + if c := buffer[position]; c < rune('A') || c > rune('F') { + goto l927 + } + position++ + } + l936: + } + l933: + goto l926 + l927: + position, tokenIndex = position927, tokenIndex927 + } + goto l913 + l923: + position, tokenIndex = position913, tokenIndex913 + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l907 + } + position++ + l938: + { + position939, tokenIndex939 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l939 + } + position++ + goto l938 + l939: + position, tokenIndex = position939, tokenIndex939 + } + } + l913: + add(ruleOffset, position908) + } + return true + l907: + position, tokenIndex = position907, tokenIndex907 + return false + }, + /* 57 Section <- <([a-z] / [A-Z] / '@')+> */ + func() bool { + position940, tokenIndex940 := position, tokenIndex + { + position941 := position + { + position944, tokenIndex944 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { + goto l945 + } + position++ + goto l944 + l945: + position, tokenIndex = position944, tokenIndex944 + if c := buffer[position]; c < rune('A') || c > rune('Z') { + goto l946 + } + position++ + goto l944 + l946: + position, tokenIndex = position944, tokenIndex944 + if buffer[position] != rune('@') { + goto l940 + } + position++ + } + l944: + l942: + { + position943, tokenIndex943 := position, tokenIndex + { + position947, tokenIndex947 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { + goto l948 + } + position++ + goto l947 + l948: + position, tokenIndex = position947, tokenIndex947 + if c := buffer[position]; c < rune('A') || c > rune('Z') { + goto l949 + } + position++ + goto l947 + l949: + position, tokenIndex = position947, tokenIndex947 + if buffer[position] != rune('@') { + goto l943 + } + position++ + } + l947: + goto l942 + l943: + position, tokenIndex = position943, tokenIndex943 + } + add(ruleSection, position941) + } + return true + l940: + position, tokenIndex = position940, tokenIndex940 + return false + }, + /* 58 SegmentRegister <- <('%' ([c-g] / 's') ('s' ':'))> */ + func() bool { + position950, tokenIndex950 := position, tokenIndex + { + position951 := position + if buffer[position] != rune('%') { + goto l950 + } + position++ + { + position952, tokenIndex952 := position, tokenIndex + if c := buffer[position]; c < rune('c') || c > rune('g') { + goto l953 + } + position++ + goto l952 + l953: + position, tokenIndex = position952, tokenIndex952 + if buffer[position] != rune('s') { + goto l950 + } + position++ + } + l952: + if buffer[position] != rune('s') { + goto l950 + } + position++ + if buffer[position] != rune(':') { + goto l950 + } + position++ + add(ruleSegmentRegister, position951) + } + return true + l950: + position, tokenIndex = position950, tokenIndex950 return false }, } diff --git a/yass/third_party/boringssl/src/util/fipstools/delocate/testdata/aarch64-Basic/in.s b/yass/third_party/boringssl/src/util/fipstools/delocate/testdata/aarch64-Basic/in.s index b21ebcb38e..f151c23856 100644 --- a/yass/third_party/boringssl/src/util/fipstools/delocate/testdata/aarch64-Basic/in.s +++ b/yass/third_party/boringssl/src/util/fipstools/delocate/testdata/aarch64-Basic/in.s @@ -78,9 +78,15 @@ foo: add w0, w1, b2, sxtw add w0, w1, b2, sxtx - // Aarch64 SVE2 added these forms: - ld1d { z1.d }, p0/z, [x13, x11, lsl #3] - ld1b { z11.b }, p15/z, [x10, #1, mul vl] + // Aarch64 SVE2 added these forms: + ld1d { z1.d }, p91/z, [x13, x11, lsl #3] + ld1b { z11.b }, p15/z, [x10, #1, mul vl] + st2d { z6.d, z7.d }, p0, [x12] + // Check that "p22" here isn't parsed as the "p22" register. + bl p224_point_add + ptrue p0.d, vl1 + // The "#7" here isn't a comment, it's now valid Aarch64 assembly. + cnth x8, all, mul #7 local_function: diff --git a/yass/third_party/boringssl/src/util/fipstools/delocate/testdata/aarch64-Basic/out.s b/yass/third_party/boringssl/src/util/fipstools/delocate/testdata/aarch64-Basic/out.s index 4c032653a2..c02461083c 100644 --- a/yass/third_party/boringssl/src/util/fipstools/delocate/testdata/aarch64-Basic/out.s +++ b/yass/third_party/boringssl/src/util/fipstools/delocate/testdata/aarch64-Basic/out.s @@ -125,9 +125,16 @@ foo: add w0, w1, b2, sxtw add w0, w1, b2, sxtx - // Aarch64 SVE2 added these forms: - ld1d { z1.d }, p0/z, [x13, x11, lsl #3] - ld1b { z11.b }, p15/z, [x10, #1, mul vl] + // Aarch64 SVE2 added these forms: + ld1d { z1.d }, p91/z, [x13, x11, lsl #3] + ld1b { z11.b }, p15/z, [x10, #1, mul vl] + st2d { z6.d, z7.d }, p0, [x12] + // Check that "p22" here isn't parsed as the "p22" register. +// WAS bl p224_point_add + bl bcm_redirector_p224_point_add + ptrue p0.d, vl1 + // The "#7" here isn't a comment, it's now valid Aarch64 assembly. + cnth x8, all, mul #7 .Llocal_function_local_target: local_function: @@ -144,6 +151,15 @@ bss_symbol: .loc 1 2 0 BORINGSSL_bcm_text_end: .p2align 2 +.hidden bcm_redirector_p224_point_add +.type bcm_redirector_p224_point_add, @function +bcm_redirector_p224_point_add: +.cfi_startproc + hint #34 // bti c + b p224_point_add +.cfi_endproc +.size bcm_redirector_p224_point_add, .-bcm_redirector_p224_point_add +.p2align 2 .hidden bcm_redirector_remote_function .type bcm_redirector_remote_function, @function bcm_redirector_remote_function: diff --git a/yass/third_party/boringssl/src/util/fipstools/test_fips.c b/yass/third_party/boringssl/src/util/fipstools/test_fips.c index 13b8d7d3da..bd0ec46646 100644 --- a/yass/third_party/boringssl/src/util/fipstools/test_fips.c +++ b/yass/third_party/boringssl/src/util/fipstools/test_fips.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -54,7 +55,8 @@ int main(int argc, char **argv) { printf("No module version set\n"); goto err; } - printf("Module version: %" PRIu32 "\n", module_version); + printf("Module: '%s', version: %" PRIu32 "\n", FIPS_module_name(), + module_version); static const uint8_t kAESKey[16] = "BoringCrypto Key"; static const uint8_t kPlaintext[64] = @@ -216,6 +218,18 @@ int main(int argc, char **argv) { RSA_free(rsa_key); + /* Generating a key with a null output parameter. */ + printf("About to generate RSA key with null output\n"); + if (!RSA_generate_key_fips(NULL, 2048, NULL)) { + printf("RSA_generate_key_fips failed with null output parameter\n"); + ERR_clear_error(); + } else { + printf( + "RSA_generate_key_fips unexpectedly succeeded with null output " + "parameter\n"); + goto err; + } + EC_KEY *ec_key = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); if (ec_key == NULL) { printf("invalid ECDSA key\n"); @@ -261,6 +275,30 @@ int main(int argc, char **argv) { ECDSA_SIG_free(sig); EC_KEY_free(ec_key); + /* Generating a key with a null output pointer. */ + printf("About to generate P-256 key with NULL output\n"); + if (!EC_KEY_generate_key_fips(NULL)) { + printf("EC_KEY_generate_key_fips failed with a NULL output pointer.\n"); + ERR_clear_error(); + } else { + printf( + "EC_KEY_generate_key_fips unexpectedly succeeded with a NULL output " + "pointer.\n"); + goto err; + } + + /* ECDSA with an invalid public key. */ + ec_key = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); + static const uint8_t kNotValidX926[] = {1,2,3,4,5,6}; + if (!EC_KEY_oct2key(ec_key, kNotValidX926, sizeof(kNotValidX926), + /*ctx=*/NULL)) { + printf("Error while parsing invalid ECDSA public key"); + } else { + printf("Unexpected success while parsing invalid ECDSA public key"); + goto err; + } + EC_KEY_free(ec_key); + /* DBRG */ CTR_DRBG_STATE drbg; printf("About to seed CTR-DRBG with "); diff --git a/yass/third_party/boringssl/src/util/generate_build_files.py b/yass/third_party/boringssl/src/util/generate_build_files.py index 7024090c77..31b39f4a2f 100644 --- a/yass/third_party/boringssl/src/util/generate_build_files.py +++ b/yass/third_party/boringssl/src/util/generate_build_files.py @@ -23,48 +23,7 @@ import sys import json -# OS_ARCH_COMBOS maps from OS and platform to the OpenSSL assembly "style" for -# that platform and the extension used by asm files. -# -# TODO(https://crbug.com/boringssl/542): This probably should be a map, but some -# downstream scripts import this to find what folders to add/remove from git. -OS_ARCH_COMBOS = [ - ('apple', 'aarch64', 'ios64', [], 'S'), - ('apple', 'x86', 'macosx', ['-fPIC'], 'S'), - ('apple', 'x86_64', 'macosx', [], 'S'), - ('linux', 'arm', 'linux32', [], 'S'), - ('linux', 'aarch64', 'linux64', [], 'S'), - ('linux', 'x86', 'elf', ['-fPIC'], 'S'), - ('linux', 'x86_64', 'elf', [], 'S'), - ('win', 'x86', 'win32n', [], 'asm'), - ('win', 'x86_64', 'nasm', [], 'asm'), - ('win', 'aarch64', 'win64', [], 'S'), -] - -# NON_PERL_FILES enumerates assembly files that are not processed by the -# perlasm system. -NON_PERL_FILES = { - ('apple', 'x86_64'): [ - 'src/third_party/fiat/asm/fiat_curve25519_adx_mul.S', - 'src/third_party/fiat/asm/fiat_curve25519_adx_square.S', - 'src/third_party/fiat/asm/fiat_p256_adx_mul.S', - 'src/third_party/fiat/asm/fiat_p256_adx_sqr.S', - ], - ('linux', 'arm'): [ - 'src/crypto/curve25519/asm/x25519-asm-arm.S', - 'src/crypto/poly1305/poly1305_arm_asm.S', - ], - ('linux', 'x86_64'): [ - 'src/crypto/hrss/asm/poly_rq_mul.S', - 'src/third_party/fiat/asm/fiat_curve25519_adx_mul.S', - 'src/third_party/fiat/asm/fiat_curve25519_adx_square.S', - 'src/third_party/fiat/asm/fiat_p256_adx_mul.S', - 'src/third_party/fiat/asm/fiat_p256_adx_sqr.S', - ], -} - PREFIX = None -EMBED_TEST_DATA = True def PathOf(x): @@ -120,12 +79,12 @@ class Android(object): non_bcm_asm = self.FilterBcmAsm(files['crypto_asm'], False) bcm_asm = self.FilterBcmAsm(files['crypto_asm'], True) - self.PrintDefaults(blueprint, 'libcrypto_sources', non_bcm_c_files, non_bcm_asm) - self.PrintDefaults(blueprint, 'libcrypto_bcm_sources', bcm_c_files, bcm_asm) + self.PrintDefaults(blueprint, 'libcrypto_sources', non_bcm_c_files, asm_files=non_bcm_asm) + self.PrintDefaults(blueprint, 'libcrypto_bcm_sources', bcm_c_files, asm_files=bcm_asm) self.PrintDefaults(blueprint, 'libssl_sources', files['ssl']) self.PrintDefaults(blueprint, 'bssl_sources', files['tool']) self.PrintDefaults(blueprint, 'boringssl_test_support_sources', files['test_support']) - self.PrintDefaults(blueprint, 'boringssl_crypto_test_sources', files['crypto_test']) + self.PrintDefaults(blueprint, 'boringssl_crypto_test_sources', files['crypto_test'], data=files['crypto_test_data']) self.PrintDefaults(blueprint, 'boringssl_ssl_test_sources', files['ssl_test']) self.PrintDefaults(blueprint, 'libpki_sources', files['pki']) @@ -137,7 +96,7 @@ class Android(object): self.PrintVariableSection(makefile, 'crypto_sources_asm', files['crypto_asm']) - def PrintDefaults(self, blueprint, name, files, asm_files=[]): + def PrintDefaults(self, blueprint, name, files, asm_files=[], data=[]): """Print a cc_defaults section from a list of C files and optionally assembly outputs""" if asm_files: blueprint.write('\n') @@ -153,6 +112,11 @@ class Android(object): for f in sorted(files): blueprint.write(' "%s",\n' % f) blueprint.write(' ],\n') + if data: + blueprint.write(' data: [\n') + for f in sorted(data): + blueprint.write(' "%s",\n' % f) + blueprint.write(' ],\n') if asm_files: blueprint.write(' target: {\n') @@ -569,22 +533,6 @@ class JSON(object): with open('sources.json', 'w+') as f: json.dump(files, f, sort_keys=True, indent=2) -def FindCMakeFiles(directory): - """Returns list of all CMakeLists.txt files recursively in directory.""" - cmakefiles = [] - - for (path, _, filenames) in os.walk(directory): - for filename in filenames: - if filename == 'CMakeLists.txt': - cmakefiles.append(os.path.join(path, filename)) - - return cmakefiles - -def OnlyFIPSFragments(path, dent, is_dir): - return is_dir or (path.startswith( - os.path.join('src', 'crypto', 'fipsmodule', '')) and - NoTests(path, dent, is_dir)) - def NoTestsNorFIPSFragments(path, dent, is_dir): return (NoTests(path, dent, is_dir) and (is_dir or not OnlyFIPSFragments(path, dent, is_dir))) @@ -611,18 +559,6 @@ def NoTestRunnerFiles(path, dent, is_dir): return not is_dir or dent != 'runner' -def SSLHeaderFiles(path, dent, is_dir): - return dent in ['ssl.h', 'tls1.h', 'ssl23.h', 'ssl3.h', 'dtls1.h', 'srtp.h'] - - -def CryptoHeaderFiles(path, dent, is_dir): - if SSLHeaderFiles(path, dent, is_dir): - return False - if is_dir and dent == 'pki': - return False - return True - - def FindCFiles(directory, filter_func): """Recurses through directory and returns a list of paths to all the C source files that pass filter_func.""" @@ -679,219 +615,57 @@ def FindHeaderFiles(directory, filter_func): return hfiles -def ExtractPerlAsmFromCMakeFile(cmakefile): - """Parses the contents of the CMakeLists.txt file passed as an argument and - returns a list of all the perlasm() directives found in the file.""" - perlasms = [] - with open(cmakefile) as f: - for line in f: - line = line.strip() - if not line.startswith('perlasm('): - continue - if not line.endswith(')'): - raise ValueError('Bad perlasm line in %s' % cmakefile) - # Remove "perlasm(" from start and ")" from end - params = line[8:-1].split() - if len(params) < 4: - raise ValueError('Bad perlasm line in %s' % cmakefile) - perlasms.append({ - 'arch': params[1], - 'output': os.path.join(os.path.dirname(cmakefile), params[2]), - 'input': os.path.join(os.path.dirname(cmakefile), params[3]), - 'extra_args': params[4:], - }) - - return perlasms - - -def ReadPerlAsmOperations(): - """Returns a list of all perlasm() directives found in CMake config files in - src/.""" - perlasms = [] - cmakefiles = FindCMakeFiles('src') - - for cmakefile in cmakefiles: - perlasms.extend(ExtractPerlAsmFromCMakeFile(cmakefile)) - - return perlasms - - -def PerlAsm(output_filename, input_filename, perlasm_style, extra_args): - """Runs the a perlasm script and puts the output into output_filename.""" - base_dir = os.path.dirname(output_filename) - if not os.path.isdir(base_dir): - os.makedirs(base_dir) - subprocess.check_call( - ['perl', input_filename, perlasm_style] + extra_args + [output_filename]) - - -def WriteAsmFiles(perlasms): - """Generates asm files from perlasm directives for each supported OS x - platform combination.""" - asmfiles = {} - - for perlasm in perlasms: - for (osname, arch, perlasm_style, extra_args, asm_ext) in OS_ARCH_COMBOS: - if arch != perlasm['arch']: - continue - # TODO(https://crbug.com/boringssl/542): Now that we incorporate osname in - # the output filename, the asm files can just go in a single directory. - # For now, we keep them in target-specific directories to avoid breaking - # downstream scripts. - key = (osname, arch) - outDir = '%s-%s' % key - output = perlasm['output'] - if not output.startswith('src'): - raise ValueError('output missing src: %s' % output) - output = os.path.join(outDir, output[4:]) - output = '%s-%s.%s' % (output, osname, asm_ext) - PerlAsm(output, perlasm['input'], perlasm_style, - extra_args + perlasm['extra_args']) - asmfiles.setdefault(key, []).append(output) - - for (key, non_perl_asm_files) in NON_PERL_FILES.items(): - asmfiles.setdefault(key, []).extend(non_perl_asm_files) - - for files in asmfiles.values(): - files.sort() - - return asmfiles - - -def ExtractVariablesFromCMakeFile(cmakefile): - """Parses the contents of the CMakeLists.txt file passed as an argument and - returns a dictionary of exported source lists.""" - variables = {} - in_set_command = False - set_command = [] - with open(cmakefile) as f: - for line in f: - if '#' in line: - line = line[:line.index('#')] - line = line.strip() - - if not in_set_command: - if line.startswith('set('): - in_set_command = True - set_command = [] - elif line == ')': - in_set_command = False - if not set_command: - raise ValueError('Empty set command') - variables[set_command[0]] = set_command[1:] - else: - set_command.extend([c for c in line.split(' ') if c]) - - if in_set_command: - raise ValueError('Unfinished set command') - return variables - - def PrefixWithSrc(files): return ['src/' + x for x in files] def main(platforms): - cmake = ExtractVariablesFromCMakeFile(os.path.join('src', 'sources.cmake')) - crypto_c_files = (FindCFiles(os.path.join('src', 'crypto'), NoTestsNorFIPSFragments) + - FindCFiles(os.path.join('src', 'third_party', 'fiat'), NoTestsNorFIPSFragments)) - fips_fragments = FindCFiles(os.path.join('src', 'crypto', 'fipsmodule'), OnlyFIPSFragments) - ssl_source_files = FindCFiles(os.path.join('src', 'ssl'), NoTests) - tool_h_files = FindHeaderFiles(os.path.join('src', 'tool'), AllFiles) + with open(os.path.join('src', 'gen', 'sources.json')) as f: + sources = json.load(f) + bssl_sys_files = FindRustFiles(os.path.join('src', 'rust', 'bssl-sys', 'src')) bssl_crypto_files = FindRustFiles(os.path.join('src', 'rust', 'bssl-crypto', 'src')) - # BCM shared library C files - bcm_crypto_c_files = [ - os.path.join('src', 'crypto', 'fipsmodule', 'bcm.c') - ] - - # Generate err_data.c - with open('err_data.c', 'w+') as err_data: - subprocess.check_call(['go', 'run', 'err_data_generate.go'], - cwd=os.path.join('src', 'crypto', 'err'), - stdout=err_data) - crypto_c_files.append('err_data.c') - crypto_c_files.sort() - - test_support_h_files = ( - FindHeaderFiles(os.path.join('src', 'crypto', 'test'), AllFiles) + - FindHeaderFiles(os.path.join('src', 'ssl', 'test'), NoTestRunnerFiles)) - - crypto_test_files = [] - if EMBED_TEST_DATA: - # Generate crypto_test_data.cc - with open('crypto_test_data.cc', 'w+') as out: - subprocess.check_call( - ['go', 'run', 'util/embed_test_data.go'] + cmake['CRYPTO_TEST_DATA'], - cwd='src', - stdout=out) - crypto_test_files.append('crypto_test_data.cc') - - crypto_test_files += PrefixWithSrc(cmake['CRYPTO_TEST_SOURCES']) - crypto_test_files.sort() - fuzz_c_files = FindCFiles(os.path.join('src', 'fuzz'), NoTests) - ssl_h_files = FindHeaderFiles(os.path.join('src', 'include', 'openssl'), - SSLHeaderFiles) - - pki_internal_h_files = FindHeaderFiles(os.path.join('src', 'pki'), AllFiles) - - crypto_h_files = FindHeaderFiles(os.path.join('src', 'include', 'openssl'), - CryptoHeaderFiles) - pki_h_files = FindHeaderFiles( - os.path.join('src', 'include', 'openssl', 'pki'), AllFiles) - - ssl_internal_h_files = FindHeaderFiles(os.path.join('src', 'ssl'), NoTests) - crypto_internal_h_files = ( - FindHeaderFiles(os.path.join('src', 'crypto'), NoTests) + - FindHeaderFiles(os.path.join('src', 'third_party', 'fiat'), NoTests)) - - asm_outputs = sorted(WriteAsmFiles(ReadPerlAsmOperations()).items()) - - # Generate combined source lists for gas and nasm. Some files appear in - # multiple per-platform lists, so we de-duplicate. - # - # TODO(https://crbug.com/boringssl/542): It would be simpler to build the - # combined source lists directly. This is a remnant of the previous assembly - # strategy. When we move to pre-generated assembly files, this will be - # removed. - asm_sources = set() - nasm_sources = set() - for ((osname, arch), asm_files) in asm_outputs: - if (osname, arch) in (('win', 'x86'), ('win', 'x86_64')): - nasm_sources.update(asm_files) - else: - asm_sources.update(asm_files) + # TODO(crbug.com/boringssl/542): generate_build_files.py historically reported + # all the assembly files as part of libcrypto. Merge them for now, but we + # should split them out later. + crypto = sorted(sources['bcm']['srcs'] + sources['crypto']['srcs']) + crypto_asm = sorted(sources['bcm']['asm'] + sources['crypto']['asm'] + + sources['test_support']['asm']) + crypto_nasm = sorted(sources['bcm']['nasm'] + sources['crypto']['nasm'] + + sources['test_support']['nasm']) files = { - 'bcm_crypto': bcm_crypto_c_files, - 'crypto': crypto_c_files, - 'crypto_asm': sorted(list(asm_sources)), - 'crypto_nasm': sorted(list(nasm_sources)), - 'crypto_headers': crypto_h_files, - 'crypto_internal_headers': crypto_internal_h_files, - 'crypto_test': crypto_test_files, - 'crypto_test_data': sorted(PrefixWithSrc(cmake['CRYPTO_TEST_DATA'])), - 'fips_fragments': fips_fragments, + 'bcm_crypto': PrefixWithSrc(sources['bcm']['srcs']), + 'crypto': PrefixWithSrc(crypto), + 'crypto_asm': PrefixWithSrc(crypto_asm), + 'crypto_nasm': PrefixWithSrc(crypto_nasm), + 'crypto_headers': PrefixWithSrc(sources['crypto']['hdrs']), + 'crypto_internal_headers': + PrefixWithSrc(sources['crypto']['internal_hdrs']), + 'crypto_test': PrefixWithSrc(sources['crypto_test']['srcs']), + 'crypto_test_data': PrefixWithSrc(sources['crypto_test']['data']), + 'fips_fragments': PrefixWithSrc(sources['bcm']['internal_hdrs']), 'fuzz': fuzz_c_files, - 'pki': PrefixWithSrc(cmake['PKI_SOURCES']), - 'pki_headers': pki_h_files, - 'pki_internal_headers': sorted(list(pki_internal_h_files)), - 'pki_test': PrefixWithSrc(cmake['PKI_TEST_SOURCES']), - 'pki_test_data': PrefixWithSrc(cmake['PKI_TEST_DATA']), + 'pki': PrefixWithSrc(sources['pki']['srcs']), + 'pki_headers': PrefixWithSrc(sources['pki']['hdrs']), + 'pki_internal_headers': PrefixWithSrc(sources['pki']['internal_hdrs']), + 'pki_test': PrefixWithSrc(sources['pki_test']['srcs']), + 'pki_test_data': PrefixWithSrc(sources['pki_test']['data']), 'rust_bssl_crypto': bssl_crypto_files, 'rust_bssl_sys': bssl_sys_files, - 'ssl': ssl_source_files, - 'ssl_headers': ssl_h_files, - 'ssl_internal_headers': ssl_internal_h_files, - 'ssl_test': PrefixWithSrc(cmake['SSL_TEST_SOURCES']), - 'tool': PrefixWithSrc(cmake['BSSL_SOURCES']), - 'tool_headers': tool_h_files, - 'test_support': PrefixWithSrc(cmake['TEST_SUPPORT_SOURCES']), - 'test_support_headers': test_support_h_files, - 'urandom_test': PrefixWithSrc(cmake['URANDOM_TEST_SOURCES']), + 'ssl': PrefixWithSrc(sources['ssl']['srcs']), + 'ssl_headers': PrefixWithSrc(sources['ssl']['hdrs']), + 'ssl_internal_headers': PrefixWithSrc(sources['ssl']['internal_hdrs']), + 'ssl_test': PrefixWithSrc(sources['ssl_test']['srcs']), + 'tool': PrefixWithSrc(sources['bssl']['srcs']), + 'tool_headers': PrefixWithSrc(sources['bssl']['internal_hdrs']), + 'test_support': PrefixWithSrc(sources['test_support']['srcs']), + 'test_support_headers': + PrefixWithSrc(sources['test_support']['internal_hdrs']), + 'urandom_test': PrefixWithSrc(sources['urandom_test']['srcs']), } for platform in platforms: @@ -916,13 +690,8 @@ if __name__ == '__main__': '|'.join(sorted(ALL_PLATFORMS.keys()))) parser.add_option('--prefix', dest='prefix', help='For Bazel, prepend argument to all source files') - parser.add_option( - '--embed_test_data', type='choice', dest='embed_test_data', - action='store', default="true", choices=["true", "false"], - help='For Bazel or GN, don\'t embed data files in crypto_test_data.cc') options, args = parser.parse_args(sys.argv[1:]) PREFIX = options.prefix - EMBED_TEST_DATA = (options.embed_test_data == "true") if not args: parser.print_help() diff --git a/yass/third_party/boringssl/src/util/pregenerate/build.go b/yass/third_party/boringssl/src/util/pregenerate/build.go new file mode 100644 index 0000000000..192a1ead42 --- /dev/null +++ b/yass/third_party/boringssl/src/util/pregenerate/build.go @@ -0,0 +1,312 @@ +// Copyright (c) 2024, Google Inc. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +package main + +import ( + "bytes" + "cmp" + "encoding/json" + "fmt" + "path" + "path/filepath" + "slices" + "strings" + + "boringssl.googlesource.com/boringssl/util/build" +) + +// An InputTarget is a build target with build inputs that still need to be +// pregenerated. All file lists in InputTarget are interpreted with glob +// patterns as in filepath.Glob. +type InputTarget struct { + build.Target + // ErrData contains a list of errordata files to combine into err_data.c. + ErrData []string `json:"err_data,omitempty"` + // The following fields define perlasm sources for the corresponding + // architecture. + PerlasmAarch64 []PerlasmSource `json:"perlasm_aarch64,omitempty"` + PerlasmArm []PerlasmSource `json:"perlasm_arm,omitempty"` + PerlasmX86 []PerlasmSource `json:"perlasm_x86,omitempty"` + PerlasmX86_64 []PerlasmSource `json:"perlasm_x86_64,omitempty"` +} + +type PerlasmSource struct { + // Src the path to the input perlasm file. + Src string `json:"src"` + // Dst, if not empty, is base name of the destination file. If empty, this + // is determined from Src by default. It should be overriden if a single + // source file generates multiple functions (e.g. SHA-256 vs SHA-512) or + // multiple architectures (e.g. the "armx" files). + Dst string `json:"dst,omitempty"` + // Args is a list of extra parameters to pass to the script. + Args []string `json:"args,omitempty"` +} + +// Pregenerate converts an input target to an output target. It returns the +// result alongside a list of tasks that must be run to build the referenced +// files. +func (in *InputTarget) Pregenerate(name string) (out build.Target, tasks []Task, err error) { + // Expand wildcards. + out.Srcs, err = glob(in.Srcs) + if err != nil { + return + } + out.Hdrs, err = glob(in.Hdrs) + if err != nil { + return + } + out.InternalHdrs, err = glob(in.InternalHdrs) + if err != nil { + return + } + out.Asm, err = glob(in.Asm) + if err != nil { + return + } + out.Nasm, err = glob(in.Nasm) + if err != nil { + return + } + out.Data, err = glob(in.Data) + if err != nil { + return + } + + addTask := func(list *[]string, t Task) { + tasks = append(tasks, t) + *list = append(*list, t.Destination()) + } + + if len(in.ErrData) != 0 { + var inputs []string + inputs, err = glob(in.ErrData) + if err != nil { + return + } + addTask(&out.Srcs, &ErrDataTask{TargetName: name, Inputs: inputs}) + } + + addPerlasmTask := func(list *[]string, p *PerlasmSource, fileSuffix string, args []string) { + dst := p.Dst + if len(p.Dst) == 0 { + dst = strings.TrimSuffix(path.Base(p.Src), ".pl") + } + dst = path.Join("gen", name, dst+fileSuffix) + args = append(slices.Clone(args), p.Args...) + addTask(list, &PerlasmTask{Src: p.Src, Dst: dst, Args: args}) + } + + for _, p := range in.PerlasmAarch64 { + addPerlasmTask(&out.Asm, &p, "-apple.S", []string{"ios64"}) + addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"linux64"}) + addPerlasmTask(&out.Asm, &p, "-win.S", []string{"win64"}) + } + for _, p := range in.PerlasmArm { + addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"linux32"}) + } + for _, p := range in.PerlasmX86 { + addPerlasmTask(&out.Asm, &p, "-apple.S", []string{"macosx", "-fPIC", "-DOPENSSL_IA32_SSE2"}) + addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"elf", "-fPIC", "-DOPENSSL_IA32_SSE2"}) + addPerlasmTask(&out.Nasm, &p, "-win.asm", []string{"win32n", "-fPIC", "-DOPENSSL_IA32_SSE2"}) + } + for _, p := range in.PerlasmX86_64 { + addPerlasmTask(&out.Asm, &p, "-apple.S", []string{"macosx"}) + addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"elf"}) + addPerlasmTask(&out.Nasm, &p, "-win.asm", []string{"nasm"}) + } + + // Re-sort the modified fields. + slices.Sort(out.Srcs) + slices.Sort(out.Asm) + slices.Sort(out.Nasm) + + return +} + +func glob(paths []string) ([]string, error) { + var ret []string + for _, path := range paths { + if !strings.ContainsRune(path, '*') { + ret = append(ret, path) + continue + } + matches, err := filepath.Glob(path) + if err != nil { + return nil, err + } + if len(matches) == 0 { + return nil, fmt.Errorf("glob matched no files: %q", path) + } + // Switch from Windows to POSIX paths. + for _, match := range matches { + ret = append(ret, strings.ReplaceAll(match, "\\", "/")) + } + } + slices.Sort(ret) + return ret, nil +} + +func sortedKeys[K cmp.Ordered, V any](m map[K]V) []K { + keys := make([]K, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + slices.Sort(keys) + return keys +} + +func writeHeader(b *bytes.Buffer, comment string) { + fmt.Fprintf(b, "%s Copyright (c) 2024, Google Inc.\n", comment) + fmt.Fprintf(b, "%s\n", comment) + fmt.Fprintf(b, "%s Permission to use, copy, modify, and/or distribute this software for any\n", comment) + fmt.Fprintf(b, "%s purpose with or without fee is hereby granted, provided that the above\n", comment) + fmt.Fprintf(b, "%s copyright notice and this permission notice appear in all copies.\n", comment) + fmt.Fprintf(b, "%s\n", comment) + fmt.Fprintf(b, "%s THE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES\n", comment) + fmt.Fprintf(b, "%s WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF\n", comment) + fmt.Fprintf(b, "%s MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY\n", comment) + fmt.Fprintf(b, "%s SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES\n", comment) + fmt.Fprintf(b, "%s WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION\n", comment) + fmt.Fprintf(b, "%s OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN\n", comment) + fmt.Fprintf(b, "%s CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.\n", comment) + fmt.Fprintf(b, "%s\n", comment) + fmt.Fprintf(b, "%s Generated by go ./util/pregenerate. Do not edit manually.\n", comment) +} + +func buildVariablesTask(targets map[string]build.Target, dst, comment string, writeVariable func(b *bytes.Buffer, name string, val []string)) Task { + return NewSimpleTask(dst, func() ([]byte, error) { + var b bytes.Buffer + writeHeader(&b, comment) + + for _, name := range sortedKeys(targets) { + target := targets[name] + if len(target.Srcs) != 0 { + writeVariable(&b, name+"_sources", target.Srcs) + } + if len(target.Hdrs) != 0 { + writeVariable(&b, name+"_headers", target.Hdrs) + } + if len(target.InternalHdrs) != 0 { + writeVariable(&b, name+"_internal_headers", target.InternalHdrs) + } + if len(target.Asm) != 0 { + writeVariable(&b, name+"_sources_asm", target.Asm) + } + if len(target.Nasm) != 0 { + writeVariable(&b, name+"_sources_nasm", target.Nasm) + } + if len(target.Data) != 0 { + writeVariable(&b, name+"_data", target.Data) + } + } + + return b.Bytes(), nil + }) +} + +func writeBazelVariable(b *bytes.Buffer, name string, val []string) { + fmt.Fprintf(b, "\n%s = [\n", name) + for _, v := range val { + fmt.Fprintf(b, " %q,\n", v) + } + fmt.Fprintf(b, "]\n") +} + +func writeCMakeVariable(b *bytes.Buffer, name string, val []string) { + fmt.Fprintf(b, "\nset(\n") + fmt.Fprintf(b, " %s\n\n", strings.ToUpper(name)) + for _, v := range val { + fmt.Fprintf(b, " %s\n", v) + } + fmt.Fprintf(b, ")\n") +} + +func writeMakeVariable(b *bytes.Buffer, name string, val []string) { + fmt.Fprintf(b, "\n%s := \\\n", name) + for i, v := range val { + if i == len(val)-1 { + fmt.Fprintf(b, " %s\n", v) + } else { + fmt.Fprintf(b, " %s \\\n", v) + } + } +} + +func writeGNVariable(b *bytes.Buffer, name string, val []string) { + // Bazel and GN have the same syntax similar syntax. + writeBazelVariable(b, name, val) +} + +func jsonTask(targets map[string]build.Target, dst string) Task { + return NewSimpleTask(dst, func() ([]byte, error) { + return json.MarshalIndent(targets, "", " ") + }) +} + +func soongTask(targets map[string]build.Target, dst string) Task { + return NewSimpleTask(dst, func() ([]byte, error) { + var b bytes.Buffer + writeHeader(&b, "//") + + writeAttribute := func(indent, name string, val []string) { + fmt.Fprintf(&b, "%s%s: [\n", indent, name) + for _, v := range val { + fmt.Fprintf(&b, "%s %q,\n", indent, v) + } + fmt.Fprintf(&b, "%s],\n", indent) + + } + + for _, name := range sortedKeys(targets) { + target := targets[name] + fmt.Fprintf(&b, "\ncc_defaults {\n") + fmt.Fprintf(&b, " name: %q\n", "boringssl_"+name+"_sources") + if len(target.Srcs) != 0 { + writeAttribute(" ", "srcs", target.Srcs) + } + if len(target.Data) != 0 { + writeAttribute(" ", "data", target.Data) + } + if len(target.Asm) != 0 { + fmt.Fprintf(&b, " target: {\n") + // Only emit asm for Linux. On Windows, BoringSSL requires NASM, which is + // not available in AOSP. On Darwin, the assembly works fine, but it + // conflicts with Android's FIPS build. See b/294399371. + fmt.Fprintf(&b, " linux: {\n") + writeAttribute(" ", "srcs", target.Asm) + fmt.Fprintf(&b, " },\n") + fmt.Fprintf(&b, " darwin: {\n") + fmt.Fprintf(&b, " cflags: [\"-DOPENSSL_NO_ASM\"],\n") + fmt.Fprintf(&b, " },\n") + fmt.Fprintf(&b, " windows: {\n") + fmt.Fprintf(&b, " cflags: [\"-DOPENSSL_NO_ASM\"],\n") + fmt.Fprintf(&b, " },\n") + fmt.Fprintf(&b, " },\n") + } + fmt.Fprintf(&b, "},\n") + } + + return b.Bytes(), nil + }) +} + +func MakeBuildFiles(targets map[string]build.Target) []Task { + // TODO(crbug.com/boringssl/542): Generate the build files for the other + // types as well. + return []Task{ + buildVariablesTask(targets, "gen/sources.cmake", "#", writeCMakeVariable), + jsonTask(targets, "gen/sources.json"), + } +} diff --git a/yass/third_party/boringssl/src/crypto/err/err_data_generate.go b/yass/third_party/boringssl/src/util/pregenerate/err_data.go similarity index 86% rename from yass/third_party/boringssl/src/crypto/err/err_data_generate.go rename to yass/third_party/boringssl/src/util/pregenerate/err_data.go index d4a7c28a8f..8d89d99d42 100644 --- a/yass/third_party/boringssl/src/crypto/err/err_data_generate.go +++ b/yass/third_party/boringssl/src/util/pregenerate/err_data.go @@ -12,25 +12,20 @@ // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -//go:build ignore - package main import ( "bufio" "bytes" "errors" - "flag" "fmt" "io" "os" + "path" "sort" "strconv" - "strings" ) -var verbose = flag.Bool("verbose", false, "If true, prints a status message at the end.") - // libraryNames must be kept in sync with the enum in err.h. The generated code // will contain static assertions to enforce this. var libraryNames = []string{ @@ -129,10 +124,6 @@ type stringWriter interface { func (st *stringList) WriteTo(out stringWriter, name string) { list := st.buildList() - if *verbose { - fmt.Fprintf(os.Stderr, "%s: %d bytes of list and %d bytes of string data.\n", name, 4*len(list), len(st.stringData)) - } - values := "kOpenSSL" + name + "Values" out.WriteString("const uint32_t " + values + "[] = {\n") for _, v := range list { @@ -207,9 +198,16 @@ func (e *errorData) readErrorDataFile(filename string) error { return scanner.Err() } -func main() { - flag.Parse() +type ErrDataTask struct { + TargetName string + Inputs []string +} +func (t *ErrDataTask) Destination() string { + return path.Join("gen", t.TargetName, "err_data.c") +} + +func (t *ErrDataTask) Run() ([]byte, error) { e := &errorData{ reasons: newStringList(), libraryMap: make(map[string]uint32), @@ -218,27 +216,13 @@ func main() { e.libraryMap[name] = uint32(i) + 1 } - cwd, err := os.Open(".") - if err != nil { - panic(err) - } - names, err := cwd.Readdirnames(-1) - if err != nil { - panic(err) - } - - sort.Strings(names) - for _, name := range names { - if !strings.HasSuffix(name, ".errordata") { - continue - } - if err := e.readErrorDataFile(name); err != nil { - panic(err) + for _, input := range t.Inputs { + if err := e.readErrorDataFile(input); err != nil { + return nil, err } } - out := os.Stdout - + var out bytes.Buffer out.WriteString(`/* Copyright (c) 2015, Google Inc. * * Permission to use, copy, modify, and/or distribute this software for any @@ -253,7 +237,7 @@ func main() { * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - /* This file was generated by err_data_generate.go. */ + /* This file was generated by go run ./util/pregenerate. */ #include #include @@ -263,10 +247,11 @@ func main() { `) for i, name := range libraryNames { - fmt.Fprintf(out, "static_assert(ERR_LIB_%s == %d, \"library value changed\");\n", name, i+1) + fmt.Fprintf(&out, "static_assert(ERR_LIB_%s == %d, \"library value changed\");\n", name, i+1) } - fmt.Fprintf(out, "static_assert(ERR_NUM_LIBS == %d, \"number of libraries changed\");\n", len(libraryNames)+1) + fmt.Fprintf(&out, "static_assert(ERR_NUM_LIBS == %d, \"number of libraries changed\");\n", len(libraryNames)+1) out.WriteString("\n") - e.reasons.WriteTo(out, "Reason") + e.reasons.WriteTo(&out, "Reason") + return out.Bytes(), nil } diff --git a/yass/third_party/boringssl/src/util/pregenerate/pregenerate.go b/yass/third_party/boringssl/src/util/pregenerate/pregenerate.go new file mode 100644 index 0000000000..1a3710ea82 --- /dev/null +++ b/yass/third_party/boringssl/src/util/pregenerate/pregenerate.go @@ -0,0 +1,223 @@ +// Copyright (c) 2024, Google Inc. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +// pregenerate manages generated files in BoringSSL +package main + +import ( + "bytes" + "encoding/json" + "errors" + "flag" + "fmt" + "os" + "path/filepath" + "runtime" + "slices" + "strings" + "sync" + + "boringssl.googlesource.com/boringssl/util/build" +) + +var ( + check = flag.Bool("check", false, "Check whether any files need to be updated, without actually updating them") + numWorkers = flag.Int("num-workers", runtime.NumCPU(), "Runs the given number of workers") + dryRun = flag.Bool("dry-run", false, "Skip actually writing any files") + perlPath = flag.String("perl", "perl", "Path to the perl command") + list = flag.Bool("list", false, "List all generated files, rather than actually run them") +) + +func runTask(t Task) error { + expected, err := t.Run() + if err != nil { + return err + } + + dst := t.Destination() + dstPath := filepath.FromSlash(dst) + if *check { + actual, err := os.ReadFile(dstPath) + if err != nil { + if os.IsNotExist(err) { + err = errors.New("missing file") + } + return err + } + + if !bytes.Equal(expected, actual) { + return errors.New("file out of date") + } + return nil + } + + if *dryRun { + fmt.Printf("Would write %d bytes to %q\n", len(expected), dst) + return nil + } + + if err := os.MkdirAll(filepath.Dir(dstPath), 0777); err != nil { + return err + } + return os.WriteFile(dstPath, expected, 0666) +} + +type taskError struct { + dst string + err error +} + +func worker(taskChan <-chan Task, errorChan chan<- taskError, wg *sync.WaitGroup) { + defer wg.Done() + for t := range taskChan { + if err := runTask(t); err != nil { + errorChan <- taskError{t.Destination(), err} + } + } +} + +func run() error { + if _, err := os.Stat("BUILDING.md"); err != nil { + return fmt.Errorf("must be run from BoringSSL source root") + } + + buildJSON, err := os.ReadFile("build.json") + if err != nil { + return err + } + + // Remove comments. For now, just do a very basic preprocessing step. If + // needed, we can switch to something well-defined like one of the many + // dozen different extended JSONs like JSON5. + lines := bytes.Split(buildJSON, []byte("\n")) + for i := range lines { + if idx := bytes.Index(lines[i], []byte("//")); idx >= 0 { + lines[i] = lines[i][:idx] + } + } + buildJSON = bytes.Join(lines, []byte("\n")) + + var targetsIn map[string]InputTarget + if err := json.Unmarshal(buildJSON, &targetsIn); err != nil { + return fmt.Errorf("error decoding build config: %s", err) + } + + var tasks []Task + targetsOut := make(map[string]build.Target) + for name, targetIn := range targetsIn { + targetOut, targetTasks, err := targetIn.Pregenerate(name) + if err != nil { + return err + } + targetsOut[name] = targetOut + tasks = append(tasks, targetTasks...) + } + + tasks = append(tasks, MakeBuildFiles(targetsOut)...) + tasks = append(tasks, NewSimpleTask("gen/README.md", func() ([]byte, error) { + return []byte(readme), nil + })) + + // Filter tasks by command-line argument. + if args := flag.Args(); len(args) != 0 { + var filtered []Task + for _, t := range tasks { + dst := t.Destination() + for _, arg := range args { + if strings.Contains(dst, arg) { + filtered = append(filtered, t) + break + } + } + } + tasks = filtered + } + + if *list { + paths := make([]string, len(tasks)) + for i, t := range tasks { + paths[i] = t.Destination() + } + slices.Sort(paths) + for _, p := range paths { + fmt.Println(p) + } + return nil + } + + // Schedule tasks in parallel. Perlasm benefits from running in parallel. The + // others likely do not, but it is simpler to parallelize them all. + var wg sync.WaitGroup + taskChan := make(chan Task, *numWorkers) + errorChan := make(chan taskError, *numWorkers) + for i := 0; i < *numWorkers; i++ { + wg.Add(1) + go worker(taskChan, errorChan, &wg) + } + + go func() { + for _, t := range tasks { + taskChan <- t + } + close(taskChan) + wg.Wait() + close(errorChan) + }() + + var failed bool + for err := range errorChan { + fmt.Fprintf(os.Stderr, "Error in file %q: %s\n", err.dst, err.err) + failed = true + } + if failed { + return errors.New("some files had errors") + } + return nil +} + +func main() { + flag.Parse() + if err := run(); err != nil { + fmt.Fprintf(os.Stderr, "Error: %s\n", err) + os.Exit(1) + } +} + +const readme = `# Pre-generated files + +This directory contains a number of pre-generated build artifacts. To simplify +downstream builds, they are checked into the repository, rather than dynamically +generated as part of the build. + +When developing on BoringSSL, if any inputs to these files are modified, callers +must run the following command to update the generated files: + + go run ./util/pregenerate + +To check that files are up-to-date without updating files, run: + + go run ./util/pregenerate -check + +This is run on CI to ensure the generated files remain up-to-date. + +To speed up local iteration, the tool accepts additional arguments to filter the +files generated. For example, if editing ` + "`aesni-x86_64.pl`" + `, this +command will only update files with "aesni-x86_64" as a substring. + + go run ./util/pregenerate aesni-x86_64 + +For convenience, all files in this directory, including this README, are managed +by the tool. This means the whole directory may be deleted and regenerated from +scratch at any time. +` diff --git a/yass/third_party/boringssl/src/util/pregenerate/task.go b/yass/third_party/boringssl/src/util/pregenerate/task.go new file mode 100644 index 0000000000..f04fc43d8a --- /dev/null +++ b/yass/third_party/boringssl/src/util/pregenerate/task.go @@ -0,0 +1,82 @@ +// Copyright (c) 2024, Google Inc. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +package main + +import ( + "bytes" + "os" + "os/exec" + "path" + "path/filepath" +) + +type Task interface { + // Destination returns the destination path for this task, using forward + // slashes and relative to the source directory. That is, use the "path" + // package, not "path/filepath". + Destination() string + + // Run computes the output for this task. It should be written to the + // destination path. + Run() ([]byte, error) +} + +type SimpleTask struct { + Dst string + RunFunc func() ([]byte, error) +} + +func (t *SimpleTask) Destination() string { return t.Dst } +func (t *SimpleTask) Run() ([]byte, error) { return t.RunFunc() } + +func NewSimpleTask(dst string, runFunc func() ([]byte, error)) *SimpleTask { + return &SimpleTask{Dst: dst, RunFunc: runFunc} +} + +type PerlasmTask struct { + Src, Dst string + Args []string +} + +func (t *PerlasmTask) Destination() string { return t.Dst } +func (t *PerlasmTask) Run() ([]byte, error) { + base := path.Base(t.Dst) + out, err := os.CreateTemp("", "*."+base) + if err != nil { + return nil, err + } + defer os.Remove(out.Name()) + + args := make([]string, 0, 2+len(t.Args)) + args = append(args, filepath.FromSlash(t.Src)) + args = append(args, t.Args...) + args = append(args, out.Name()) + cmd := exec.Command(*perlPath, args...) + cmd.Stderr = os.Stderr + cmd.Stdout = os.Stdout + if err := cmd.Run(); err != nil { + return nil, err + } + + data, err := os.ReadFile(out.Name()) + if err != nil { + return nil, err + } + + // On Windows, perl emits CRLF line endings. Normalize this so that the tool + // can be run on Windows too. + data = bytes.ReplaceAll(data, []byte("\r\n"), []byte("\n")) + return data, nil +} diff --git a/yass/third_party/boringssl/src/util/read_symbols.go b/yass/third_party/boringssl/src/util/read_symbols.go index 1d8ec85955..c5024f4737 100644 --- a/yass/third_party/boringssl/src/util/read_symbols.go +++ b/yass/third_party/boringssl/src/util/read_symbols.go @@ -142,7 +142,7 @@ func main() { break } } - if skip || isCXXSymbol(s) || strings.HasPrefix(s, "__real@") || strings.HasPrefix(s, "__x86.get_pc_thunk.") { + if skip || isCXXSymbol(s) || strings.HasPrefix(s, "__real@") || strings.HasPrefix(s, "__x86.get_pc_thunk.") || strings.HasPrefix(s, "DW.") { continue } if _, err := fmt.Fprintln(out, s); err != nil { diff --git a/yass/third_party/boringssl/src/util/run_android_tests.go b/yass/third_party/boringssl/src/util/run_android_tests.go index 79bcfe8607..ea7e4247e0 100644 --- a/yass/third_party/boringssl/src/util/run_android_tests.go +++ b/yass/third_party/boringssl/src/util/run_android_tests.go @@ -19,6 +19,7 @@ package main import ( "bufio" "bytes" + "encoding/json" "errors" "flag" "fmt" @@ -30,6 +31,7 @@ import ( "strconv" "strings" + "boringssl.googlesource.com/boringssl/util/build" "boringssl.googlesource.com/boringssl/util/testconfig" ) @@ -149,7 +151,7 @@ func goTool(args ...string) error { targetPrefix = fmt.Sprintf("aarch64-linux-android%d-", *apiLevel) cmd.Env = append(cmd.Env, "GOARCH=arm64") default: - fmt.Errorf("unknown Android ABI: %q", *abi) + return fmt.Errorf("unknown Android ABI: %q", *abi) } // Go's Android support requires cgo and compilers from the NDK. See @@ -288,6 +290,17 @@ func main() { os.Exit(1) } + targetsJSON, err := os.ReadFile("gen/sources.json") + if err != nil { + fmt.Printf("Error reading sources.json: %s.\n", err) + os.Exit(1) + } + var targets map[string]build.Target + if err := json.Unmarshal(targetsJSON, &targets); err != nil { + fmt.Printf("Error reading sources.json: %s.\n", err) + os.Exit(1) + } + // Clear the target directory. if err := adb("shell", "rm -Rf /data/local/tmp/boringssl-tmp"); err != nil { fmt.Printf("Failed to clear target directory: %s\n", err) @@ -309,19 +322,8 @@ func main() { "util/all_tests.json", "BUILDING.md", ) - - err := filepath.Walk("pki/testdata/", func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if info.Mode().IsRegular() { - files = append(files, path) - } - return nil - }) - if err != nil { - fmt.Printf("Can't walk pki/testdata: %s\n", err) - os.Exit(1) + for _, target := range targets { + files = append(files, target.Data...) } tests, err := testconfig.ParseTestConfig("util/all_tests.json") @@ -360,16 +362,16 @@ func main() { } var libraries []string - if _, err := os.Stat(filepath.Join(*buildDir, "crypto/libcrypto.so")); err == nil { + if _, err := os.Stat(filepath.Join(*buildDir, "libcrypto.so")); err == nil { libraries = []string{ "libboringssl_gtest.so", + "libcrypto.so", + "libdecrepit.so", "libpki.so", - "crypto/libcrypto.so", - "decrepit/libdecrepit.so", - "ssl/libssl.so", + "libssl.so", } } else if !os.IsNotExist(err) { - fmt.Printf("Failed to stat crypto/libcrypto.so: %s\n", err) + fmt.Printf("Failed to stat libcrypto.so: %s\n", err) os.Exit(1) } diff --git a/yass/third_party/icu/android/icudtl.dat b/yass/third_party/icu/android/icudtl.dat index c7b62ceece..398a4729cb 100644 Binary files a/yass/third_party/icu/android/icudtl.dat and b/yass/third_party/icu/android/icudtl.dat differ diff --git a/yass/third_party/icu/common/icudtl.dat b/yass/third_party/icu/common/icudtl.dat index 0f2eb9f746..331d2f39dc 100644 Binary files a/yass/third_party/icu/common/icudtl.dat and b/yass/third_party/icu/common/icudtl.dat differ diff --git a/yass/third_party/icu/ios/icudtl.dat b/yass/third_party/icu/ios/icudtl.dat index fd6d96a6d7..2b5a7cb4b7 100644 Binary files a/yass/third_party/icu/ios/icudtl.dat and b/yass/third_party/icu/ios/icudtl.dat differ diff --git a/yass/third_party/icu/source/common/BUILD.bazel b/yass/third_party/icu/source/common/BUILD.bazel index 47d3d24bf5..3ecae30c43 100644 --- a/yass/third_party/icu/source/common/BUILD.bazel +++ b/yass/third_party/icu/source/common/BUILD.bazel @@ -603,12 +603,16 @@ cc_library( "locbased.cpp", "locid.cpp", "loclikely.cpp", + "loclikelysubtags.cpp", "locmap.cpp", + "lsr.cpp", "resbund.cpp", "resource.cpp", "uloc.cpp", "uloc_tag.cpp", "uloc_keytype.cpp", + "ulocale.cpp", + "ulocbuilder.cpp", "uresbund.cpp", "uresdata.cpp", "wintz.cpp", diff --git a/yass/third_party/icu/source/common/brkeng.cpp b/yass/third_party/icu/source/common/brkeng.cpp index ce3d09cf23..3f58287532 100644 --- a/yass/third_party/icu/source/common/brkeng.cpp +++ b/yass/third_party/icu/source/common/brkeng.cpp @@ -21,6 +21,7 @@ #include "unicode/uscript.h" #include "unicode/ucharstrie.h" #include "unicode/bytestrie.h" +#include "unicode/rbbi.h" #include "brkeng.h" #include "cmemory.h" @@ -70,19 +71,21 @@ UnhandledEngine::~UnhandledEngine() { } UBool -UnhandledEngine::handles(UChar32 c) const { +UnhandledEngine::handles(UChar32 c, const char* locale) const { + (void)locale; // Unused return fHandled && fHandled->contains(c); } int32_t UnhandledEngine::findBreaks( UText *text, - int32_t /* startPos */, + int32_t startPos, int32_t endPos, UVector32 &/*foundBreaks*/, UBool /* isPhraseBreaking */, UErrorCode &status) const { if (U_FAILURE(status)) return 0; - UChar32 c = utext_current32(text); + utext_setNativeIndex(text, startPos); + UChar32 c = utext_current32(text); while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { utext_next32(text); // TODO: recast loop to work with post-increment operations. c = utext_current32(text); @@ -120,41 +123,39 @@ ICULanguageBreakFactory::~ICULanguageBreakFactory() { } } -U_NAMESPACE_END -U_CDECL_BEGIN -static void U_CALLCONV _deleteEngine(void *obj) { - delete (const icu::LanguageBreakEngine *) obj; +void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) { + static UMutex gBreakEngineMutex; + Mutex m(&gBreakEngineMutex); + if (fEngines == nullptr) { + LocalPointer engines(new UStack(uprv_deleteUObject, nullptr, status), status); + if (U_SUCCESS(status)) { + fEngines = engines.orphan(); + } + } } -U_CDECL_END -U_NAMESPACE_BEGIN const LanguageBreakEngine * -ICULanguageBreakFactory::getEngineFor(UChar32 c) { +ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) { const LanguageBreakEngine *lbe = nullptr; UErrorCode status = U_ZERO_ERROR; + ensureEngines(status); + if (U_FAILURE(status) ) { + // Note: no way to return error code to caller. + return nullptr; + } static UMutex gBreakEngineMutex; Mutex m(&gBreakEngineMutex); - - if (fEngines == nullptr) { - LocalPointer engines(new UStack(_deleteEngine, nullptr, status), status); - if (U_FAILURE(status) ) { - // Note: no way to return error code to caller. - return nullptr; - } - fEngines = engines.orphan(); - } else { - int32_t i = fEngines->size(); - while (--i >= 0) { - lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); - if (lbe != nullptr && lbe->handles(c)) { - return lbe; - } + int32_t i = fEngines->size(); + while (--i >= 0) { + lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); + if (lbe != nullptr && lbe->handles(c, locale)) { + return lbe; } } - + // We didn't find an engine. Create one. - lbe = loadEngineFor(c); + lbe = loadEngineFor(c, locale); if (lbe != nullptr) { fEngines->push((void *)lbe, status); } @@ -162,7 +163,7 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) { } const LanguageBreakEngine * -ICULanguageBreakFactory::loadEngineFor(UChar32 c) { +ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) { UErrorCode status = U_ZERO_ERROR; UScriptCode code = uscript_getScript(c, &status); if (U_SUCCESS(status)) { @@ -299,6 +300,70 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) { return nullptr; } + +void ICULanguageBreakFactory::addExternalEngine( + ExternalBreakEngine* external, UErrorCode& status) { + LocalPointer engine(external, status); + ensureEngines(status); + LocalPointer wrapper( + new BreakEngineWrapper(engine.orphan(), status), status); + static UMutex gBreakEngineMutex; + Mutex m(&gBreakEngineMutex); + fEngines->push(wrapper.getAlias(), status); + wrapper.orphan(); +} + +BreakEngineWrapper::BreakEngineWrapper( + ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) { +} + +BreakEngineWrapper::~BreakEngineWrapper() { +} + +UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const { + return delegate->isFor(c, locale); +} + +int32_t BreakEngineWrapper::findBreaks( + UText *text, + int32_t startPos, + int32_t endPos, + UVector32 &foundBreaks, + UBool /* isPhraseBreaking */, + UErrorCode &status) const { + if (U_FAILURE(status)) return 0; + int32_t result = 0; + + // Find the span of characters included in the set. + // The span to break begins at the current position in the text, and + // extends towards the start or end of the text, depending on 'reverse'. + + utext_setNativeIndex(text, startPos); + int32_t start = (int32_t)utext_getNativeIndex(text); + int32_t current; + int32_t rangeStart; + int32_t rangeEnd; + UChar32 c = utext_current32(text); + while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) { + utext_next32(text); // TODO: recast loop for postincrement + c = utext_current32(text); + } + rangeStart = start; + rangeEnd = current; + int32_t beforeSize = foundBreaks.size(); + int32_t additionalCapacity = rangeEnd - rangeStart + 1; + // enlarge to contains (rangeEnd-rangeStart+1) more items + foundBreaks.ensureCapacity(beforeSize+additionalCapacity, status); + if (U_FAILURE(status)) return 0; + foundBreaks.setSize(beforeSize + beforeSize+additionalCapacity); + result = delegate->fillBreaks(text, rangeStart, rangeEnd, foundBreaks.getBuffer()+beforeSize, + additionalCapacity, status); + if (U_FAILURE(status)) return 0; + foundBreaks.setSize(beforeSize + result); + utext_setNativeIndex(text, current); + return result; +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/yass/third_party/icu/source/common/brkeng.h b/yass/third_party/icu/source/common/brkeng.h index 240dc8f4d3..42a3d697cf 100644 --- a/yass/third_party/icu/source/common/brkeng.h +++ b/yass/third_party/icu/source/common/brkeng.h @@ -10,6 +10,7 @@ #ifndef BRKENG_H #define BRKENG_H +#include "unicode/umisc.h" #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/utext.h" @@ -21,6 +22,7 @@ class UnicodeSet; class UStack; class UVector32; class DictionaryMatcher; +class ExternalBreakEngine; /******************************************************************* * LanguageBreakEngine @@ -35,7 +37,7 @@ class DictionaryMatcher; *

LanguageBreakEngines should normally be implemented so as to * be shared between threads without locking.

*/ -class LanguageBreakEngine : public UMemory { +class LanguageBreakEngine : public UObject { public: /** @@ -54,10 +56,11 @@ class LanguageBreakEngine : public UMemory { * a particular kind of break.

* * @param c A character which begins a run that the engine might handle + * @param locale The locale. * @return true if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c) const = 0; + virtual UBool handles(UChar32 c, const char* locale) const = 0; /** *

Find any breaks within a run in the supplied text.

@@ -80,6 +83,35 @@ class LanguageBreakEngine : public UMemory { }; +/******************************************************************* + * BreakEngineWrapper + */ + +/** + *

BreakEngineWrapper implement LanguageBreakEngine by + * a thin wrapper that delegate the task to ExternalBreakEngine + *

+ */ +class BreakEngineWrapper : public LanguageBreakEngine { + public: + + BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status); + + virtual ~BreakEngineWrapper(); + + virtual UBool handles(UChar32 c, const char* locale) const override; + + virtual int32_t findBreaks( UText *text, + int32_t startPos, + int32_t endPos, + UVector32 &foundBreaks, + UBool isPhraseBreaking, + UErrorCode &status) const override; + + private: + LocalPointer delegate; +}; + /******************************************************************* * LanguageBreakFactory */ @@ -125,9 +157,10 @@ class LanguageBreakFactory : public UMemory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. + * @param locale The locale. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0; + virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) = 0; }; @@ -174,10 +207,11 @@ class UnhandledEngine : public LanguageBreakEngine { * a particular kind of break.

* * @param c A character which begins a run that the engine might handle + * @param locale The locale. * @return true if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c) const override; + virtual UBool handles(UChar32 c, const char* locale) const override; /** *

Find any breaks within a run in the supplied text.

@@ -247,9 +281,18 @@ class ICULanguageBreakFactory : public LanguageBreakFactory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. + * @param locale The locale. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override; + virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override; + + /** + * Add and adopt the engine and return an URegistryKey. + * @param engine The ExternalBreakEngine to be added and adopt. The caller + * pass the ownership and should not release the memory after this. + * @param status the error code. + */ + virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status); protected: /** @@ -258,9 +301,10 @@ protected: * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. + * @param locale The locale. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *loadEngineFor(UChar32 c); + virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale); /** *

Create a DictionaryMatcher for the specified script and break type.

@@ -269,6 +313,9 @@ protected: * @return A DictionaryMatcher with the desired characteristics, or nullptr. */ virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script); + + private: + void ensureEngines(UErrorCode& status); }; U_NAMESPACE_END diff --git a/yass/third_party/icu/source/common/brkiter.cpp b/yass/third_party/icu/source/common/brkiter.cpp index 41e4e0dff5..b452cf2c05 100644 --- a/yass/third_party/icu/source/common/brkiter.cpp +++ b/yass/third_party/icu/source/common/brkiter.cpp @@ -27,6 +27,7 @@ #include "unicode/rbbi.h" #include "unicode/brkiter.h" #include "unicode/udata.h" +#include "unicode/uloc.h" #include "unicode/ures.h" #include "unicode/ustring.h" #include "unicode/filteredbrk.h" @@ -121,8 +122,11 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st // If there is a result, set the valid locale and actual locale, and the kind if (U_SUCCESS(status) && result != nullptr) { U_LOCALE_BASED(locBased, *(BreakIterator*)result); + locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale.data()); + uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY); + result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate } ures_close(b); @@ -202,18 +206,20 @@ BreakIterator::getAvailableLocales(int32_t& count) BreakIterator::BreakIterator() { - *validLocale = *actualLocale = 0; + *validLocale = *actualLocale = *requestLocale = 0; } BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) { uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); + uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale)); } BreakIterator &BreakIterator::operator =(const BreakIterator &other) { if (this != &other) { uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); + uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale)); } return *this; } @@ -493,12 +499,18 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) Locale BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { + if (type == ULOC_REQUESTED_LOCALE) { + return Locale(requestLocale); + } U_LOCALE_BASED(locBased, *this); return locBased.getLocale(type, status); } const char * BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { + if (type == ULOC_REQUESTED_LOCALE) { + return requestLocale; + } U_LOCALE_BASED(locBased, *this); return locBased.getLocaleID(type, status); } diff --git a/yass/third_party/icu/source/common/characterproperties.cpp b/yass/third_party/icu/source/common/characterproperties.cpp index 978e6761ce..f1e15b488d 100644 --- a/yass/third_party/icu/source/common/characterproperties.cpp +++ b/yass/third_party/icu/source/common/characterproperties.cpp @@ -169,7 +169,7 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) { case UPROPS_SRC_INPC: case UPROPS_SRC_INSC: case UPROPS_SRC_VO: - uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode); + uprops_addPropertyStarts(src, &sa, &errorCode); break; case UPROPS_SRC_EMOJI: { const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode); @@ -178,6 +178,14 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) { } break; } + case UPROPS_SRC_IDSU: + // New in Unicode 15.1 for just two characters. + sa.add(sa.set, 0x2FFE); + sa.add(sa.set, 0x2FFF + 1); + break; + case UPROPS_SRC_ID_COMPAT_MATH: + uprops_addPropertyStarts(src, &sa, &errorCode); + break; default: errorCode = U_INTERNAL_PROGRAM_ERROR; break; diff --git a/yass/third_party/icu/source/common/dictbe.cpp b/yass/third_party/icu/source/common/dictbe.cpp index af0ba6ec23..9c3e3e3636 100644 --- a/yass/third_party/icu/source/common/dictbe.cpp +++ b/yass/third_party/icu/source/common/dictbe.cpp @@ -42,7 +42,7 @@ DictionaryBreakEngine::~DictionaryBreakEngine() { } UBool -DictionaryBreakEngine::handles(UChar32 c) const { +DictionaryBreakEngine::handles(UChar32 c, const char*) const { return fSet.contains(c); } @@ -54,13 +54,13 @@ DictionaryBreakEngine::findBreaks( UText *text, UBool isPhraseBreaking, UErrorCode& status) const { if (U_FAILURE(status)) return 0; - (void)startPos; // TODO: remove this param? int32_t result = 0; // Find the span of characters included in the set. // The span to break begins at the current position in the text, and // extends towards the start or end of the text, depending on 'reverse'. + utext_setNativeIndex(text, startPos); int32_t start = (int32_t)utext_getNativeIndex(text); int32_t current; int32_t rangeStart; diff --git a/yass/third_party/icu/source/common/dictbe.h b/yass/third_party/icu/source/common/dictbe.h index a2c761bdc3..e512071fa4 100644 --- a/yass/third_party/icu/source/common/dictbe.h +++ b/yass/third_party/icu/source/common/dictbe.h @@ -62,10 +62,11 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * a particular kind of break.

* * @param c A character which begins a run that the engine might handle + * @param locale The locale. * @return true if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c) const override; + virtual UBool handles(UChar32 c, const char* locale) const override; /** *

Find any breaks within a run in the supplied text.

diff --git a/yass/third_party/icu/source/common/loadednormalizer2impl.cpp b/yass/third_party/icu/source/common/loadednormalizer2impl.cpp index 768564edc8..99b8f3e86c 100644 --- a/yass/third_party/icu/source/common/loadednormalizer2impl.cpp +++ b/yass/third_party/icu/source/common/loadednormalizer2impl.cpp @@ -143,6 +143,9 @@ static icu::UInitOnce nfkcInitOnce {}; static Norm2AllModes *nfkc_cfSingleton; static icu::UInitOnce nfkc_cfInitOnce {}; +static Norm2AllModes *nfkc_scfSingleton; +static icu::UInitOnce nfkc_scfInitOnce {}; + static UHashtable *cache=nullptr; // UInitOnce singleton initialization function @@ -156,6 +159,8 @@ static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode); } else if (uprv_strcmp(what, "nfkc_cf") == 0) { nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode); + } else if (uprv_strcmp(what, "nfkc_scf") == 0) { + nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode); } else { UPRV_UNREACHABLE_EXIT; // Unknown singleton } @@ -183,6 +188,10 @@ static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { nfkc_cfSingleton = nullptr; nfkc_cfInitOnce.reset(); + delete nfkc_scfSingleton; + nfkc_scfSingleton = nullptr; + nfkc_scfInitOnce.reset(); + uhash_close(cache); cache=nullptr; return true; @@ -213,6 +222,13 @@ Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { return nfkc_cfSingleton; } +const Norm2AllModes * +Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return nullptr; } + umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode); + return nfkc_scfSingleton; +} + #if !NORM2_HARDCODE_NFC_DATA const Normalizer2 * Normalizer2::getNFCInstance(UErrorCode &errorCode) { @@ -261,6 +277,12 @@ Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { return allModes!=nullptr ? &allModes->comp : nullptr; } +const Normalizer2 * +Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode); + return allModes!=nullptr ? &allModes->comp : nullptr; +} + const Normalizer2 * Normalizer2::getInstance(const char *packageName, const char *name, @@ -281,6 +303,8 @@ Normalizer2::getInstance(const char *packageName, allModes=Norm2AllModes::getNFKCInstance(errorCode); } else if(0==uprv_strcmp(name, "nfkc_cf")) { allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); + } else if(0==uprv_strcmp(name, "nfkc_scf")) { + allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode); } } if(allModes==nullptr && U_SUCCESS(errorCode)) { @@ -393,6 +417,11 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); } +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode); +} + U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getInstance(const char *packageName, const char *name, diff --git a/yass/third_party/icu/source/common/localefallback_data.h b/yass/third_party/icu/source/common/localefallback_data.h index d2eb7eaafd..c847edefff 100644 --- a/yass/third_party/icu/source/common/localefallback_data.h +++ b/yass/third_party/icu/source/common/localefallback_data.h @@ -18,94 +18,93 @@ const char scriptCodeChars[] = "Kana\0Kawi\0Khar\0Khmr\0Kits\0Knda\0Kore\0Lana\0Laoo\0Lepc\0Lina\0" "Linb\0Lisu\0Lyci\0Lydi\0Mand\0Mani\0Marc\0Medf\0Merc\0Mlym\0Modi\0" "Mong\0Mroo\0Mtei\0Mymr\0Narb\0Newa\0Nkoo\0Nshu\0Ogam\0Olck\0Orkh\0" - "Orya\0Osge\0Ougr\0Pauc\0Phli\0Phnx\0Plrd\0Prti\0Rjng\0Rohg\0Runr\0" - "Samr\0Sarb\0Saur\0Sgnw\0Sinh\0Sogd\0Sora\0Soyo\0Syrc\0Tagb\0Takr\0" - "Tale\0Talu\0Taml\0Tang\0Tavt\0Telu\0Tfng\0Thaa\0Thai\0Tibt\0Tnsa\0" - "Toto\0Ugar\0Vaii\0Wcho\0Xpeo\0Xsux\0Yiii\0"; + "Orya\0Osge\0Pauc\0Phli\0Phnx\0Plrd\0Prti\0Rjng\0Rohg\0Runr\0Samr\0" + "Sarb\0Saur\0Sgnw\0Sinh\0Sogd\0Sora\0Soyo\0Syrc\0Tagb\0Takr\0Tale\0" + "Talu\0Taml\0Tang\0Tavt\0Telu\0Tfng\0Thaa\0Thai\0Tibt\0Tnsa\0Toto\0" + "Ugar\0Vaii\0Wcho\0Xpeo\0Xsux\0Yiii\0"; const char dsLocaleIDChars[] = - "aaf\0aao\0aat\0ab\0abh\0abl\0abq\0abv\0acm\0acq\0acw\0acx\0adf\0" - "adp\0adx\0ady\0ae\0aeb\0aec\0aee\0aeq\0afb\0agi\0agj\0agx\0ahg\0" - "aho\0ahr\0aib\0aij\0ain\0aio\0aiq\0ajp\0ajt\0akk\0akv\0alk\0all\0" - "alr\0alt\0alw\0am\0ams\0amw\0ani\0anp\0anr\0anu\0aot\0apc\0apd\0" - "aph\0aqc\0ar\0arc\0arq\0ars\0ary\0arz\0as\0ase\0ask\0atn\0atv\0" - "auj\0auz\0av\0avd\0avl\0awa\0awn\0axm\0ayh\0ayl\0ayn\0ayp\0az_IQ\0" - "az_IR\0az_RU\0azb\0ba\0bal\0bap\0bax\0bbl\0bcq\0bdv\0bdz\0be\0" - "bee\0bej\0bfb\0bfq\0bft\0bfu\0bfw\0bfy\0bfz\0bg\0bgc\0bgd\0bgn\0" - "bgp\0bgq\0bgw\0bgx\0bha\0bhb\0bhd\0bhe\0bhh\0bhi\0bhj\0bhm\0bhn\0" - "bho\0bht\0bhu\0biy\0bjf\0bji\0bjj\0bjm\0blk\0blt\0bmj\0bn\0bns\0" - "bo\0bph\0bpx\0bpy\0bqi\0bra\0brb\0brd\0brh\0brk\0brv\0brx\0bsh\0" - "bsk\0bsq\0bst\0btd\0btm\0btv\0bua\0bwe\0bxm\0bxu\0byh\0byn\0byw\0" - "bzi\0cbn\0ccp\0cde\0cdh\0cdi\0cdj\0cdm\0cdo\0cdz\0ce\0cgk\0chg\0" - "chm\0chr\0chx\0cih\0cja\0cji\0cjm\0cjy\0ckb\0ckt\0clh\0clw\0cmg\0" - "cna\0cnp\0cog\0cop\0cpg\0cr\0crh\0crj\0crk\0crl\0crm\0csh\0csp\0" - "csw\0ctd\0ctg\0ctn\0ctt\0cu\0cuu\0cv\0czh\0czk\0daq\0dar\0dcc\0" - "ddo\0def\0deh\0der\0dgl\0dhi\0dhn\0dho\0dhw\0dka\0dlg\0dmf\0dmk\0" - "dml\0dng\0dnu\0dnv\0doi\0dox\0dre\0drh\0drq\0drs\0dry\0dso\0dty\0" - "dub\0duh\0dus\0dv\0dwk\0dwz\0dz\0dzl\0ecr\0ecy\0egy\0eky\0el\0" - "emg\0emu\0enf\0enh\0era\0esg\0esh\0ett\0eve\0evn\0fa\0fay\0faz\0" - "fia\0fmu\0fub\0gan\0gaq\0gas\0gau\0gbj\0gbk\0gbl\0gbm\0gbz\0gdb\0" - "gdo\0gdx\0gez\0ggg\0ggn\0gha\0ghe\0ghr\0ght\0gig\0gin\0gjk\0gju\0" - "gld\0glh\0glk\0gmv\0gmy\0goe\0gof\0gok\0gom\0gon\0got\0gra\0grc\0" - "grt\0gru\0gu\0gvr\0gwc\0gwf\0gwt\0gyo\0gzi\0ha_CM\0ha_SD\0hac\0" - "hak\0har\0haz\0hbo\0hdy\0he\0hi\0hii\0hit\0hkh\0hlb\0hlu\0hmd\0" - "hmj\0hmq\0hnd\0hne\0hnj\0hnj_AU\0hnj_CN\0hnj_FR\0hnj_GF\0hnj_LA\0" - "hnj_MM\0hnj_SR\0hnj_TH\0hnj_US\0hnj_VN\0hno\0hoc\0hoh\0hoj\0how\0" - "hoy\0hpo\0hrt\0hrz\0hsn\0hss\0htx\0hut\0huy\0huz\0hy\0hyw\0ii\0" - "imy\0inh\0int\0ior\0iru\0isk\0itk\0itl\0iu\0iw\0ja\0jad\0jat\0" - "jbe\0jbn\0jct\0jda\0jdg\0jdt\0jee\0jge\0ji\0jje\0jkm\0jml\0jna\0" - "jnd\0jnl\0jns\0jog\0jpa\0jpr\0jul\0jun\0juy\0jya\0jye\0ka\0kaa\0" - "kap\0kaw\0kbd\0kbu\0kby\0kca\0kdq\0kdt\0ket\0kex\0key\0kfa\0kfb\0" - "kfc\0kfd\0kfe\0kfh\0kfi\0kfk\0kfm\0kfp\0kfq\0kfr\0kfs\0kfx\0kfy\0" - "kgj\0kgy\0khb\0khf\0khg\0khn\0kht\0khv\0khw\0kif\0kim\0kip\0kjg\0" - "kjh\0kjl\0kjo\0kjp\0kjt\0kk\0kk_AF\0kk_CN\0kk_IR\0kk_MN\0kkf\0" - "kkh\0kkt\0kle\0klj\0klr\0km\0kmj\0kmz\0kn\0ko\0koi\0kok\0kpt\0" - "kpy\0kqd\0kqy\0kra\0krc\0krk\0krr\0kru\0krv\0ks\0ksu\0ksw\0ksz\0" - "ktb\0ktl\0ktp\0ku_LB\0kuf\0kum\0kv\0kva\0kvq\0kvt\0kvx\0kvy\0" - "kxc\0kxf\0kxk\0kxl\0kxm\0kxp\0kxv\0ky\0ky_CN\0kyu\0kyv\0kyw\0" - "kzh\0lab\0lad\0lae\0lah\0lbc\0lbe\0lbf\0lbj\0lbm\0lbo\0lbr\0lcp\0" - "lep\0lez\0lhm\0lhs\0lif\0lis\0lkh\0lki\0lmh\0lmn\0lo\0loy\0lpo\0" - "lrc\0lrk\0lrl\0lsa\0lsd\0lss\0luk\0luu\0luv\0luz\0lwl\0lwm\0lya\0" - "lzh\0mag\0mai\0man_GN\0mby\0mde\0mdf\0mdx\0mdy\0mfa\0mfi\0mgp\0" - "mhj\0mid\0mjl\0mjq\0mjr\0mjt\0mju\0mjv\0mjz\0mk\0mkb\0mke\0mki\0" - "mkm\0ml\0mlf\0mn\0mn_CN\0mni\0mnj\0mns\0mnw\0mpz\0mr\0mra\0mrd\0" - "mrj\0mro\0mrr\0ms_CC\0mtm\0mtr\0mud\0muk\0mut\0muv\0muz\0mvf\0" - "mvy\0mvz\0mwr\0mwt\0mww\0my\0mym\0myv\0myz\0mzn\0nan\0nao\0ncd\0" - "ncq\0ndf\0ne\0neg\0neh\0nei\0new\0ngt\0nio\0nit\0niv\0nli\0nlm\0" - "nlx\0nmm\0nnp\0nod\0noe\0nog\0noi\0non\0nos\0npb\0nqo\0nsd\0nsf\0" - "nsk\0nst\0nsv\0nty\0ntz\0nwc\0nwx\0nyl\0nyq\0oaa\0oac\0oar\0oav\0" - "obm\0obr\0odk\0oht\0oj\0ojs\0okm\0oko\0okz\0ola\0ole\0omk\0omp\0" - "omr\0oon\0or\0ort\0oru\0orv\0os\0osa\0osc\0osi\0ota\0otb\0otk\0" - "oty\0oui\0pa\0pa_PK\0pal\0paq\0pbt\0pcb\0pce\0pcf\0pcg\0pch\0" - "pci\0pcj\0peg\0peo\0pgd\0pgg\0pgl\0pgn\0phd\0phk\0phl\0phn\0pho\0" - "phr\0pht\0phv\0phw\0pi\0pka\0pkr\0plk\0pll\0pmh\0pnt\0ppa\0pra\0" + "aaf\0aao\0aat\0ab\0abh\0abl\0abv\0acm\0acq\0acw\0acx\0adf\0adx\0" + "ady\0ae\0aeb\0aec\0aee\0aeq\0afb\0agi\0agj\0agx\0ahg\0aho\0ahr\0" + "aib\0aij\0ain\0aio\0aiq\0ajp\0akk\0akv\0alk\0all\0alr\0alt\0alw\0" + "am\0ams\0amw\0ani\0anp\0anr\0anu\0aot\0apc\0apd\0aph\0aqc\0ar\0" + "arc\0arq\0ars\0ary\0arz\0as\0ase\0ask\0atn\0atv\0auj\0auz\0av\0" + "avd\0avl\0awa\0awn\0axm\0ayh\0ayl\0ayn\0ayp\0az_IQ\0az_IR\0az_RU\0" + "azb\0ba\0bal\0bap\0bax\0bbl\0bcq\0bdv\0bdz\0be\0bee\0bej\0bfb\0" + "bfq\0bft\0bfu\0bfw\0bfy\0bfz\0bg\0bgc\0bgd\0bgn\0bgp\0bgq\0bgw\0" + "bgx\0bha\0bhb\0bhd\0bhe\0bhh\0bhi\0bhj\0bhm\0bhn\0bho\0bht\0bhu\0" + "biy\0bjf\0bjj\0bjm\0blk\0blt\0bmj\0bn\0bns\0bo\0bph\0bpx\0bpy\0" + "bqi\0bra\0brb\0brd\0brh\0brk\0brv\0brx\0bsh\0bsk\0bsq\0bst\0btd\0" + "btm\0btv\0bua\0bwe\0bxm\0bxu\0byh\0byn\0byw\0bzi\0cbn\0ccp\0cde\0" + "cdh\0cdi\0cdj\0cdm\0cdo\0cdz\0ce\0cgk\0chg\0chm\0chr\0chx\0cih\0" + "cja\0cji\0cjm\0cjy\0ckb\0ckt\0clh\0clw\0cmg\0cna\0cnp\0cog\0cop\0" + "cpg\0cr\0crh\0crj\0crk\0crl\0crm\0csh\0csp\0csw\0ctd\0ctg\0ctn\0" + "ctt\0cu\0cuu\0cv\0czh\0czk\0daq\0dar\0dcc\0ddo\0def\0deh\0der\0" + "dhi\0dhn\0dho\0dhw\0dka\0dlg\0dmf\0dmk\0dml\0dng\0dnu\0dnv\0doi\0" + "dox\0dre\0drq\0drs\0dry\0dso\0dty\0dub\0duh\0dus\0dv\0dwk\0dwz\0" + "dz\0dzl\0ecr\0ecy\0egy\0eky\0el\0emg\0emu\0enf\0enh\0era\0esg\0" + "esh\0ett\0eve\0evn\0fa\0fay\0faz\0fia\0fmu\0fub\0gan\0gaq\0gas\0" + "gau\0gbj\0gbk\0gbl\0gbm\0gbz\0gdb\0gdo\0gdx\0gez\0ggg\0gha\0ghe\0" + "ghr\0ght\0gig\0gin\0gjk\0gju\0gld\0glh\0glk\0gmv\0gmy\0goe\0gof\0" + "gok\0gom\0gon\0got\0gra\0grc\0grt\0gru\0gu\0gvr\0gwc\0gwf\0gwt\0" + "gyo\0gzi\0ha_CM\0ha_SD\0hac\0hak\0har\0haz\0hbo\0hdy\0he\0hi\0" + "hii\0hit\0hkh\0hlb\0hlu\0hmd\0hmj\0hmq\0hnd\0hne\0hnj\0hnj_AU\0" + "hnj_CN\0hnj_FR\0hnj_GF\0hnj_LA\0hnj_MM\0hnj_SR\0hnj_TH\0hnj_VN\0" + "hno\0hoc\0hoh\0hoj\0how\0hoy\0hpo\0hrt\0hrz\0hsn\0hss\0htx\0hut\0" + "huy\0huz\0hy\0hyw\0ii\0imy\0inh\0int\0ior\0iru\0isk\0itk\0itl\0" + "iu\0iw\0ja\0jad\0jat\0jbe\0jbn\0jct\0jda\0jdg\0jdt\0jee\0jge\0" + "ji\0jje\0jkm\0jml\0jna\0jnd\0jnl\0jns\0jog\0jpa\0jpr\0jrb\0jrb_MA\0" + "jul\0jun\0juy\0jya\0jye\0ka\0kaa\0kap\0kaw\0kbd\0kbu\0kby\0kca\0" + "kdq\0kdt\0ket\0kex\0key\0kfa\0kfb\0kfc\0kfd\0kfe\0kfh\0kfi\0kfk\0" + "kfm\0kfp\0kfq\0kfr\0kfs\0kfx\0kfy\0kgj\0kgy\0khb\0khf\0khg\0khn\0" + "kht\0khv\0khw\0kif\0kim\0kip\0kjg\0kjh\0kjl\0kjo\0kjp\0kjt\0kk\0" + "kk_AF\0kk_CN\0kk_IR\0kk_MN\0kkf\0kkh\0kkt\0kle\0klj\0klr\0km\0" + "kmj\0kmz\0kn\0ko\0koi\0kok\0kpt\0kpy\0kqd\0kqy\0kra\0krc\0krk\0" + "krr\0kru\0krv\0ks\0ksu\0ksw\0ksz\0ktb\0ktl\0ktp\0ku_LB\0kuf\0" + "kum\0kv\0kva\0kvq\0kvt\0kvx\0kvy\0kxf\0kxk\0kxm\0kxp\0ky\0ky_CN\0" + "kyu\0kyv\0kyw\0lab\0lad\0lae\0lah\0lbc\0lbe\0lbf\0lbj\0lbm\0lbo\0" + "lbr\0lcp\0lep\0lez\0lhm\0lhs\0lif\0lis\0lkh\0lki\0lmh\0lmn\0lo\0" + "loy\0lpo\0lrc\0lrk\0lrl\0lsa\0lsd\0lss\0luk\0luu\0luv\0luz\0lwl\0" + "lwm\0lya\0lzh\0mag\0mai\0man_GN\0mby\0mde\0mdf\0mdx\0mdy\0mfa\0" + "mfi\0mgp\0mhj\0mid\0mjl\0mjq\0mjr\0mjt\0mju\0mjv\0mjz\0mk\0mkb\0" + "mke\0mki\0mkm\0ml\0mlf\0mn\0mn_CN\0mni\0mnj\0mns\0mnw\0mpz\0mr\0" + "mra\0mrd\0mrj\0mro\0mrr\0ms_CC\0mtm\0mtr\0mud\0muk\0mut\0muv\0" + "muz\0mvf\0mvy\0mvz\0mwr\0mwt\0mww\0my\0mym\0myv\0myz\0mzn\0nan\0" + "nao\0ncd\0ncq\0ndf\0ne\0neg\0neh\0nei\0new\0ngt\0nio\0nit\0niv\0" + "nli\0nlm\0nlx\0nmm\0nnp\0nod\0noe\0nog\0noi\0non\0nos\0npb\0nqo\0" + "nsd\0nsf\0nsk\0nst\0nsv\0nty\0ntz\0nwc\0nwx\0nyl\0nyq\0oaa\0oac\0" + "oar\0oav\0obm\0obr\0odk\0oht\0oj\0ojs\0okm\0oko\0okz\0ola\0ole\0" + "omk\0omp\0omr\0oon\0or\0ort\0oru\0orv\0os\0osa\0osc\0osi\0ota\0" + "otb\0otk\0oty\0pa\0pa_PK\0pal\0paq\0pbt\0pcb\0pce\0pcf\0pcg\0" + "pch\0pci\0pcj\0peg\0peo\0pgd\0pgg\0pgl\0pgn\0phd\0phk\0phl\0phn\0" + "pho\0phr\0pht\0phv\0phw\0pi\0pka\0pkr\0plk\0pll\0pmh\0pnt\0pra\0" "prc\0prd\0prp\0prt\0prx\0ps\0psh\0psi\0pst\0pum\0pwo\0pwr\0pww\0" "pyx\0qxq\0raa\0rab\0raf\0rah\0raj\0rav\0rbb\0rdb\0rei\0rhg\0rji\0" - "rjs\0rka\0rki\0rkt\0rmi\0rmt\0rmz\0rsk\0rtw\0ru\0rue\0rut\0rwr\0" - "ryu\0sa\0sah\0sam\0sat\0saz\0sbn\0sbu\0sck\0scl\0scp\0sct\0scu\0" - "scx\0sd\0sd_IN\0sdb\0sdf\0sdg\0sdh\0sds\0sel\0sfm\0sga\0sgh\0" - "sgj\0sgr\0sgt\0sgw\0sgy\0shd\0shi\0shm\0shn\0shu\0shv\0si\0sia\0" - "sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0skj\0skr\0slq\0smh\0smp\0smu\0" - "smy\0soa\0sog\0soi\0sou\0spt\0spv\0sqo\0sqq\0sqt\0sr\0srb\0srh\0" - "srx\0srz\0ssh\0sss\0sts\0stv\0sty\0suz\0sva\0swb\0swi\0swv\0syc\0" - "syl\0syn\0syr\0syw\0ta\0tab\0taj\0tbk\0tcn\0tco\0tcx\0tcy\0tda\0" - "tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0tg_PK\0tge\0tgf\0th\0the\0thf\0" - "thi\0thl\0thm\0thq\0thr\0ths\0ti\0tig\0tij\0tin\0tjl\0tjo\0tkb\0" - "tks\0tkt\0tmk\0tmr\0tnv\0tov\0tpu\0tra\0trg\0trm\0trw\0tsd\0tsf\0" - "tsj\0tt\0tth\0tto\0tts\0tvn\0twm\0txg\0txo\0tyr\0tyv\0ude\0udg\0" - "udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0ugh\0ugo\0uk\0uki\0ulc\0unr\0" - "unr_NP\0unx\0ur\0urk\0ush\0uum\0uz_AF\0uz_CN\0uzs\0vaa\0vaf\0" - "vah\0vai\0vas\0vav\0vay\0vgr\0vmd\0vmh\0wal\0wbk\0wbq\0wbr\0wlo\0" - "wme\0wne\0wni\0wsg\0wsv\0wtm\0wuu\0xal\0xan\0xas\0xco\0xcr\0xdq\0" - "xhe\0xhm\0xis\0xka\0xkc\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0xmn\0xmr\0" - "xna\0xnr\0xpg\0xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xss\0xub\0xuj\0" - "xve\0xvi\0xwo\0xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0ygp\0yhd\0" - "yi\0yig\0yih\0yiv\0ykg\0yna\0ynk\0yoi\0yoy\0yrk\0ysd\0ysn\0ysp\0" - "ysr\0ysy\0yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0zau\0zba\0zch\0" - "zdj\0zeh\0zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0zh_BN\0zh_GB\0zh_GF\0" - "zh_HK\0zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0zh_TH\0zh_TW\0" - "zh_US\0zh_VN\0zhd\0zhx\0zkb\0zko\0zkt\0zkz\0zlj\0zln\0zlq\0zqe\0" - "zrp\0zum\0zyg\0zyn\0zzj\0"; + "rjs\0rka\0rki\0rkt\0rmi\0rmt\0rmz\0rom_BG\0rsk\0rtw\0ru\0rue\0" + "rut\0rwr\0ryu\0sa\0sah\0sam\0sat\0saz\0sbn\0sbu\0sck\0scl\0scl_IN\0" + "scp\0sct\0scu\0scx\0sd\0sd_IN\0sdb\0sdf\0sdg\0sdh\0sds\0sel\0" + "sfm\0sga\0sgh\0sgj\0sgr\0sgt\0sgw\0sgy\0shd\0shi\0shm\0shn\0shu\0" + "shv\0si\0sia\0sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0skj\0skr\0slq\0" + "smh\0smp\0smu\0smy\0soa\0sog\0soi\0sou\0spt\0spv\0sqo\0sqq\0sqt\0" + "sr\0srb\0srh\0srx\0srz\0ssh\0sss\0sts\0stv\0sty\0suz\0sva\0swb\0" + "swi\0swv\0syc\0syl\0syn\0syr\0syw\0ta\0tab\0taj\0tbk\0tcn\0tco\0" + "tcx\0tcy\0tda\0tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0tg_PK\0tge\0tgf\0" + "th\0the\0thf\0thi\0thl\0thm\0thq\0thr\0ths\0ti\0tig\0tij\0tin\0" + "tjl\0tjo\0tkb\0tks\0tkt\0tmk\0tmr\0tnv\0tov\0tpu\0tra\0trg\0trm\0" + "trw\0tsd\0tsj\0tt\0tth\0tto\0tts\0tvn\0twm\0txg\0txo\0tyr\0tyv\0" + "ude\0udg\0udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0ugh\0ugo\0uk\0uki\0" + "ulc\0unr\0unr_NP\0unx\0ur\0urk\0ush\0uum\0uz_AF\0uz_CN\0uzs\0" + "vaa\0vaf\0vah\0vai\0vas\0vav\0vay\0vgr\0vmd\0vmh\0wal\0wbk\0wbq\0" + "wbr\0wlo\0wme\0wne\0wni\0wsg\0wsv\0wtm\0wuu\0xal\0xan\0xas\0xco\0" + "xcr\0xdq\0xhe\0xhm\0xis\0xka\0xkc\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0" + "xmn\0xmr\0xna\0xnr\0xpg\0xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xss\0" + "xub\0xuj\0xve\0xvi\0xwo\0xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0" + "ygp\0yhd\0yi\0yig\0yih\0yiv\0ykg\0yna\0ynk\0yoi\0yoy\0yrk\0ysd\0" + "ysn\0ysp\0ysr\0ysy\0yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0zau\0" + "zba\0zch\0zdj\0zeh\0zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0zh_BN\0" + "zh_GB\0zh_GF\0zh_HK\0zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0" + "zh_TH\0zh_TW\0zh_US\0zh_VN\0zhd\0zhx\0zkb\0zko\0zkt\0zkz\0zlj\0" + "zln\0zlq\0zqe\0zrp\0zum\0zyg\0zyn\0zzj\0"; const int32_t defaultScriptTable[] = { 0, 320, // aaf -> Mlym @@ -113,1033 +112,1022 @@ const int32_t defaultScriptTable[] = { 8, 150, // aat -> Grek 12, 100, // ab -> Cyrl 15, 10, // abh -> Arab - 19, 425, // abl -> Rjng - 23, 100, // abq -> Cyrl - 27, 10, // abv -> Arab - 31, 10, // acm -> Arab - 35, 10, // acq -> Arab - 39, 10, // acw -> Arab - 43, 10, // acx -> Arab - 47, 10, // adf -> Arab - 51, 540, // adp -> Tibt - 55, 540, // adx -> Tibt - 59, 100, // ady -> Cyrl - 63, 25, // ae -> Avst - 66, 10, // aeb -> Arab - 70, 10, // aec -> Arab - 74, 10, // aee -> Arab - 78, 10, // aeq -> Arab - 82, 10, // afb -> Arab - 86, 105, // agi -> Deva - 90, 120, // agj -> Ethi - 94, 100, // agx -> Cyrl - 98, 120, // ahg -> Ethi - 102, 5, // aho -> Ahom - 106, 105, // ahr -> Deva - 110, 10, // aib -> Arab - 114, 185, // aij -> Hebr - 118, 220, // ain -> Kana - 122, 345, // aio -> Mymr - 126, 10, // aiq -> Arab - 130, 10, // ajp -> Arab - 134, 10, // ajt -> Arab - 138, 575, // akk -> Xsux - 142, 100, // akv -> Cyrl - 146, 260, // alk -> Laoo - 150, 320, // all -> Mlym - 154, 100, // alr -> Cyrl - 158, 100, // alt -> Cyrl - 162, 120, // alw -> Ethi - 166, 120, // am -> Ethi - 169, 210, // ams -> Jpan - 173, 480, // amw -> Syrc - 177, 100, // ani -> Cyrl - 181, 105, // anp -> Deva - 185, 105, // anr -> Deva - 189, 120, // anu -> Ethi - 193, 45, // aot -> Beng - 197, 10, // apc -> Arab - 201, 10, // apd -> Arab - 205, 105, // aph -> Deva - 209, 100, // aqc -> Cyrl - 213, 10, // ar -> Arab - 216, 15, // arc -> Armi - 220, 10, // arq -> Arab - 224, 10, // ars -> Arab - 228, 10, // ary -> Arab - 232, 10, // arz -> Arab - 236, 45, // as -> Beng - 239, 455, // ase -> Sgnw - 243, 10, // ask -> Arab - 247, 10, // atn -> Arab - 251, 100, // atv -> Cyrl - 255, 10, // auj -> Arab - 259, 10, // auz -> Arab - 263, 100, // av -> Cyrl - 266, 10, // avd -> Arab - 270, 10, // avl -> Arab - 274, 105, // awa -> Deva - 278, 120, // awn -> Ethi - 282, 20, // axm -> Armn - 286, 10, // ayh -> Arab - 290, 10, // ayl -> Arab - 294, 10, // ayn -> Arab - 298, 10, // ayp -> Arab - 302, 10, // az_IQ -> Arab - 308, 10, // az_IR -> Arab - 314, 100, // az_RU -> Cyrl - 320, 10, // azb -> Arab - 324, 100, // ba -> Cyrl - 327, 10, // bal -> Arab - 331, 105, // bap -> Deva - 335, 30, // bax -> Bamu - 339, 125, // bbl -> Geor - 343, 120, // bcq -> Ethi - 347, 385, // bdv -> Orya - 351, 10, // bdz -> Arab - 355, 100, // be -> Cyrl - 358, 105, // bee -> Deva - 362, 10, // bej -> Arab - 366, 105, // bfb -> Deva - 370, 505, // bfq -> Taml - 374, 10, // bft -> Arab - 378, 540, // bfu -> Tibt - 382, 385, // bfw -> Orya - 386, 105, // bfy -> Deva - 390, 105, // bfz -> Deva - 394, 100, // bg -> Cyrl - 397, 105, // bgc -> Deva - 401, 105, // bgd -> Deva - 405, 10, // bgn -> Arab - 409, 10, // bgp -> Arab - 413, 105, // bgq -> Deva - 417, 105, // bgw -> Deva - 421, 150, // bgx -> Grek - 425, 105, // bha -> Deva - 429, 105, // bhb -> Deva - 433, 105, // bhd -> Deva - 437, 10, // bhe -> Arab - 441, 100, // bhh -> Cyrl - 445, 105, // bhi -> Deva - 449, 105, // bhj -> Deva - 453, 10, // bhm -> Arab - 457, 480, // bhn -> Syrc - 461, 105, // bho -> Deva - 465, 490, // bht -> Takr - 469, 105, // bhu -> Deva - 473, 105, // biy -> Deva - 477, 480, // bjf -> Syrc - 481, 120, // bji -> Ethi - 485, 105, // bjj -> Deva - 489, 10, // bjm -> Arab - 493, 345, // blk -> Mymr - 497, 515, // blt -> Tavt - 501, 105, // bmj -> Deva - 505, 45, // bn -> Beng - 508, 105, // bns -> Deva - 512, 540, // bo -> Tibt - 515, 100, // bph -> Cyrl - 519, 105, // bpx -> Deva - 523, 45, // bpy -> Beng - 527, 10, // bqi -> Arab - 531, 105, // bra -> Deva - 535, 235, // brb -> Khmr - 539, 105, // brd -> Deva - 543, 10, // brh -> Arab - 547, 10, // brk -> Arab - 551, 260, // brv -> Laoo - 555, 105, // brx -> Deva - 559, 10, // bsh -> Arab - 563, 10, // bsk -> Arab - 567, 35, // bsq -> Bass - 571, 120, // bst -> Ethi - 575, 40, // btd -> Batk - 579, 40, // btm -> Batk - 583, 105, // btv -> Deva - 587, 100, // bua -> Cyrl - 591, 345, // bwe -> Mymr - 595, 100, // bxm -> Cyrl - 599, 330, // bxu -> Mong - 603, 105, // byh -> Deva - 607, 120, // byn -> Ethi - 611, 105, // byw -> Deva - 615, 535, // bzi -> Thai - 619, 535, // cbn -> Thai - 623, 60, // ccp -> Cakm - 627, 520, // cde -> Telu - 631, 105, // cdh -> Deva - 635, 155, // cdi -> Gujr - 639, 105, // cdj -> Deva - 643, 105, // cdm -> Deva - 647, 175, // cdo -> Hans - 651, 45, // cdz -> Beng - 655, 100, // ce -> Cyrl - 658, 540, // cgk -> Tibt - 662, 10, // chg -> Arab - 666, 100, // chm -> Cyrl - 670, 80, // chr -> Cher - 674, 105, // chx -> Deva - 678, 105, // cih -> Deva - 682, 10, // cja -> Arab - 686, 100, // cji -> Cyrl - 690, 75, // cjm -> Cham - 694, 175, // cjy -> Hans - 698, 10, // ckb -> Arab - 702, 100, // ckt -> Cyrl - 706, 10, // clh -> Arab - 710, 100, // clw -> Cyrl - 714, 475, // cmg -> Soyo - 718, 540, // cna -> Tibt - 722, 175, // cnp -> Hans - 726, 535, // cog -> Thai - 730, 90, // cop -> Copt - 734, 150, // cpg -> Grek - 738, 65, // cr -> Cans - 741, 100, // crh -> Cyrl - 745, 65, // crj -> Cans - 749, 65, // crk -> Cans - 753, 65, // crl -> Cans - 757, 65, // crm -> Cans - 761, 345, // csh -> Mymr - 765, 175, // csp -> Hans - 769, 65, // csw -> Cans - 773, 400, // ctd -> Pauc - 777, 45, // ctg -> Beng - 781, 105, // ctn -> Deva - 785, 505, // ctt -> Taml - 789, 100, // cu -> Cyrl - 792, 255, // cuu -> Lana - 796, 100, // cv -> Cyrl - 799, 175, // czh -> Hans - 803, 185, // czk -> Hebr - 807, 105, // daq -> Deva - 811, 100, // dar -> Cyrl - 815, 10, // dcc -> Arab - 819, 100, // ddo -> Cyrl - 823, 10, // def -> Arab - 827, 10, // deh -> Arab - 831, 45, // der -> Beng - 835, 10, // dgl -> Arab - 839, 105, // dhi -> Deva - 843, 155, // dhn -> Gujr - 847, 105, // dho -> Deva - 851, 105, // dhw -> Deva - 855, 540, // dka -> Tibt - 859, 100, // dlg -> Cyrl - 863, 310, // dmf -> Medf - 867, 10, // dmk -> Arab - 871, 10, // dml -> Arab - 875, 100, // dng -> Cyrl - 879, 345, // dnu -> Mymr - 883, 345, // dnv -> Mymr - 887, 105, // doi -> Deva - 891, 120, // dox -> Ethi - 895, 540, // dre -> Tibt - 899, 330, // drh -> Mong - 903, 105, // drq -> Deva - 907, 120, // drs -> Ethi - 911, 105, // dry -> Deva - 915, 385, // dso -> Orya - 919, 105, // dty -> Deva - 923, 155, // dub -> Gujr - 927, 105, // duh -> Deva - 931, 105, // dus -> Deva - 935, 530, // dv -> Thaa - 938, 385, // dwk -> Orya - 942, 105, // dwz -> Deva - 946, 540, // dz -> Tibt - 949, 540, // dzl -> Tibt - 953, 150, // ecr -> Grek - 957, 95, // ecy -> Cprt - 961, 110, // egy -> Egyp - 965, 215, // eky -> Kali - 969, 150, // el -> Grek - 972, 105, // emg -> Deva - 976, 105, // emu -> Deva - 980, 100, // enf -> Cyrl - 984, 100, // enh -> Cyrl - 988, 505, // era -> Taml - 992, 135, // esg -> Gonm - 996, 10, // esh -> Arab - 1000, 200, // ett -> Ital - 1004, 100, // eve -> Cyrl - 1008, 100, // evn -> Cyrl - 1012, 10, // fa -> Arab - 1015, 10, // fay -> Arab - 1019, 10, // faz -> Arab - 1023, 10, // fia -> Arab - 1027, 105, // fmu -> Deva - 1031, 10, // fub -> Arab - 1035, 175, // gan -> Hans - 1039, 385, // gaq -> Orya - 1043, 155, // gas -> Gujr - 1047, 520, // gau -> Telu - 1051, 385, // gbj -> Orya - 1055, 105, // gbk -> Deva - 1059, 155, // gbl -> Gujr - 1063, 105, // gbm -> Deva - 1067, 10, // gbz -> Arab - 1071, 385, // gdb -> Orya - 1075, 100, // gdo -> Cyrl - 1079, 105, // gdx -> Deva - 1083, 120, // gez -> Ethi - 1087, 10, // ggg -> Arab - 1091, 105, // ggn -> Deva - 1095, 10, // gha -> Arab - 1099, 105, // ghe -> Deva - 1103, 10, // ghr -> Arab - 1107, 540, // ght -> Tibt - 1111, 10, // gig -> Arab - 1115, 100, // gin -> Cyrl - 1119, 10, // gjk -> Arab - 1123, 10, // gju -> Arab - 1127, 100, // gld -> Cyrl - 1131, 10, // glh -> Arab - 1135, 10, // glk -> Arab - 1139, 120, // gmv -> Ethi - 1143, 275, // gmy -> Linb - 1147, 540, // goe -> Tibt - 1151, 120, // gof -> Ethi - 1155, 105, // gok -> Deva - 1159, 105, // gom -> Deva - 1163, 520, // gon -> Telu - 1167, 140, // got -> Goth - 1171, 105, // gra -> Deva - 1175, 95, // grc -> Cprt - 1179, 45, // grt -> Beng - 1183, 120, // gru -> Ethi - 1187, 155, // gu -> Gujr - 1190, 105, // gvr -> Deva - 1194, 10, // gwc -> Arab - 1198, 10, // gwf -> Arab - 1202, 10, // gwt -> Arab - 1206, 105, // gyo -> Deva - 1210, 10, // gzi -> Arab - 1214, 10, // ha_CM -> Arab - 1220, 10, // ha_SD -> Arab - 1226, 10, // hac -> Arab - 1230, 175, // hak -> Hans - 1234, 120, // har -> Ethi - 1238, 10, // haz -> Arab - 1242, 185, // hbo -> Hebr - 1246, 120, // hdy -> Ethi - 1250, 185, // he -> Hebr - 1253, 105, // hi -> Deva - 1256, 490, // hii -> Takr - 1260, 575, // hit -> Xsux - 1264, 10, // hkh -> Arab - 1268, 105, // hlb -> Deva - 1272, 190, // hlu -> Hluw - 1276, 415, // hmd -> Plrd - 1280, 50, // hmj -> Bopo - 1284, 50, // hmq -> Bopo - 1288, 10, // hnd -> Arab - 1292, 105, // hne -> Deva - 1296, 195, // hnj -> Hmnp - 1300, 260, // hnj_AU -> Laoo - 1307, 260, // hnj_CN -> Laoo - 1314, 260, // hnj_FR -> Laoo - 1321, 260, // hnj_GF -> Laoo - 1328, 260, // hnj_LA -> Laoo - 1335, 260, // hnj_MM -> Laoo - 1342, 260, // hnj_SR -> Laoo - 1349, 260, // hnj_TH -> Laoo - 1356, 195, // hnj_US -> Hmnp - 1363, 260, // hnj_VN -> Laoo - 1370, 10, // hno -> Arab - 1374, 105, // hoc -> Deva - 1378, 10, // hoh -> Arab - 1382, 105, // hoj -> Deva - 1386, 170, // how -> Hani - 1390, 105, // hoy -> Deva - 1394, 345, // hpo -> Mymr - 1398, 480, // hrt -> Syrc - 1402, 10, // hrz -> Arab - 1406, 175, // hsn -> Hans - 1410, 10, // hss -> Arab - 1414, 575, // htx -> Xsux - 1418, 105, // hut -> Deva - 1422, 185, // huy -> Hebr - 1426, 100, // huz -> Cyrl - 1430, 20, // hy -> Armn - 1433, 20, // hyw -> Armn - 1437, 580, // ii -> Yiii - 1440, 285, // imy -> Lyci - 1444, 100, // inh -> Cyrl - 1448, 345, // int -> Mymr - 1452, 120, // ior -> Ethi - 1456, 505, // iru -> Taml - 1460, 10, // isk -> Arab - 1464, 185, // itk -> Hebr - 1468, 100, // itl -> Cyrl - 1472, 65, // iu -> Cans - 1475, 185, // iw -> Hebr - 1478, 210, // ja -> Jpan - 1481, 10, // jad -> Arab - 1485, 10, // jat -> Arab - 1489, 185, // jbe -> Hebr - 1493, 10, // jbn -> Arab - 1497, 100, // jct -> Cyrl - 1501, 540, // jda -> Tibt - 1505, 10, // jdg -> Arab - 1509, 100, // jdt -> Cyrl - 1513, 105, // jee -> Deva - 1517, 125, // jge -> Geor - 1521, 185, // ji -> Hebr - 1524, 165, // jje -> Hang - 1528, 345, // jkm -> Mymr - 1532, 105, // jml -> Deva - 1536, 490, // jna -> Takr - 1540, 10, // jnd -> Arab - 1544, 105, // jnl -> Deva - 1548, 105, // jns -> Deva - 1552, 10, // jog -> Arab - 1556, 185, // jpa -> Hebr - 1560, 185, // jpr -> Hebr - 1564, 105, // jul -> Deva - 1568, 385, // jun -> Orya - 1572, 385, // juy -> Orya - 1576, 540, // jya -> Tibt - 1580, 185, // jye -> Hebr - 1584, 125, // ka -> Geor - 1587, 100, // kaa -> Cyrl - 1591, 100, // kap -> Cyrl - 1595, 225, // kaw -> Kawi - 1599, 100, // kbd -> Cyrl - 1603, 10, // kbu -> Arab - 1607, 10, // kby -> Arab - 1611, 100, // kca -> Cyrl - 1615, 45, // kdq -> Beng - 1619, 535, // kdt -> Thai - 1623, 100, // ket -> Cyrl - 1627, 105, // kex -> Deva - 1631, 520, // key -> Telu - 1635, 245, // kfa -> Knda - 1639, 105, // kfb -> Deva - 1643, 520, // kfc -> Telu - 1647, 245, // kfd -> Knda - 1651, 505, // kfe -> Taml - 1655, 320, // kfh -> Mlym - 1659, 505, // kfi -> Taml - 1663, 105, // kfk -> Deva - 1667, 10, // kfm -> Arab - 1671, 105, // kfp -> Deva - 1675, 105, // kfq -> Deva - 1679, 105, // kfr -> Deva - 1683, 105, // kfs -> Deva - 1687, 105, // kfx -> Deva - 1691, 105, // kfy -> Deva - 1695, 105, // kgj -> Deva - 1699, 105, // kgy -> Deva - 1703, 500, // khb -> Talu - 1707, 535, // khf -> Thai - 1711, 540, // khg -> Tibt - 1715, 105, // khn -> Deva - 1719, 345, // kht -> Mymr - 1723, 100, // khv -> Cyrl - 1727, 10, // khw -> Arab - 1731, 105, // kif -> Deva - 1735, 100, // kim -> Cyrl - 1739, 105, // kip -> Deva - 1743, 260, // kjg -> Laoo - 1747, 100, // kjh -> Cyrl - 1751, 105, // kjl -> Deva - 1755, 105, // kjo -> Deva - 1759, 345, // kjp -> Mymr - 1763, 535, // kjt -> Thai - 1767, 100, // kk -> Cyrl - 1770, 10, // kk_AF -> Arab - 1776, 10, // kk_CN -> Arab - 1782, 10, // kk_IR -> Arab - 1788, 10, // kk_MN -> Arab - 1794, 540, // kkf -> Tibt - 1798, 255, // kkh -> Lana - 1802, 105, // kkt -> Deva - 1806, 105, // kle -> Deva - 1810, 10, // klj -> Arab - 1814, 105, // klr -> Deva - 1818, 235, // km -> Khmr - 1821, 105, // kmj -> Deva - 1825, 10, // kmz -> Arab - 1829, 245, // kn -> Knda - 1832, 250, // ko -> Kore - 1835, 100, // koi -> Cyrl - 1839, 105, // kok -> Deva - 1843, 100, // kpt -> Cyrl - 1847, 100, // kpy -> Cyrl - 1851, 480, // kqd -> Syrc - 1855, 120, // kqy -> Ethi - 1859, 105, // kra -> Deva - 1863, 100, // krc -> Cyrl - 1867, 100, // krk -> Cyrl - 1871, 235, // krr -> Khmr - 1875, 105, // kru -> Deva - 1879, 235, // krv -> Khmr - 1883, 10, // ks -> Arab - 1886, 345, // ksu -> Mymr - 1890, 345, // ksw -> Mymr - 1894, 105, // ksz -> Deva - 1898, 120, // ktb -> Ethi - 1902, 10, // ktl -> Arab - 1906, 415, // ktp -> Plrd - 1910, 10, // ku_LB -> Arab - 1916, 260, // kuf -> Laoo - 1920, 100, // kum -> Cyrl - 1924, 100, // kv -> Cyrl - 1927, 100, // kva -> Cyrl - 1931, 345, // kvq -> Mymr - 1935, 345, // kvt -> Mymr - 1939, 10, // kvx -> Arab - 1943, 215, // kvy -> Kali - 1947, 120, // kxc -> Ethi - 1951, 345, // kxf -> Mymr - 1955, 345, // kxk -> Mymr - 1959, 105, // kxl -> Deva - 1963, 535, // kxm -> Thai - 1967, 10, // kxp -> Arab - 1971, 385, // kxv -> Orya - 1975, 100, // ky -> Cyrl - 1978, 10, // ky_CN -> Arab - 1984, 215, // kyu -> Kali - 1988, 105, // kyv -> Deva - 1992, 105, // kyw -> Deva - 1996, 10, // kzh -> Arab - 2000, 270, // lab -> Lina - 2004, 185, // lad -> Hebr - 2008, 105, // lae -> Deva - 2012, 10, // lah -> Arab - 2016, 280, // lbc -> Lisu - 2020, 100, // lbe -> Cyrl - 2024, 105, // lbf -> Deva - 2028, 540, // lbj -> Tibt - 2032, 105, // lbm -> Deva - 2036, 260, // lbo -> Laoo - 2040, 105, // lbr -> Deva - 2044, 535, // lcp -> Thai - 2048, 265, // lep -> Lepc - 2052, 100, // lez -> Cyrl - 2056, 105, // lhm -> Deva - 2060, 480, // lhs -> Syrc - 2064, 105, // lif -> Deva - 2068, 280, // lis -> Lisu - 2072, 540, // lkh -> Tibt - 2076, 10, // lki -> Arab - 2080, 105, // lmh -> Deva - 2084, 520, // lmn -> Telu - 2088, 260, // lo -> Laoo - 2091, 105, // loy -> Deva - 2095, 415, // lpo -> Plrd - 2099, 10, // lrc -> Arab - 2103, 10, // lrk -> Arab - 2107, 10, // lrl -> Arab - 2111, 10, // lsa -> Arab - 2115, 185, // lsd -> Hebr - 2119, 10, // lss -> Arab - 2123, 540, // luk -> Tibt - 2127, 105, // luu -> Deva - 2131, 10, // luv -> Arab - 2135, 10, // luz -> Arab - 2139, 535, // lwl -> Thai - 2143, 535, // lwm -> Thai - 2147, 540, // lya -> Tibt - 2151, 175, // lzh -> Hans - 2155, 105, // mag -> Deva - 2159, 105, // mai -> Deva - 2163, 360, // man_GN -> Nkoo - 2170, 10, // mby -> Arab - 2174, 10, // mde -> Arab - 2178, 100, // mdf -> Cyrl - 2182, 120, // mdx -> Ethi - 2186, 120, // mdy -> Ethi - 2190, 10, // mfa -> Arab - 2194, 10, // mfi -> Arab - 2198, 105, // mgp -> Deva - 2202, 10, // mhj -> Arab - 2206, 295, // mid -> Mand - 2210, 105, // mjl -> Deva - 2214, 320, // mjq -> Mlym - 2218, 320, // mjr -> Mlym - 2222, 105, // mjt -> Deva - 2226, 520, // mju -> Telu - 2230, 320, // mjv -> Mlym - 2234, 105, // mjz -> Deva - 2238, 100, // mk -> Cyrl - 2241, 105, // mkb -> Deva - 2245, 105, // mke -> Deva - 2249, 10, // mki -> Arab - 2253, 535, // mkm -> Thai - 2257, 320, // ml -> Mlym - 2260, 535, // mlf -> Thai - 2264, 100, // mn -> Cyrl - 2267, 330, // mn_CN -> Mong - 2273, 45, // mni -> Beng - 2277, 10, // mnj -> Arab - 2281, 100, // mns -> Cyrl - 2285, 345, // mnw -> Mymr - 2289, 535, // mpz -> Thai - 2293, 105, // mr -> Deva - 2296, 535, // mra -> Thai - 2300, 105, // mrd -> Deva - 2304, 100, // mrj -> Cyrl - 2308, 335, // mro -> Mroo - 2312, 105, // mrr -> Deva - 2316, 10, // ms_CC -> Arab - 2322, 100, // mtm -> Cyrl - 2326, 105, // mtr -> Deva - 2330, 100, // mud -> Cyrl - 2334, 540, // muk -> Tibt - 2338, 105, // mut -> Deva - 2342, 505, // muv -> Taml - 2346, 120, // muz -> Ethi - 2350, 330, // mvf -> Mong - 2354, 10, // mvy -> Arab - 2358, 120, // mvz -> Ethi - 2362, 105, // mwr -> Deva - 2366, 345, // mwt -> Mymr - 2370, 195, // mww -> Hmnp - 2374, 345, // my -> Mymr - 2377, 120, // mym -> Ethi - 2381, 100, // myv -> Cyrl - 2385, 295, // myz -> Mand - 2389, 10, // mzn -> Arab - 2393, 175, // nan -> Hans - 2397, 105, // nao -> Deva - 2401, 105, // ncd -> Deva - 2405, 260, // ncq -> Laoo - 2409, 100, // ndf -> Cyrl - 2413, 105, // ne -> Deva - 2416, 100, // neg -> Cyrl - 2420, 540, // neh -> Tibt - 2424, 575, // nei -> Xsux - 2428, 105, // new -> Deva - 2432, 260, // ngt -> Laoo - 2436, 100, // nio -> Cyrl - 2440, 520, // nit -> Telu - 2444, 100, // niv -> Cyrl - 2448, 10, // nli -> Arab - 2452, 10, // nlm -> Arab - 2456, 105, // nlx -> Deva - 2460, 105, // nmm -> Deva - 2464, 565, // nnp -> Wcho - 2468, 255, // nod -> Lana - 2472, 105, // noe -> Deva - 2476, 100, // nog -> Cyrl - 2480, 105, // noi -> Deva - 2484, 435, // non -> Runr - 2488, 580, // nos -> Yiii - 2492, 540, // npb -> Tibt - 2496, 360, // nqo -> Nkoo - 2500, 580, // nsd -> Yiii - 2504, 580, // nsf -> Yiii - 2508, 65, // nsk -> Cans - 2512, 545, // nst -> Tnsa - 2516, 580, // nsv -> Yiii - 2520, 580, // nty -> Yiii - 2524, 10, // ntz -> Arab - 2528, 355, // nwc -> Newa - 2532, 105, // nwx -> Deva - 2536, 535, // nyl -> Thai - 2540, 10, // nyq -> Arab - 2544, 100, // oaa -> Cyrl - 2548, 100, // oac -> Cyrl - 2552, 480, // oar -> Syrc - 2556, 125, // oav -> Geor - 2560, 410, // obm -> Phnx - 2564, 345, // obr -> Mymr - 2568, 10, // odk -> Arab - 2572, 575, // oht -> Xsux - 2576, 65, // oj -> Cans - 2579, 65, // ojs -> Cans - 2583, 165, // okm -> Hang - 2587, 170, // oko -> Hani - 2591, 235, // okz -> Khmr - 2595, 105, // ola -> Deva - 2599, 540, // ole -> Tibt - 2603, 100, // omk -> Cyrl - 2607, 340, // omp -> Mtei - 2611, 325, // omr -> Modi - 2615, 105, // oon -> Deva - 2619, 385, // or -> Orya - 2622, 520, // ort -> Telu - 2626, 10, // oru -> Arab - 2630, 100, // orv -> Cyrl - 2634, 100, // os -> Cyrl - 2637, 390, // osa -> Osge - 2641, 200, // osc -> Ital - 2645, 205, // osi -> Java - 2649, 10, // ota -> Arab - 2653, 540, // otb -> Tibt - 2657, 380, // otk -> Orkh - 2661, 145, // oty -> Gran - 2665, 395, // oui -> Ougr - 2669, 160, // pa -> Guru - 2672, 10, // pa_PK -> Arab - 2678, 405, // pal -> Phli - 2682, 100, // paq -> Cyrl - 2686, 10, // pbt -> Arab - 2690, 235, // pcb -> Khmr - 2694, 345, // pce -> Mymr - 2698, 320, // pcf -> Mlym - 2702, 320, // pcg -> Mlym - 2706, 105, // pch -> Deva - 2710, 105, // pci -> Deva - 2714, 520, // pcj -> Telu - 2718, 385, // peg -> Orya - 2722, 570, // peo -> Xpeo - 2726, 230, // pgd -> Khar - 2730, 105, // pgg -> Deva - 2734, 370, // pgl -> Ogam - 2738, 200, // pgn -> Ital - 2742, 105, // phd -> Deva - 2746, 345, // phk -> Mymr - 2750, 10, // phl -> Arab - 2754, 410, // phn -> Phnx - 2758, 260, // pho -> Laoo - 2762, 10, // phr -> Arab - 2766, 535, // pht -> Thai - 2770, 10, // phv -> Arab - 2774, 105, // phw -> Deva - 2778, 460, // pi -> Sinh - 2781, 55, // pka -> Brah - 2785, 320, // pkr -> Mlym - 2789, 10, // plk -> Arab - 2793, 345, // pll -> Mymr - 2797, 55, // pmh -> Brah - 2801, 150, // pnt -> Grek - 2805, 105, // ppa -> Deva - 2809, 230, // pra -> Khar - 2813, 10, // prc -> Arab - 2817, 10, // prd -> Arab - 2821, 155, // prp -> Gujr - 2825, 535, // prt -> Thai - 2829, 10, // prx -> Arab - 2833, 10, // ps -> Arab - 2836, 10, // psh -> Arab - 2840, 10, // psi -> Arab - 2844, 10, // pst -> Arab - 2848, 105, // pum -> Deva - 2852, 345, // pwo -> Mymr - 2856, 105, // pwr -> Deva - 2860, 535, // pww -> Thai - 2864, 345, // pyx -> Mymr - 2868, 10, // qxq -> Arab - 2872, 105, // raa -> Deva - 2876, 105, // rab -> Deva - 2880, 105, // raf -> Deva - 2884, 45, // rah -> Beng - 2888, 105, // raj -> Deva - 2892, 105, // rav -> Deva - 2896, 345, // rbb -> Mymr - 2900, 10, // rdb -> Arab - 2904, 385, // rei -> Orya - 2908, 430, // rhg -> Rohg - 2912, 105, // rji -> Deva - 2916, 105, // rjs -> Deva - 2920, 235, // rka -> Khmr - 2924, 345, // rki -> Mymr - 2928, 45, // rkt -> Beng - 2932, 20, // rmi -> Armn - 2936, 10, // rmt -> Arab - 2940, 345, // rmz -> Mymr - 2944, 100, // rsk -> Cyrl - 2948, 105, // rtw -> Deva - 2952, 100, // ru -> Cyrl - 2955, 100, // rue -> Cyrl - 2959, 100, // rut -> Cyrl - 2963, 105, // rwr -> Deva - 2967, 220, // ryu -> Kana - 2971, 105, // sa -> Deva - 2974, 100, // sah -> Cyrl - 2978, 440, // sam -> Samr - 2982, 375, // sat -> Olck - 2986, 450, // saz -> Saur - 2990, 10, // sbn -> Arab - 2994, 540, // sbu -> Tibt - 2998, 105, // sck -> Deva - 3002, 10, // scl -> Arab - 3006, 105, // scp -> Deva - 3010, 260, // sct -> Laoo - 3014, 490, // scu -> Takr - 3018, 150, // scx -> Grek - 3022, 10, // sd -> Arab - 3025, 105, // sd_IN -> Deva - 3031, 10, // sdb -> Arab - 3035, 10, // sdf -> Arab - 3039, 10, // sdg -> Arab - 3043, 10, // sdh -> Arab - 3047, 10, // sds -> Arab - 3051, 100, // sel -> Cyrl - 3055, 415, // sfm -> Plrd - 3059, 370, // sga -> Ogam - 3063, 100, // sgh -> Cyrl - 3067, 105, // sgj -> Deva - 3071, 10, // sgr -> Arab - 3075, 540, // sgt -> Tibt - 3079, 120, // sgw -> Ethi - 3083, 10, // sgy -> Arab - 3087, 10, // shd -> Arab - 3091, 525, // shi -> Tfng - 3095, 10, // shm -> Arab - 3099, 345, // shn -> Mymr - 3103, 10, // shu -> Arab - 3107, 10, // shv -> Arab - 3111, 460, // si -> Sinh - 3114, 100, // sia -> Cyrl - 3118, 540, // sip -> Tibt - 3122, 10, // siy -> Arab - 3126, 10, // siz -> Arab - 3130, 100, // sjd -> Cyrl - 3134, 105, // sjp -> Deva - 3138, 100, // sjt -> Cyrl - 3142, 535, // skb -> Thai - 3146, 105, // skj -> Deva - 3150, 10, // skr -> Arab - 3154, 10, // slq -> Arab - 3158, 580, // smh -> Yiii - 3162, 440, // smp -> Samr - 3166, 235, // smu -> Khmr - 3170, 10, // smy -> Arab - 3174, 515, // soa -> Tavt - 3178, 465, // sog -> Sogd - 3182, 105, // soi -> Deva - 3186, 535, // sou -> Thai - 3190, 540, // spt -> Tibt - 3194, 385, // spv -> Orya - 3198, 10, // sqo -> Arab - 3202, 260, // sqq -> Laoo - 3206, 10, // sqt -> Arab - 3210, 100, // sr -> Cyrl - 3213, 470, // srb -> Sora - 3217, 10, // srh -> Arab - 3221, 105, // srx -> Deva - 3225, 10, // srz -> Arab - 3229, 10, // ssh -> Arab - 3233, 260, // sss -> Laoo - 3237, 10, // sts -> Arab - 3241, 120, // stv -> Ethi - 3245, 100, // sty -> Cyrl - 3249, 105, // suz -> Deva - 3253, 125, // sva -> Geor - 3257, 10, // swb -> Arab - 3261, 170, // swi -> Hani - 3265, 105, // swv -> Deva - 3269, 480, // syc -> Syrc - 3273, 45, // syl -> Beng - 3277, 480, // syn -> Syrc - 3281, 480, // syr -> Syrc - 3285, 105, // syw -> Deva - 3289, 505, // ta -> Taml - 3292, 100, // tab -> Cyrl - 3296, 105, // taj -> Deva - 3300, 485, // tbk -> Tagb - 3304, 540, // tcn -> Tibt - 3308, 345, // tco -> Mymr - 3312, 505, // tcx -> Taml - 3316, 245, // tcy -> Knda - 3320, 525, // tda -> Tfng - 3324, 105, // tdb -> Deva - 3328, 495, // tdd -> Tale - 3332, 105, // tdg -> Deva - 3336, 105, // tdh -> Deva - 3340, 520, // te -> Telu - 3343, 205, // tes -> Java - 3347, 100, // tg -> Cyrl - 3350, 10, // tg_PK -> Arab - 3356, 105, // tge -> Deva - 3360, 540, // tgf -> Tibt - 3364, 535, // th -> Thai - 3367, 105, // the -> Deva - 3371, 105, // thf -> Deva - 3375, 495, // thi -> Tale - 3379, 105, // thl -> Deva - 3383, 535, // thm -> Thai - 3387, 105, // thq -> Deva - 3391, 105, // thr -> Deva - 3395, 105, // ths -> Deva - 3399, 120, // ti -> Ethi - 3402, 120, // tig -> Ethi - 3406, 105, // tij -> Deva - 3410, 100, // tin -> Cyrl - 3414, 345, // tjl -> Mymr - 3418, 10, // tjo -> Arab - 3422, 105, // tkb -> Deva - 3426, 10, // tks -> Arab - 3430, 105, // tkt -> Deva - 3434, 105, // tmk -> Deva - 3438, 480, // tmr -> Syrc - 3442, 60, // tnv -> Cakm - 3446, 10, // tov -> Arab - 3450, 235, // tpu -> Khmr - 3454, 10, // tra -> Arab - 3458, 185, // trg -> Hebr - 3462, 10, // trm -> Arab - 3466, 10, // trw -> Arab - 3470, 150, // tsd -> Grek - 3474, 105, // tsf -> Deva - 3478, 540, // tsj -> Tibt - 3482, 100, // tt -> Cyrl - 3485, 260, // tth -> Laoo - 3489, 260, // tto -> Laoo - 3493, 535, // tts -> Thai - 3497, 345, // tvn -> Mymr - 3501, 105, // twm -> Deva - 3505, 510, // txg -> Tang - 3509, 550, // txo -> Toto - 3513, 515, // tyr -> Tavt - 3517, 100, // tyv -> Cyrl - 3521, 100, // ude -> Cyrl - 3525, 320, // udg -> Mlym - 3529, 0, // udi -> Aghb - 3533, 100, // udm -> Cyrl - 3537, 10, // ug -> Arab - 3540, 100, // ug_KZ -> Cyrl - 3546, 100, // ug_MN -> Cyrl - 3552, 555, // uga -> Ugar - 3556, 100, // ugh -> Cyrl - 3560, 535, // ugo -> Thai - 3564, 100, // uk -> Cyrl - 3567, 385, // uki -> Orya - 3571, 100, // ulc -> Cyrl - 3575, 45, // unr -> Beng - 3579, 105, // unr_NP -> Deva - 3586, 45, // unx -> Beng - 3590, 10, // ur -> Arab - 3593, 535, // urk -> Thai - 3597, 10, // ush -> Arab - 3601, 150, // uum -> Grek - 3605, 10, // uz_AF -> Arab - 3611, 100, // uz_CN -> Cyrl - 3617, 10, // uzs -> Arab - 3621, 505, // vaa -> Taml - 3625, 10, // vaf -> Arab - 3629, 105, // vah -> Deva - 3633, 560, // vai -> Vaii - 3637, 105, // vas -> Deva - 3641, 105, // vav -> Deva - 3645, 105, // vay -> Deva - 3649, 10, // vgr -> Arab - 3653, 245, // vmd -> Knda - 3657, 10, // vmh -> Arab - 3661, 120, // wal -> Ethi - 3665, 10, // wbk -> Arab - 3669, 520, // wbq -> Telu - 3673, 105, // wbr -> Deva - 3677, 10, // wlo -> Arab - 3681, 105, // wme -> Deva - 3685, 10, // wne -> Arab - 3689, 10, // wni -> Arab - 3693, 130, // wsg -> Gong - 3697, 10, // wsv -> Arab - 3701, 105, // wtm -> Deva - 3705, 175, // wuu -> Hans - 3709, 100, // xal -> Cyrl - 3713, 120, // xan -> Ethi - 3717, 100, // xas -> Cyrl - 3721, 85, // xco -> Chrs - 3725, 70, // xcr -> Cari - 3729, 100, // xdq -> Cyrl - 3733, 10, // xhe -> Arab - 3737, 235, // xhm -> Khmr - 3741, 385, // xis -> Orya - 3745, 10, // xka -> Arab - 3749, 10, // xkc -> Arab - 3753, 10, // xkj -> Arab - 3757, 10, // xkp -> Arab - 3761, 285, // xlc -> Lyci - 3765, 290, // xld -> Lydi - 3769, 115, // xly -> Elym - 3773, 125, // xmf -> Geor - 3777, 300, // xmn -> Mani - 3781, 315, // xmr -> Merc - 3785, 350, // xna -> Narb - 3789, 105, // xnr -> Deva - 3793, 150, // xpg -> Grek - 3797, 370, // xpi -> Ogam - 3801, 100, // xpm -> Cyrl - 3805, 420, // xpr -> Prti - 3809, 100, // xrm -> Cyrl - 3813, 100, // xrn -> Cyrl - 3817, 445, // xsa -> Sarb - 3821, 105, // xsr -> Deva - 3825, 100, // xss -> Cyrl - 3829, 505, // xub -> Taml - 3833, 505, // xuj -> Taml - 3837, 200, // xve -> Ital - 3841, 10, // xvi -> Arab - 3845, 100, // xwo -> Cyrl - 3849, 305, // xzh -> Marc - 3853, 100, // yai -> Cyrl - 3857, 105, // ybh -> Deva - 3861, 105, // ybi -> Deva - 3865, 10, // ydg -> Arab - 3869, 320, // yea -> Mlym - 3873, 150, // yej -> Grek - 3877, 520, // yeu -> Telu - 3881, 415, // ygp -> Plrd - 3885, 185, // yhd -> Hebr - 3889, 185, // yi -> Hebr - 3892, 580, // yig -> Yiii - 3896, 185, // yih -> Hebr - 3900, 580, // yiv -> Yiii - 3904, 100, // ykg -> Cyrl - 3908, 415, // yna -> Plrd - 3912, 100, // ynk -> Cyrl - 3916, 210, // yoi -> Jpan - 3920, 535, // yoy -> Thai - 3924, 100, // yrk -> Cyrl - 3928, 580, // ysd -> Yiii - 3932, 580, // ysn -> Yiii - 3936, 580, // ysp -> Yiii - 3940, 100, // ysr -> Cyrl - 3944, 415, // ysy -> Plrd - 3948, 185, // yud -> Hebr - 3952, 180, // yue -> Hant - 3956, 175, // yue_CN -> Hans - 3963, 100, // yug -> Cyrl - 3967, 100, // yux -> Cyrl - 3971, 415, // ywq -> Plrd - 3975, 415, // ywu -> Plrd - 3979, 540, // zau -> Tibt - 3983, 10, // zba -> Arab - 3987, 170, // zch -> Hani - 3991, 10, // zdj -> Arab - 3995, 170, // zeh -> Hani - 3999, 525, // zen -> Tfng - 4003, 170, // zgb -> Hani - 4007, 525, // zgh -> Tfng - 4011, 170, // zgm -> Hani - 4015, 170, // zgn -> Hani - 4019, 175, // zh -> Hans - 4022, 180, // zh_AU -> Hant - 4028, 180, // zh_BN -> Hant - 4034, 180, // zh_GB -> Hant - 4040, 180, // zh_GF -> Hant - 4046, 180, // zh_HK -> Hant - 4052, 180, // zh_ID -> Hant - 4058, 180, // zh_MO -> Hant - 4064, 180, // zh_PA -> Hant - 4070, 180, // zh_PF -> Hant - 4076, 180, // zh_PH -> Hant - 4082, 180, // zh_SR -> Hant - 4088, 180, // zh_TH -> Hant - 4094, 180, // zh_TW -> Hant - 4100, 180, // zh_US -> Hant - 4106, 180, // zh_VN -> Hant - 4112, 170, // zhd -> Hani - 4116, 365, // zhx -> Nshu - 4120, 100, // zkb -> Cyrl - 4124, 100, // zko -> Cyrl - 4128, 240, // zkt -> Kits - 4132, 100, // zkz -> Cyrl - 4136, 170, // zlj -> Hani - 4140, 170, // zln -> Hani - 4144, 170, // zlq -> Hani - 4148, 170, // zqe -> Hani - 4152, 185, // zrp -> Hebr - 4156, 10, // zum -> Arab - 4160, 170, // zyg -> Hani - 4164, 170, // zyn -> Hani - 4168, 170, // zzj -> Hani + 19, 420, // abl -> Rjng + 23, 10, // abv -> Arab + 27, 10, // acm -> Arab + 31, 10, // acq -> Arab + 35, 10, // acw -> Arab + 39, 10, // acx -> Arab + 43, 10, // adf -> Arab + 47, 535, // adx -> Tibt + 51, 100, // ady -> Cyrl + 55, 25, // ae -> Avst + 58, 10, // aeb -> Arab + 62, 10, // aec -> Arab + 66, 10, // aee -> Arab + 70, 10, // aeq -> Arab + 74, 10, // afb -> Arab + 78, 105, // agi -> Deva + 82, 120, // agj -> Ethi + 86, 100, // agx -> Cyrl + 90, 120, // ahg -> Ethi + 94, 5, // aho -> Ahom + 98, 105, // ahr -> Deva + 102, 10, // aib -> Arab + 106, 185, // aij -> Hebr + 110, 220, // ain -> Kana + 114, 345, // aio -> Mymr + 118, 10, // aiq -> Arab + 122, 10, // ajp -> Arab + 126, 570, // akk -> Xsux + 130, 100, // akv -> Cyrl + 134, 260, // alk -> Laoo + 138, 320, // all -> Mlym + 142, 100, // alr -> Cyrl + 146, 100, // alt -> Cyrl + 150, 120, // alw -> Ethi + 154, 120, // am -> Ethi + 157, 210, // ams -> Jpan + 161, 475, // amw -> Syrc + 165, 100, // ani -> Cyrl + 169, 105, // anp -> Deva + 173, 105, // anr -> Deva + 177, 120, // anu -> Ethi + 181, 45, // aot -> Beng + 185, 10, // apc -> Arab + 189, 10, // apd -> Arab + 193, 105, // aph -> Deva + 197, 100, // aqc -> Cyrl + 201, 10, // ar -> Arab + 204, 15, // arc -> Armi + 208, 10, // arq -> Arab + 212, 10, // ars -> Arab + 216, 10, // ary -> Arab + 220, 10, // arz -> Arab + 224, 45, // as -> Beng + 227, 450, // ase -> Sgnw + 231, 10, // ask -> Arab + 235, 10, // atn -> Arab + 239, 100, // atv -> Cyrl + 243, 10, // auj -> Arab + 247, 10, // auz -> Arab + 251, 100, // av -> Cyrl + 254, 10, // avd -> Arab + 258, 10, // avl -> Arab + 262, 105, // awa -> Deva + 266, 120, // awn -> Ethi + 270, 20, // axm -> Armn + 274, 10, // ayh -> Arab + 278, 10, // ayl -> Arab + 282, 10, // ayn -> Arab + 286, 10, // ayp -> Arab + 290, 10, // az_IQ -> Arab + 296, 10, // az_IR -> Arab + 302, 100, // az_RU -> Cyrl + 308, 10, // azb -> Arab + 312, 100, // ba -> Cyrl + 315, 10, // bal -> Arab + 319, 105, // bap -> Deva + 323, 30, // bax -> Bamu + 327, 125, // bbl -> Geor + 331, 120, // bcq -> Ethi + 335, 385, // bdv -> Orya + 339, 10, // bdz -> Arab + 343, 100, // be -> Cyrl + 346, 105, // bee -> Deva + 350, 10, // bej -> Arab + 354, 105, // bfb -> Deva + 358, 500, // bfq -> Taml + 362, 10, // bft -> Arab + 366, 535, // bfu -> Tibt + 370, 385, // bfw -> Orya + 374, 105, // bfy -> Deva + 378, 105, // bfz -> Deva + 382, 100, // bg -> Cyrl + 385, 105, // bgc -> Deva + 389, 105, // bgd -> Deva + 393, 10, // bgn -> Arab + 397, 10, // bgp -> Arab + 401, 105, // bgq -> Deva + 405, 105, // bgw -> Deva + 409, 150, // bgx -> Grek + 413, 105, // bha -> Deva + 417, 105, // bhb -> Deva + 421, 105, // bhd -> Deva + 425, 10, // bhe -> Arab + 429, 100, // bhh -> Cyrl + 433, 105, // bhi -> Deva + 437, 105, // bhj -> Deva + 441, 10, // bhm -> Arab + 445, 475, // bhn -> Syrc + 449, 105, // bho -> Deva + 453, 485, // bht -> Takr + 457, 105, // bhu -> Deva + 461, 105, // biy -> Deva + 465, 475, // bjf -> Syrc + 469, 105, // bjj -> Deva + 473, 10, // bjm -> Arab + 477, 345, // blk -> Mymr + 481, 510, // blt -> Tavt + 485, 105, // bmj -> Deva + 489, 45, // bn -> Beng + 492, 105, // bns -> Deva + 496, 535, // bo -> Tibt + 499, 100, // bph -> Cyrl + 503, 105, // bpx -> Deva + 507, 45, // bpy -> Beng + 511, 10, // bqi -> Arab + 515, 105, // bra -> Deva + 519, 235, // brb -> Khmr + 523, 105, // brd -> Deva + 527, 10, // brh -> Arab + 531, 10, // brk -> Arab + 535, 260, // brv -> Laoo + 539, 105, // brx -> Deva + 543, 10, // bsh -> Arab + 547, 10, // bsk -> Arab + 551, 35, // bsq -> Bass + 555, 120, // bst -> Ethi + 559, 40, // btd -> Batk + 563, 40, // btm -> Batk + 567, 105, // btv -> Deva + 571, 100, // bua -> Cyrl + 575, 345, // bwe -> Mymr + 579, 100, // bxm -> Cyrl + 583, 330, // bxu -> Mong + 587, 105, // byh -> Deva + 591, 120, // byn -> Ethi + 595, 105, // byw -> Deva + 599, 530, // bzi -> Thai + 603, 530, // cbn -> Thai + 607, 60, // ccp -> Cakm + 611, 515, // cde -> Telu + 615, 105, // cdh -> Deva + 619, 155, // cdi -> Gujr + 623, 105, // cdj -> Deva + 627, 105, // cdm -> Deva + 631, 175, // cdo -> Hans + 635, 45, // cdz -> Beng + 639, 100, // ce -> Cyrl + 642, 535, // cgk -> Tibt + 646, 10, // chg -> Arab + 650, 100, // chm -> Cyrl + 654, 80, // chr -> Cher + 658, 105, // chx -> Deva + 662, 105, // cih -> Deva + 666, 10, // cja -> Arab + 670, 100, // cji -> Cyrl + 674, 75, // cjm -> Cham + 678, 175, // cjy -> Hans + 682, 10, // ckb -> Arab + 686, 100, // ckt -> Cyrl + 690, 10, // clh -> Arab + 694, 100, // clw -> Cyrl + 698, 470, // cmg -> Soyo + 702, 535, // cna -> Tibt + 706, 175, // cnp -> Hans + 710, 530, // cog -> Thai + 714, 90, // cop -> Copt + 718, 150, // cpg -> Grek + 722, 65, // cr -> Cans + 725, 100, // crh -> Cyrl + 729, 65, // crj -> Cans + 733, 65, // crk -> Cans + 737, 65, // crl -> Cans + 741, 65, // crm -> Cans + 745, 345, // csh -> Mymr + 749, 175, // csp -> Hans + 753, 65, // csw -> Cans + 757, 395, // ctd -> Pauc + 761, 45, // ctg -> Beng + 765, 105, // ctn -> Deva + 769, 500, // ctt -> Taml + 773, 100, // cu -> Cyrl + 776, 255, // cuu -> Lana + 780, 100, // cv -> Cyrl + 783, 175, // czh -> Hans + 787, 185, // czk -> Hebr + 791, 105, // daq -> Deva + 795, 100, // dar -> Cyrl + 799, 10, // dcc -> Arab + 803, 100, // ddo -> Cyrl + 807, 10, // def -> Arab + 811, 10, // deh -> Arab + 815, 45, // der -> Beng + 819, 105, // dhi -> Deva + 823, 155, // dhn -> Gujr + 827, 105, // dho -> Deva + 831, 105, // dhw -> Deva + 835, 535, // dka -> Tibt + 839, 100, // dlg -> Cyrl + 843, 310, // dmf -> Medf + 847, 10, // dmk -> Arab + 851, 10, // dml -> Arab + 855, 100, // dng -> Cyrl + 859, 345, // dnu -> Mymr + 863, 345, // dnv -> Mymr + 867, 105, // doi -> Deva + 871, 120, // dox -> Ethi + 875, 535, // dre -> Tibt + 879, 105, // drq -> Deva + 883, 120, // drs -> Ethi + 887, 105, // dry -> Deva + 891, 385, // dso -> Orya + 895, 105, // dty -> Deva + 899, 155, // dub -> Gujr + 903, 105, // duh -> Deva + 907, 105, // dus -> Deva + 911, 525, // dv -> Thaa + 914, 385, // dwk -> Orya + 918, 105, // dwz -> Deva + 922, 535, // dz -> Tibt + 925, 535, // dzl -> Tibt + 929, 150, // ecr -> Grek + 933, 95, // ecy -> Cprt + 937, 110, // egy -> Egyp + 941, 215, // eky -> Kali + 945, 150, // el -> Grek + 948, 105, // emg -> Deva + 952, 105, // emu -> Deva + 956, 100, // enf -> Cyrl + 960, 100, // enh -> Cyrl + 964, 500, // era -> Taml + 968, 135, // esg -> Gonm + 972, 10, // esh -> Arab + 976, 200, // ett -> Ital + 980, 100, // eve -> Cyrl + 984, 100, // evn -> Cyrl + 988, 10, // fa -> Arab + 991, 10, // fay -> Arab + 995, 10, // faz -> Arab + 999, 10, // fia -> Arab + 1003, 105, // fmu -> Deva + 1007, 10, // fub -> Arab + 1011, 175, // gan -> Hans + 1015, 385, // gaq -> Orya + 1019, 155, // gas -> Gujr + 1023, 515, // gau -> Telu + 1027, 385, // gbj -> Orya + 1031, 105, // gbk -> Deva + 1035, 155, // gbl -> Gujr + 1039, 105, // gbm -> Deva + 1043, 10, // gbz -> Arab + 1047, 385, // gdb -> Orya + 1051, 100, // gdo -> Cyrl + 1055, 105, // gdx -> Deva + 1059, 120, // gez -> Ethi + 1063, 10, // ggg -> Arab + 1067, 10, // gha -> Arab + 1071, 105, // ghe -> Deva + 1075, 10, // ghr -> Arab + 1079, 535, // ght -> Tibt + 1083, 10, // gig -> Arab + 1087, 100, // gin -> Cyrl + 1091, 10, // gjk -> Arab + 1095, 10, // gju -> Arab + 1099, 100, // gld -> Cyrl + 1103, 10, // glh -> Arab + 1107, 10, // glk -> Arab + 1111, 120, // gmv -> Ethi + 1115, 275, // gmy -> Linb + 1119, 535, // goe -> Tibt + 1123, 120, // gof -> Ethi + 1127, 105, // gok -> Deva + 1131, 105, // gom -> Deva + 1135, 515, // gon -> Telu + 1139, 140, // got -> Goth + 1143, 105, // gra -> Deva + 1147, 95, // grc -> Cprt + 1151, 45, // grt -> Beng + 1155, 120, // gru -> Ethi + 1159, 155, // gu -> Gujr + 1162, 105, // gvr -> Deva + 1166, 10, // gwc -> Arab + 1170, 10, // gwf -> Arab + 1174, 10, // gwt -> Arab + 1178, 105, // gyo -> Deva + 1182, 10, // gzi -> Arab + 1186, 10, // ha_CM -> Arab + 1192, 10, // ha_SD -> Arab + 1198, 10, // hac -> Arab + 1202, 175, // hak -> Hans + 1206, 120, // har -> Ethi + 1210, 10, // haz -> Arab + 1214, 185, // hbo -> Hebr + 1218, 120, // hdy -> Ethi + 1222, 185, // he -> Hebr + 1225, 105, // hi -> Deva + 1228, 485, // hii -> Takr + 1232, 570, // hit -> Xsux + 1236, 10, // hkh -> Arab + 1240, 105, // hlb -> Deva + 1244, 190, // hlu -> Hluw + 1248, 410, // hmd -> Plrd + 1252, 50, // hmj -> Bopo + 1256, 50, // hmq -> Bopo + 1260, 10, // hnd -> Arab + 1264, 105, // hne -> Deva + 1268, 195, // hnj -> Hmnp + 1272, 260, // hnj_AU -> Laoo + 1279, 260, // hnj_CN -> Laoo + 1286, 260, // hnj_FR -> Laoo + 1293, 260, // hnj_GF -> Laoo + 1300, 260, // hnj_LA -> Laoo + 1307, 260, // hnj_MM -> Laoo + 1314, 260, // hnj_SR -> Laoo + 1321, 260, // hnj_TH -> Laoo + 1328, 260, // hnj_VN -> Laoo + 1335, 10, // hno -> Arab + 1339, 105, // hoc -> Deva + 1343, 10, // hoh -> Arab + 1347, 105, // hoj -> Deva + 1351, 170, // how -> Hani + 1355, 105, // hoy -> Deva + 1359, 345, // hpo -> Mymr + 1363, 475, // hrt -> Syrc + 1367, 10, // hrz -> Arab + 1371, 175, // hsn -> Hans + 1375, 10, // hss -> Arab + 1379, 570, // htx -> Xsux + 1383, 105, // hut -> Deva + 1387, 185, // huy -> Hebr + 1391, 100, // huz -> Cyrl + 1395, 20, // hy -> Armn + 1398, 20, // hyw -> Armn + 1402, 575, // ii -> Yiii + 1405, 285, // imy -> Lyci + 1409, 100, // inh -> Cyrl + 1413, 345, // int -> Mymr + 1417, 120, // ior -> Ethi + 1421, 500, // iru -> Taml + 1425, 10, // isk -> Arab + 1429, 185, // itk -> Hebr + 1433, 100, // itl -> Cyrl + 1437, 65, // iu -> Cans + 1440, 185, // iw -> Hebr + 1443, 210, // ja -> Jpan + 1446, 10, // jad -> Arab + 1450, 10, // jat -> Arab + 1454, 185, // jbe -> Hebr + 1458, 10, // jbn -> Arab + 1462, 100, // jct -> Cyrl + 1466, 535, // jda -> Tibt + 1470, 10, // jdg -> Arab + 1474, 100, // jdt -> Cyrl + 1478, 105, // jee -> Deva + 1482, 125, // jge -> Geor + 1486, 185, // ji -> Hebr + 1489, 165, // jje -> Hang + 1493, 345, // jkm -> Mymr + 1497, 105, // jml -> Deva + 1501, 485, // jna -> Takr + 1505, 10, // jnd -> Arab + 1509, 105, // jnl -> Deva + 1513, 105, // jns -> Deva + 1517, 10, // jog -> Arab + 1521, 185, // jpa -> Hebr + 1525, 185, // jpr -> Hebr + 1529, 185, // jrb -> Hebr + 1533, 10, // jrb_MA -> Arab + 1540, 105, // jul -> Deva + 1544, 385, // jun -> Orya + 1548, 385, // juy -> Orya + 1552, 535, // jya -> Tibt + 1556, 185, // jye -> Hebr + 1560, 125, // ka -> Geor + 1563, 100, // kaa -> Cyrl + 1567, 100, // kap -> Cyrl + 1571, 225, // kaw -> Kawi + 1575, 100, // kbd -> Cyrl + 1579, 10, // kbu -> Arab + 1583, 10, // kby -> Arab + 1587, 100, // kca -> Cyrl + 1591, 45, // kdq -> Beng + 1595, 530, // kdt -> Thai + 1599, 100, // ket -> Cyrl + 1603, 105, // kex -> Deva + 1607, 515, // key -> Telu + 1611, 245, // kfa -> Knda + 1615, 105, // kfb -> Deva + 1619, 515, // kfc -> Telu + 1623, 245, // kfd -> Knda + 1627, 500, // kfe -> Taml + 1631, 320, // kfh -> Mlym + 1635, 500, // kfi -> Taml + 1639, 105, // kfk -> Deva + 1643, 10, // kfm -> Arab + 1647, 105, // kfp -> Deva + 1651, 105, // kfq -> Deva + 1655, 105, // kfr -> Deva + 1659, 105, // kfs -> Deva + 1663, 105, // kfx -> Deva + 1667, 105, // kfy -> Deva + 1671, 105, // kgj -> Deva + 1675, 105, // kgy -> Deva + 1679, 495, // khb -> Talu + 1683, 530, // khf -> Thai + 1687, 535, // khg -> Tibt + 1691, 105, // khn -> Deva + 1695, 345, // kht -> Mymr + 1699, 100, // khv -> Cyrl + 1703, 10, // khw -> Arab + 1707, 105, // kif -> Deva + 1711, 100, // kim -> Cyrl + 1715, 105, // kip -> Deva + 1719, 260, // kjg -> Laoo + 1723, 100, // kjh -> Cyrl + 1727, 105, // kjl -> Deva + 1731, 105, // kjo -> Deva + 1735, 345, // kjp -> Mymr + 1739, 530, // kjt -> Thai + 1743, 100, // kk -> Cyrl + 1746, 10, // kk_AF -> Arab + 1752, 10, // kk_CN -> Arab + 1758, 10, // kk_IR -> Arab + 1764, 10, // kk_MN -> Arab + 1770, 535, // kkf -> Tibt + 1774, 255, // kkh -> Lana + 1778, 105, // kkt -> Deva + 1782, 105, // kle -> Deva + 1786, 10, // klj -> Arab + 1790, 105, // klr -> Deva + 1794, 235, // km -> Khmr + 1797, 105, // kmj -> Deva + 1801, 10, // kmz -> Arab + 1805, 245, // kn -> Knda + 1808, 250, // ko -> Kore + 1811, 100, // koi -> Cyrl + 1815, 105, // kok -> Deva + 1819, 100, // kpt -> Cyrl + 1823, 100, // kpy -> Cyrl + 1827, 475, // kqd -> Syrc + 1831, 120, // kqy -> Ethi + 1835, 105, // kra -> Deva + 1839, 100, // krc -> Cyrl + 1843, 100, // krk -> Cyrl + 1847, 235, // krr -> Khmr + 1851, 105, // kru -> Deva + 1855, 235, // krv -> Khmr + 1859, 10, // ks -> Arab + 1862, 345, // ksu -> Mymr + 1866, 345, // ksw -> Mymr + 1870, 105, // ksz -> Deva + 1874, 120, // ktb -> Ethi + 1878, 10, // ktl -> Arab + 1882, 410, // ktp -> Plrd + 1886, 10, // ku_LB -> Arab + 1892, 260, // kuf -> Laoo + 1896, 100, // kum -> Cyrl + 1900, 100, // kv -> Cyrl + 1903, 100, // kva -> Cyrl + 1907, 345, // kvq -> Mymr + 1911, 345, // kvt -> Mymr + 1915, 10, // kvx -> Arab + 1919, 215, // kvy -> Kali + 1923, 345, // kxf -> Mymr + 1927, 345, // kxk -> Mymr + 1931, 530, // kxm -> Thai + 1935, 10, // kxp -> Arab + 1939, 100, // ky -> Cyrl + 1942, 10, // ky_CN -> Arab + 1948, 215, // kyu -> Kali + 1952, 105, // kyv -> Deva + 1956, 105, // kyw -> Deva + 1960, 270, // lab -> Lina + 1964, 185, // lad -> Hebr + 1968, 105, // lae -> Deva + 1972, 10, // lah -> Arab + 1976, 280, // lbc -> Lisu + 1980, 100, // lbe -> Cyrl + 1984, 105, // lbf -> Deva + 1988, 535, // lbj -> Tibt + 1992, 105, // lbm -> Deva + 1996, 260, // lbo -> Laoo + 2000, 105, // lbr -> Deva + 2004, 530, // lcp -> Thai + 2008, 265, // lep -> Lepc + 2012, 100, // lez -> Cyrl + 2016, 105, // lhm -> Deva + 2020, 475, // lhs -> Syrc + 2024, 105, // lif -> Deva + 2028, 280, // lis -> Lisu + 2032, 535, // lkh -> Tibt + 2036, 10, // lki -> Arab + 2040, 105, // lmh -> Deva + 2044, 515, // lmn -> Telu + 2048, 260, // lo -> Laoo + 2051, 105, // loy -> Deva + 2055, 410, // lpo -> Plrd + 2059, 10, // lrc -> Arab + 2063, 10, // lrk -> Arab + 2067, 10, // lrl -> Arab + 2071, 10, // lsa -> Arab + 2075, 185, // lsd -> Hebr + 2079, 10, // lss -> Arab + 2083, 535, // luk -> Tibt + 2087, 105, // luu -> Deva + 2091, 10, // luv -> Arab + 2095, 10, // luz -> Arab + 2099, 530, // lwl -> Thai + 2103, 530, // lwm -> Thai + 2107, 535, // lya -> Tibt + 2111, 175, // lzh -> Hans + 2115, 105, // mag -> Deva + 2119, 105, // mai -> Deva + 2123, 360, // man_GN -> Nkoo + 2130, 10, // mby -> Arab + 2134, 10, // mde -> Arab + 2138, 100, // mdf -> Cyrl + 2142, 120, // mdx -> Ethi + 2146, 120, // mdy -> Ethi + 2150, 10, // mfa -> Arab + 2154, 10, // mfi -> Arab + 2158, 105, // mgp -> Deva + 2162, 10, // mhj -> Arab + 2166, 295, // mid -> Mand + 2170, 105, // mjl -> Deva + 2174, 320, // mjq -> Mlym + 2178, 320, // mjr -> Mlym + 2182, 105, // mjt -> Deva + 2186, 515, // mju -> Telu + 2190, 320, // mjv -> Mlym + 2194, 105, // mjz -> Deva + 2198, 100, // mk -> Cyrl + 2201, 105, // mkb -> Deva + 2205, 105, // mke -> Deva + 2209, 10, // mki -> Arab + 2213, 530, // mkm -> Thai + 2217, 320, // ml -> Mlym + 2220, 530, // mlf -> Thai + 2224, 100, // mn -> Cyrl + 2227, 330, // mn_CN -> Mong + 2233, 45, // mni -> Beng + 2237, 10, // mnj -> Arab + 2241, 100, // mns -> Cyrl + 2245, 345, // mnw -> Mymr + 2249, 530, // mpz -> Thai + 2253, 105, // mr -> Deva + 2256, 530, // mra -> Thai + 2260, 105, // mrd -> Deva + 2264, 100, // mrj -> Cyrl + 2268, 335, // mro -> Mroo + 2272, 105, // mrr -> Deva + 2276, 10, // ms_CC -> Arab + 2282, 100, // mtm -> Cyrl + 2286, 105, // mtr -> Deva + 2290, 100, // mud -> Cyrl + 2294, 535, // muk -> Tibt + 2298, 105, // mut -> Deva + 2302, 500, // muv -> Taml + 2306, 120, // muz -> Ethi + 2310, 330, // mvf -> Mong + 2314, 10, // mvy -> Arab + 2318, 120, // mvz -> Ethi + 2322, 105, // mwr -> Deva + 2326, 345, // mwt -> Mymr + 2330, 195, // mww -> Hmnp + 2334, 345, // my -> Mymr + 2337, 120, // mym -> Ethi + 2341, 100, // myv -> Cyrl + 2345, 295, // myz -> Mand + 2349, 10, // mzn -> Arab + 2353, 175, // nan -> Hans + 2357, 105, // nao -> Deva + 2361, 105, // ncd -> Deva + 2365, 260, // ncq -> Laoo + 2369, 100, // ndf -> Cyrl + 2373, 105, // ne -> Deva + 2376, 100, // neg -> Cyrl + 2380, 535, // neh -> Tibt + 2384, 570, // nei -> Xsux + 2388, 105, // new -> Deva + 2392, 260, // ngt -> Laoo + 2396, 100, // nio -> Cyrl + 2400, 515, // nit -> Telu + 2404, 100, // niv -> Cyrl + 2408, 10, // nli -> Arab + 2412, 10, // nlm -> Arab + 2416, 105, // nlx -> Deva + 2420, 105, // nmm -> Deva + 2424, 560, // nnp -> Wcho + 2428, 255, // nod -> Lana + 2432, 105, // noe -> Deva + 2436, 100, // nog -> Cyrl + 2440, 105, // noi -> Deva + 2444, 430, // non -> Runr + 2448, 575, // nos -> Yiii + 2452, 535, // npb -> Tibt + 2456, 360, // nqo -> Nkoo + 2460, 575, // nsd -> Yiii + 2464, 575, // nsf -> Yiii + 2468, 65, // nsk -> Cans + 2472, 540, // nst -> Tnsa + 2476, 575, // nsv -> Yiii + 2480, 575, // nty -> Yiii + 2484, 10, // ntz -> Arab + 2488, 355, // nwc -> Newa + 2492, 105, // nwx -> Deva + 2496, 530, // nyl -> Thai + 2500, 10, // nyq -> Arab + 2504, 100, // oaa -> Cyrl + 2508, 100, // oac -> Cyrl + 2512, 475, // oar -> Syrc + 2516, 125, // oav -> Geor + 2520, 405, // obm -> Phnx + 2524, 345, // obr -> Mymr + 2528, 10, // odk -> Arab + 2532, 570, // oht -> Xsux + 2536, 65, // oj -> Cans + 2539, 65, // ojs -> Cans + 2543, 165, // okm -> Hang + 2547, 170, // oko -> Hani + 2551, 235, // okz -> Khmr + 2555, 105, // ola -> Deva + 2559, 535, // ole -> Tibt + 2563, 100, // omk -> Cyrl + 2567, 340, // omp -> Mtei + 2571, 325, // omr -> Modi + 2575, 105, // oon -> Deva + 2579, 385, // or -> Orya + 2582, 515, // ort -> Telu + 2586, 10, // oru -> Arab + 2590, 100, // orv -> Cyrl + 2594, 100, // os -> Cyrl + 2597, 390, // osa -> Osge + 2601, 200, // osc -> Ital + 2605, 205, // osi -> Java + 2609, 10, // ota -> Arab + 2613, 535, // otb -> Tibt + 2617, 380, // otk -> Orkh + 2621, 145, // oty -> Gran + 2625, 160, // pa -> Guru + 2628, 10, // pa_PK -> Arab + 2634, 400, // pal -> Phli + 2638, 100, // paq -> Cyrl + 2642, 10, // pbt -> Arab + 2646, 235, // pcb -> Khmr + 2650, 345, // pce -> Mymr + 2654, 320, // pcf -> Mlym + 2658, 320, // pcg -> Mlym + 2662, 105, // pch -> Deva + 2666, 105, // pci -> Deva + 2670, 515, // pcj -> Telu + 2674, 385, // peg -> Orya + 2678, 565, // peo -> Xpeo + 2682, 230, // pgd -> Khar + 2686, 105, // pgg -> Deva + 2690, 370, // pgl -> Ogam + 2694, 200, // pgn -> Ital + 2698, 105, // phd -> Deva + 2702, 345, // phk -> Mymr + 2706, 10, // phl -> Arab + 2710, 405, // phn -> Phnx + 2714, 260, // pho -> Laoo + 2718, 10, // phr -> Arab + 2722, 530, // pht -> Thai + 2726, 10, // phv -> Arab + 2730, 105, // phw -> Deva + 2734, 455, // pi -> Sinh + 2737, 55, // pka -> Brah + 2741, 320, // pkr -> Mlym + 2745, 10, // plk -> Arab + 2749, 345, // pll -> Mymr + 2753, 55, // pmh -> Brah + 2757, 150, // pnt -> Grek + 2761, 230, // pra -> Khar + 2765, 10, // prc -> Arab + 2769, 10, // prd -> Arab + 2773, 155, // prp -> Gujr + 2777, 530, // prt -> Thai + 2781, 10, // prx -> Arab + 2785, 10, // ps -> Arab + 2788, 10, // psh -> Arab + 2792, 10, // psi -> Arab + 2796, 10, // pst -> Arab + 2800, 105, // pum -> Deva + 2804, 345, // pwo -> Mymr + 2808, 105, // pwr -> Deva + 2812, 530, // pww -> Thai + 2816, 345, // pyx -> Mymr + 2820, 10, // qxq -> Arab + 2824, 105, // raa -> Deva + 2828, 105, // rab -> Deva + 2832, 105, // raf -> Deva + 2836, 45, // rah -> Beng + 2840, 105, // raj -> Deva + 2844, 105, // rav -> Deva + 2848, 345, // rbb -> Mymr + 2852, 10, // rdb -> Arab + 2856, 385, // rei -> Orya + 2860, 425, // rhg -> Rohg + 2864, 105, // rji -> Deva + 2868, 105, // rjs -> Deva + 2872, 235, // rka -> Khmr + 2876, 345, // rki -> Mymr + 2880, 45, // rkt -> Beng + 2884, 20, // rmi -> Armn + 2888, 10, // rmt -> Arab + 2892, 345, // rmz -> Mymr + 2896, 100, // rom_BG -> Cyrl + 2903, 100, // rsk -> Cyrl + 2907, 105, // rtw -> Deva + 2911, 100, // ru -> Cyrl + 2914, 100, // rue -> Cyrl + 2918, 100, // rut -> Cyrl + 2922, 105, // rwr -> Deva + 2926, 220, // ryu -> Kana + 2930, 105, // sa -> Deva + 2933, 100, // sah -> Cyrl + 2937, 435, // sam -> Samr + 2941, 375, // sat -> Olck + 2945, 445, // saz -> Saur + 2949, 10, // sbn -> Arab + 2953, 535, // sbu -> Tibt + 2957, 105, // sck -> Deva + 2961, 10, // scl -> Arab + 2965, 10, // scl_IN -> Arab + 2972, 105, // scp -> Deva + 2976, 260, // sct -> Laoo + 2980, 485, // scu -> Takr + 2984, 150, // scx -> Grek + 2988, 10, // sd -> Arab + 2991, 105, // sd_IN -> Deva + 2997, 10, // sdb -> Arab + 3001, 10, // sdf -> Arab + 3005, 10, // sdg -> Arab + 3009, 10, // sdh -> Arab + 3013, 10, // sds -> Arab + 3017, 100, // sel -> Cyrl + 3021, 410, // sfm -> Plrd + 3025, 370, // sga -> Ogam + 3029, 100, // sgh -> Cyrl + 3033, 105, // sgj -> Deva + 3037, 10, // sgr -> Arab + 3041, 535, // sgt -> Tibt + 3045, 120, // sgw -> Ethi + 3049, 10, // sgy -> Arab + 3053, 10, // shd -> Arab + 3057, 520, // shi -> Tfng + 3061, 10, // shm -> Arab + 3065, 345, // shn -> Mymr + 3069, 10, // shu -> Arab + 3073, 10, // shv -> Arab + 3077, 455, // si -> Sinh + 3080, 100, // sia -> Cyrl + 3084, 535, // sip -> Tibt + 3088, 10, // siy -> Arab + 3092, 10, // siz -> Arab + 3096, 100, // sjd -> Cyrl + 3100, 105, // sjp -> Deva + 3104, 100, // sjt -> Cyrl + 3108, 530, // skb -> Thai + 3112, 105, // skj -> Deva + 3116, 10, // skr -> Arab + 3120, 10, // slq -> Arab + 3124, 575, // smh -> Yiii + 3128, 435, // smp -> Samr + 3132, 235, // smu -> Khmr + 3136, 10, // smy -> Arab + 3140, 510, // soa -> Tavt + 3144, 460, // sog -> Sogd + 3148, 105, // soi -> Deva + 3152, 530, // sou -> Thai + 3156, 535, // spt -> Tibt + 3160, 385, // spv -> Orya + 3164, 10, // sqo -> Arab + 3168, 260, // sqq -> Laoo + 3172, 10, // sqt -> Arab + 3176, 100, // sr -> Cyrl + 3179, 465, // srb -> Sora + 3183, 10, // srh -> Arab + 3187, 105, // srx -> Deva + 3191, 10, // srz -> Arab + 3195, 10, // ssh -> Arab + 3199, 260, // sss -> Laoo + 3203, 10, // sts -> Arab + 3207, 120, // stv -> Ethi + 3211, 100, // sty -> Cyrl + 3215, 105, // suz -> Deva + 3219, 125, // sva -> Geor + 3223, 10, // swb -> Arab + 3227, 170, // swi -> Hani + 3231, 105, // swv -> Deva + 3235, 475, // syc -> Syrc + 3239, 45, // syl -> Beng + 3243, 475, // syn -> Syrc + 3247, 475, // syr -> Syrc + 3251, 105, // syw -> Deva + 3255, 500, // ta -> Taml + 3258, 100, // tab -> Cyrl + 3262, 105, // taj -> Deva + 3266, 480, // tbk -> Tagb + 3270, 535, // tcn -> Tibt + 3274, 345, // tco -> Mymr + 3278, 500, // tcx -> Taml + 3282, 245, // tcy -> Knda + 3286, 520, // tda -> Tfng + 3290, 105, // tdb -> Deva + 3294, 490, // tdd -> Tale + 3298, 105, // tdg -> Deva + 3302, 105, // tdh -> Deva + 3306, 515, // te -> Telu + 3309, 205, // tes -> Java + 3313, 100, // tg -> Cyrl + 3316, 10, // tg_PK -> Arab + 3322, 105, // tge -> Deva + 3326, 535, // tgf -> Tibt + 3330, 530, // th -> Thai + 3333, 105, // the -> Deva + 3337, 105, // thf -> Deva + 3341, 490, // thi -> Tale + 3345, 105, // thl -> Deva + 3349, 530, // thm -> Thai + 3353, 105, // thq -> Deva + 3357, 105, // thr -> Deva + 3361, 105, // ths -> Deva + 3365, 120, // ti -> Ethi + 3368, 120, // tig -> Ethi + 3372, 105, // tij -> Deva + 3376, 100, // tin -> Cyrl + 3380, 345, // tjl -> Mymr + 3384, 10, // tjo -> Arab + 3388, 105, // tkb -> Deva + 3392, 10, // tks -> Arab + 3396, 105, // tkt -> Deva + 3400, 105, // tmk -> Deva + 3404, 475, // tmr -> Syrc + 3408, 60, // tnv -> Cakm + 3412, 10, // tov -> Arab + 3416, 235, // tpu -> Khmr + 3420, 10, // tra -> Arab + 3424, 185, // trg -> Hebr + 3428, 10, // trm -> Arab + 3432, 10, // trw -> Arab + 3436, 150, // tsd -> Grek + 3440, 535, // tsj -> Tibt + 3444, 100, // tt -> Cyrl + 3447, 260, // tth -> Laoo + 3451, 260, // tto -> Laoo + 3455, 530, // tts -> Thai + 3459, 345, // tvn -> Mymr + 3463, 105, // twm -> Deva + 3467, 505, // txg -> Tang + 3471, 545, // txo -> Toto + 3475, 510, // tyr -> Tavt + 3479, 100, // tyv -> Cyrl + 3483, 100, // ude -> Cyrl + 3487, 320, // udg -> Mlym + 3491, 0, // udi -> Aghb + 3495, 100, // udm -> Cyrl + 3499, 10, // ug -> Arab + 3502, 100, // ug_KZ -> Cyrl + 3508, 100, // ug_MN -> Cyrl + 3514, 550, // uga -> Ugar + 3518, 100, // ugh -> Cyrl + 3522, 530, // ugo -> Thai + 3526, 100, // uk -> Cyrl + 3529, 385, // uki -> Orya + 3533, 100, // ulc -> Cyrl + 3537, 45, // unr -> Beng + 3541, 105, // unr_NP -> Deva + 3548, 45, // unx -> Beng + 3552, 10, // ur -> Arab + 3555, 530, // urk -> Thai + 3559, 10, // ush -> Arab + 3563, 150, // uum -> Grek + 3567, 10, // uz_AF -> Arab + 3573, 100, // uz_CN -> Cyrl + 3579, 10, // uzs -> Arab + 3583, 500, // vaa -> Taml + 3587, 10, // vaf -> Arab + 3591, 105, // vah -> Deva + 3595, 555, // vai -> Vaii + 3599, 105, // vas -> Deva + 3603, 105, // vav -> Deva + 3607, 105, // vay -> Deva + 3611, 10, // vgr -> Arab + 3615, 245, // vmd -> Knda + 3619, 10, // vmh -> Arab + 3623, 120, // wal -> Ethi + 3627, 10, // wbk -> Arab + 3631, 515, // wbq -> Telu + 3635, 105, // wbr -> Deva + 3639, 10, // wlo -> Arab + 3643, 105, // wme -> Deva + 3647, 10, // wne -> Arab + 3651, 10, // wni -> Arab + 3655, 130, // wsg -> Gong + 3659, 10, // wsv -> Arab + 3663, 105, // wtm -> Deva + 3667, 175, // wuu -> Hans + 3671, 100, // xal -> Cyrl + 3675, 120, // xan -> Ethi + 3679, 100, // xas -> Cyrl + 3683, 85, // xco -> Chrs + 3687, 70, // xcr -> Cari + 3691, 100, // xdq -> Cyrl + 3695, 10, // xhe -> Arab + 3699, 235, // xhm -> Khmr + 3703, 385, // xis -> Orya + 3707, 10, // xka -> Arab + 3711, 10, // xkc -> Arab + 3715, 10, // xkj -> Arab + 3719, 10, // xkp -> Arab + 3723, 285, // xlc -> Lyci + 3727, 290, // xld -> Lydi + 3731, 115, // xly -> Elym + 3735, 125, // xmf -> Geor + 3739, 300, // xmn -> Mani + 3743, 315, // xmr -> Merc + 3747, 350, // xna -> Narb + 3751, 105, // xnr -> Deva + 3755, 150, // xpg -> Grek + 3759, 370, // xpi -> Ogam + 3763, 100, // xpm -> Cyrl + 3767, 415, // xpr -> Prti + 3771, 100, // xrm -> Cyrl + 3775, 100, // xrn -> Cyrl + 3779, 440, // xsa -> Sarb + 3783, 105, // xsr -> Deva + 3787, 100, // xss -> Cyrl + 3791, 500, // xub -> Taml + 3795, 500, // xuj -> Taml + 3799, 200, // xve -> Ital + 3803, 10, // xvi -> Arab + 3807, 100, // xwo -> Cyrl + 3811, 305, // xzh -> Marc + 3815, 100, // yai -> Cyrl + 3819, 105, // ybh -> Deva + 3823, 105, // ybi -> Deva + 3827, 10, // ydg -> Arab + 3831, 320, // yea -> Mlym + 3835, 150, // yej -> Grek + 3839, 515, // yeu -> Telu + 3843, 410, // ygp -> Plrd + 3847, 185, // yhd -> Hebr + 3851, 185, // yi -> Hebr + 3854, 575, // yig -> Yiii + 3858, 185, // yih -> Hebr + 3862, 575, // yiv -> Yiii + 3866, 100, // ykg -> Cyrl + 3870, 410, // yna -> Plrd + 3874, 100, // ynk -> Cyrl + 3878, 210, // yoi -> Jpan + 3882, 530, // yoy -> Thai + 3886, 100, // yrk -> Cyrl + 3890, 575, // ysd -> Yiii + 3894, 575, // ysn -> Yiii + 3898, 575, // ysp -> Yiii + 3902, 100, // ysr -> Cyrl + 3906, 410, // ysy -> Plrd + 3910, 185, // yud -> Hebr + 3914, 180, // yue -> Hant + 3918, 175, // yue_CN -> Hans + 3925, 100, // yug -> Cyrl + 3929, 100, // yux -> Cyrl + 3933, 410, // ywq -> Plrd + 3937, 410, // ywu -> Plrd + 3941, 535, // zau -> Tibt + 3945, 10, // zba -> Arab + 3949, 170, // zch -> Hani + 3953, 10, // zdj -> Arab + 3957, 170, // zeh -> Hani + 3961, 520, // zen -> Tfng + 3965, 170, // zgb -> Hani + 3969, 520, // zgh -> Tfng + 3973, 170, // zgm -> Hani + 3977, 170, // zgn -> Hani + 3981, 175, // zh -> Hans + 3984, 180, // zh_AU -> Hant + 3990, 180, // zh_BN -> Hant + 3996, 180, // zh_GB -> Hant + 4002, 180, // zh_GF -> Hant + 4008, 180, // zh_HK -> Hant + 4014, 180, // zh_ID -> Hant + 4020, 180, // zh_MO -> Hant + 4026, 180, // zh_PA -> Hant + 4032, 180, // zh_PF -> Hant + 4038, 180, // zh_PH -> Hant + 4044, 180, // zh_SR -> Hant + 4050, 180, // zh_TH -> Hant + 4056, 180, // zh_TW -> Hant + 4062, 180, // zh_US -> Hant + 4068, 180, // zh_VN -> Hant + 4074, 170, // zhd -> Hani + 4078, 365, // zhx -> Nshu + 4082, 100, // zkb -> Cyrl + 4086, 100, // zko -> Cyrl + 4090, 240, // zkt -> Kits + 4094, 100, // zkz -> Cyrl + 4098, 170, // zlj -> Hani + 4102, 170, // zln -> Hani + 4106, 170, // zlq -> Hani + 4110, 170, // zqe -> Hani + 4114, 185, // zrp -> Hebr + 4118, 10, // zum -> Arab + 4122, 170, // zyg -> Hani + 4126, 170, // zyn -> Hani + 4130, 170, // zzj -> Hani }; //====================================================================== @@ -1150,35 +1138,36 @@ const char parentLocaleChars[] = "en_AU\0en_BB\0en_BE\0en_BM\0en_BS\0en_BW\0en_BZ\0en_CC\0en_CH\0" "en_CK\0en_CM\0en_CX\0en_CY\0en_DE\0en_DG\0en_DK\0en_DM\0en_Dsrt\0" "en_ER\0en_FI\0en_FJ\0en_FK\0en_FM\0en_GB\0en_GD\0en_GG\0en_GH\0" - "en_GI\0en_GM\0en_GY\0en_HK\0en_IE\0en_IL\0en_IM\0en_IN\0en_IO\0" - "en_JE\0en_JM\0en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0en_LS\0" - "en_MG\0en_MO\0en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0en_MY\0en_NA\0" - "en_NF\0en_NG\0en_NL\0en_NR\0en_NU\0en_NZ\0en_PG\0en_PK\0en_PN\0" - "en_PW\0en_RW\0en_SB\0en_SC\0en_SD\0en_SE\0en_SG\0en_SH\0en_SI\0" - "en_SL\0en_SS\0en_SX\0en_SZ\0en_Shaw\0en_TC\0en_TK\0en_TO\0en_TT\0" - "en_TV\0en_TZ\0en_UG\0en_VC\0en_VG\0en_VU\0en_WS\0en_ZA\0en_ZM\0" - "en_ZW\0es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0es_CL\0es_CO\0es_CR\0" - "es_CU\0es_DO\0es_EC\0es_GT\0es_HN\0es_MX\0es_NI\0es_PA\0es_PE\0" - "es_PR\0es_PY\0es_SV\0es_US\0es_UY\0es_VE\0ff_Adlm\0ff_Arab\0fr_HT\0" - "ha_Arab\0hi_Latn\0ht\0iu_Latn\0kk_Arab\0ks_Deva\0ku_Arab\0ky_Arab\0" - "ky_Latn\0ml_Arab\0mn_Mong\0mni_Mtei\0ms_Arab\0nb\0nn\0no\0no_NO\0" - "pa_Arab\0pt_AO\0pt_CH\0pt_CV\0pt_FR\0pt_GQ\0pt_GW\0pt_LU\0pt_MO\0" - "pt_MZ\0pt_PT\0pt_ST\0pt_TL\0root\0sat_Deva\0sd_Deva\0sd_Khoj\0" - "sd_Sind\0shi_Latn\0so_Arab\0sr_Latn\0sw_Arab\0tg_Arab\0ug_Cyrl\0" - "uz_Arab\0uz_Cyrl\0vai_Latn\0wo_Arab\0yo_Arab\0yue_Hans\0zh_Hant\0" - "zh_Hant_HK\0zh_Hant_MO\0"; + "en_GI\0en_GM\0en_GY\0en_HK\0en_ID\0en_IE\0en_IL\0en_IM\0en_IN\0" + "en_IO\0en_JE\0en_JM\0en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0" + "en_LS\0en_MG\0en_MO\0en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0en_MY\0" + "en_NA\0en_NF\0en_NG\0en_NL\0en_NR\0en_NU\0en_NZ\0en_PG\0en_PK\0" + "en_PN\0en_PW\0en_RW\0en_SB\0en_SC\0en_SD\0en_SE\0en_SG\0en_SH\0" + "en_SI\0en_SL\0en_SS\0en_SX\0en_SZ\0en_Shaw\0en_TC\0en_TK\0en_TO\0" + "en_TT\0en_TV\0en_TZ\0en_UG\0en_VC\0en_VG\0en_VU\0en_WS\0en_ZA\0" + "en_ZM\0en_ZW\0es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0es_CL\0es_CO\0" + "es_CR\0es_CU\0es_DO\0es_EC\0es_GT\0es_HN\0es_JP\0es_MX\0es_NI\0" + "es_PA\0es_PE\0es_PR\0es_PY\0es_SV\0es_US\0es_UY\0es_VE\0ff_Adlm\0" + "ff_Arab\0fr_HT\0ha_Arab\0hi_Latn\0ht\0iu_Latn\0kk_Arab\0ks_Deva\0" + "ku_Arab\0kxv_Deva\0kxv_Orya\0kxv_Telu\0ky_Arab\0ky_Latn\0ml_Arab\0" + "mn_Mong\0mni_Mtei\0ms_Arab\0nb\0nn\0no\0no_NO\0pa_Arab\0pt_AO\0" + "pt_CH\0pt_CV\0pt_FR\0pt_GQ\0pt_GW\0pt_LU\0pt_MO\0pt_MZ\0pt_PT\0" + "pt_ST\0pt_TL\0root\0sat_Deva\0sd_Deva\0sd_Khoj\0sd_Sind\0shi_Latn\0" + "so_Arab\0sr_Latn\0sw_Arab\0tg_Arab\0ug_Cyrl\0uz_Arab\0uz_Cyrl\0" + "vai_Latn\0wo_Arab\0yo_Arab\0yue_Hans\0zh_Hant\0zh_Hant_HK\0zh_Hant_MO\0" + ""; const int32_t parentLocaleTable[] = { - 0, 1023, // az_Arab -> root - 8, 1023, // az_Cyrl -> root - 16, 1023, // bal_Latn -> root - 25, 1023, // blt_Latn -> root - 34, 1023, // bm_Nkoo -> root - 42, 1023, // bs_Cyrl -> root - 50, 1023, // byn_Latn -> root - 59, 1023, // cu_Glag -> root - 67, 1023, // dje_Arab -> root - 76, 1023, // dyo_Arab -> root + 0, 1062, // az_Arab -> root + 8, 1062, // az_Cyrl -> root + 16, 1062, // bal_Latn -> root + 25, 1062, // blt_Latn -> root + 34, 1062, // bm_Nkoo -> root + 42, 1062, // bs_Cyrl -> root + 50, 1062, // byn_Latn -> root + 59, 1062, // cu_Glag -> root + 67, 1062, // dje_Arab -> root + 76, 1062, // dyo_Arab -> root 92, 85, // en_150 -> en_001 99, 85, // en_AG -> en_001 105, 85, // en_AI -> en_001 @@ -1200,7 +1189,7 @@ const int32_t parentLocaleTable[] = { 201, 85, // en_DG -> en_001 207, 92, // en_DK -> en_150 213, 85, // en_DM -> en_001 - 219, 1023, // en_Dsrt -> root + 219, 1062, // en_Dsrt -> root 227, 85, // en_ER -> en_001 233, 92, // en_FI -> en_150 239, 85, // en_FJ -> en_001 @@ -1214,136 +1203,141 @@ const int32_t parentLocaleTable[] = { 287, 85, // en_GM -> en_001 293, 85, // en_GY -> en_001 299, 85, // en_HK -> en_001 - 305, 85, // en_IE -> en_001 - 311, 85, // en_IL -> en_001 - 317, 85, // en_IM -> en_001 - 323, 85, // en_IN -> en_001 - 329, 85, // en_IO -> en_001 - 335, 85, // en_JE -> en_001 - 341, 85, // en_JM -> en_001 - 347, 85, // en_KE -> en_001 - 353, 85, // en_KI -> en_001 - 359, 85, // en_KN -> en_001 - 365, 85, // en_KY -> en_001 - 371, 85, // en_LC -> en_001 - 377, 85, // en_LR -> en_001 - 383, 85, // en_LS -> en_001 - 389, 85, // en_MG -> en_001 - 395, 85, // en_MO -> en_001 - 401, 85, // en_MS -> en_001 - 407, 85, // en_MT -> en_001 - 413, 85, // en_MU -> en_001 - 419, 85, // en_MV -> en_001 - 425, 85, // en_MW -> en_001 - 431, 85, // en_MY -> en_001 - 437, 85, // en_NA -> en_001 - 443, 85, // en_NF -> en_001 - 449, 85, // en_NG -> en_001 - 455, 92, // en_NL -> en_150 - 461, 85, // en_NR -> en_001 - 467, 85, // en_NU -> en_001 - 473, 85, // en_NZ -> en_001 - 479, 85, // en_PG -> en_001 - 485, 85, // en_PK -> en_001 - 491, 85, // en_PN -> en_001 - 497, 85, // en_PW -> en_001 - 503, 85, // en_RW -> en_001 - 509, 85, // en_SB -> en_001 - 515, 85, // en_SC -> en_001 - 521, 85, // en_SD -> en_001 - 527, 92, // en_SE -> en_150 - 533, 85, // en_SG -> en_001 - 539, 85, // en_SH -> en_001 - 545, 92, // en_SI -> en_150 - 551, 85, // en_SL -> en_001 - 557, 85, // en_SS -> en_001 - 563, 85, // en_SX -> en_001 - 569, 85, // en_SZ -> en_001 - 575, 1023, // en_Shaw -> root - 583, 85, // en_TC -> en_001 - 589, 85, // en_TK -> en_001 - 595, 85, // en_TO -> en_001 - 601, 85, // en_TT -> en_001 - 607, 85, // en_TV -> en_001 - 613, 85, // en_TZ -> en_001 - 619, 85, // en_UG -> en_001 - 625, 85, // en_VC -> en_001 - 631, 85, // en_VG -> en_001 - 637, 85, // en_VU -> en_001 - 643, 85, // en_WS -> en_001 - 649, 85, // en_ZA -> en_001 - 655, 85, // en_ZM -> en_001 - 661, 85, // en_ZW -> en_001 - 674, 667, // es_AR -> es_419 - 680, 667, // es_BO -> es_419 - 686, 667, // es_BR -> es_419 - 692, 667, // es_BZ -> es_419 - 698, 667, // es_CL -> es_419 - 704, 667, // es_CO -> es_419 - 710, 667, // es_CR -> es_419 - 716, 667, // es_CU -> es_419 - 722, 667, // es_DO -> es_419 - 728, 667, // es_EC -> es_419 - 734, 667, // es_GT -> es_419 - 740, 667, // es_HN -> es_419 - 746, 667, // es_MX -> es_419 - 752, 667, // es_NI -> es_419 - 758, 667, // es_PA -> es_419 - 764, 667, // es_PE -> es_419 - 770, 667, // es_PR -> es_419 - 776, 667, // es_PY -> es_419 - 782, 667, // es_SV -> es_419 - 788, 667, // es_US -> es_419 - 794, 667, // es_UY -> es_419 - 800, 667, // es_VE -> es_419 - 806, 1023, // ff_Adlm -> root - 814, 1023, // ff_Arab -> root - 828, 1023, // ha_Arab -> root - 836, 323, // hi_Latn -> en_IN - 844, 822, // ht -> fr_HT - 847, 1023, // iu_Latn -> root - 855, 1023, // kk_Arab -> root - 863, 1023, // ks_Deva -> root - 871, 1023, // ku_Arab -> root - 879, 1023, // ky_Arab -> root - 887, 1023, // ky_Latn -> root - 895, 1023, // ml_Arab -> root - 903, 1023, // mn_Mong -> root - 911, 1023, // mni_Mtei -> root - 920, 1023, // ms_Arab -> root - 928, 934, // nb -> no - 931, 934, // nn -> no - 937, 934, // no_NO -> no - 943, 1023, // pa_Arab -> root - 951, 1005, // pt_AO -> pt_PT - 957, 1005, // pt_CH -> pt_PT - 963, 1005, // pt_CV -> pt_PT - 969, 1005, // pt_FR -> pt_PT - 975, 1005, // pt_GQ -> pt_PT - 981, 1005, // pt_GW -> pt_PT - 987, 1005, // pt_LU -> pt_PT - 993, 1005, // pt_MO -> pt_PT - 999, 1005, // pt_MZ -> pt_PT - 1011, 1005, // pt_ST -> pt_PT - 1017, 1005, // pt_TL -> pt_PT - 1028, 1023, // sat_Deva -> root - 1037, 1023, // sd_Deva -> root - 1045, 1023, // sd_Khoj -> root - 1053, 1023, // sd_Sind -> root - 1061, 1023, // shi_Latn -> root - 1070, 1023, // so_Arab -> root - 1078, 1023, // sr_Latn -> root - 1086, 1023, // sw_Arab -> root - 1094, 1023, // tg_Arab -> root - 1102, 1023, // ug_Cyrl -> root - 1110, 1023, // uz_Arab -> root - 1118, 1023, // uz_Cyrl -> root - 1126, 1023, // vai_Latn -> root - 1135, 1023, // wo_Arab -> root - 1143, 1023, // yo_Arab -> root - 1151, 1023, // yue_Hans -> root - 1160, 1023, // zh_Hant -> root - 1179, 1168, // zh_Hant_MO -> zh_Hant_HK + 305, 85, // en_ID -> en_001 + 311, 85, // en_IE -> en_001 + 317, 85, // en_IL -> en_001 + 323, 85, // en_IM -> en_001 + 329, 85, // en_IN -> en_001 + 335, 85, // en_IO -> en_001 + 341, 85, // en_JE -> en_001 + 347, 85, // en_JM -> en_001 + 353, 85, // en_KE -> en_001 + 359, 85, // en_KI -> en_001 + 365, 85, // en_KN -> en_001 + 371, 85, // en_KY -> en_001 + 377, 85, // en_LC -> en_001 + 383, 85, // en_LR -> en_001 + 389, 85, // en_LS -> en_001 + 395, 85, // en_MG -> en_001 + 401, 85, // en_MO -> en_001 + 407, 85, // en_MS -> en_001 + 413, 85, // en_MT -> en_001 + 419, 85, // en_MU -> en_001 + 425, 85, // en_MV -> en_001 + 431, 85, // en_MW -> en_001 + 437, 85, // en_MY -> en_001 + 443, 85, // en_NA -> en_001 + 449, 85, // en_NF -> en_001 + 455, 85, // en_NG -> en_001 + 461, 92, // en_NL -> en_150 + 467, 85, // en_NR -> en_001 + 473, 85, // en_NU -> en_001 + 479, 85, // en_NZ -> en_001 + 485, 85, // en_PG -> en_001 + 491, 85, // en_PK -> en_001 + 497, 85, // en_PN -> en_001 + 503, 85, // en_PW -> en_001 + 509, 85, // en_RW -> en_001 + 515, 85, // en_SB -> en_001 + 521, 85, // en_SC -> en_001 + 527, 85, // en_SD -> en_001 + 533, 92, // en_SE -> en_150 + 539, 85, // en_SG -> en_001 + 545, 85, // en_SH -> en_001 + 551, 92, // en_SI -> en_150 + 557, 85, // en_SL -> en_001 + 563, 85, // en_SS -> en_001 + 569, 85, // en_SX -> en_001 + 575, 85, // en_SZ -> en_001 + 581, 1062, // en_Shaw -> root + 589, 85, // en_TC -> en_001 + 595, 85, // en_TK -> en_001 + 601, 85, // en_TO -> en_001 + 607, 85, // en_TT -> en_001 + 613, 85, // en_TV -> en_001 + 619, 85, // en_TZ -> en_001 + 625, 85, // en_UG -> en_001 + 631, 85, // en_VC -> en_001 + 637, 85, // en_VG -> en_001 + 643, 85, // en_VU -> en_001 + 649, 85, // en_WS -> en_001 + 655, 85, // en_ZA -> en_001 + 661, 85, // en_ZM -> en_001 + 667, 85, // en_ZW -> en_001 + 680, 673, // es_AR -> es_419 + 686, 673, // es_BO -> es_419 + 692, 673, // es_BR -> es_419 + 698, 673, // es_BZ -> es_419 + 704, 673, // es_CL -> es_419 + 710, 673, // es_CO -> es_419 + 716, 673, // es_CR -> es_419 + 722, 673, // es_CU -> es_419 + 728, 673, // es_DO -> es_419 + 734, 673, // es_EC -> es_419 + 740, 673, // es_GT -> es_419 + 746, 673, // es_HN -> es_419 + 752, 673, // es_JP -> es_419 + 758, 673, // es_MX -> es_419 + 764, 673, // es_NI -> es_419 + 770, 673, // es_PA -> es_419 + 776, 673, // es_PE -> es_419 + 782, 673, // es_PR -> es_419 + 788, 673, // es_PY -> es_419 + 794, 673, // es_SV -> es_419 + 800, 673, // es_US -> es_419 + 806, 673, // es_UY -> es_419 + 812, 673, // es_VE -> es_419 + 818, 1062, // ff_Adlm -> root + 826, 1062, // ff_Arab -> root + 840, 1062, // ha_Arab -> root + 848, 329, // hi_Latn -> en_IN + 856, 834, // ht -> fr_HT + 859, 1062, // iu_Latn -> root + 867, 1062, // kk_Arab -> root + 875, 1062, // ks_Deva -> root + 883, 1062, // ku_Arab -> root + 891, 1062, // kxv_Deva -> root + 900, 1062, // kxv_Orya -> root + 909, 1062, // kxv_Telu -> root + 918, 1062, // ky_Arab -> root + 926, 1062, // ky_Latn -> root + 934, 1062, // ml_Arab -> root + 942, 1062, // mn_Mong -> root + 950, 1062, // mni_Mtei -> root + 959, 1062, // ms_Arab -> root + 967, 973, // nb -> no + 970, 973, // nn -> no + 976, 973, // no_NO -> no + 982, 1062, // pa_Arab -> root + 990, 1044, // pt_AO -> pt_PT + 996, 1044, // pt_CH -> pt_PT + 1002, 1044, // pt_CV -> pt_PT + 1008, 1044, // pt_FR -> pt_PT + 1014, 1044, // pt_GQ -> pt_PT + 1020, 1044, // pt_GW -> pt_PT + 1026, 1044, // pt_LU -> pt_PT + 1032, 1044, // pt_MO -> pt_PT + 1038, 1044, // pt_MZ -> pt_PT + 1050, 1044, // pt_ST -> pt_PT + 1056, 1044, // pt_TL -> pt_PT + 1067, 1062, // sat_Deva -> root + 1076, 1062, // sd_Deva -> root + 1084, 1062, // sd_Khoj -> root + 1092, 1062, // sd_Sind -> root + 1100, 1062, // shi_Latn -> root + 1109, 1062, // so_Arab -> root + 1117, 1062, // sr_Latn -> root + 1125, 1062, // sw_Arab -> root + 1133, 1062, // tg_Arab -> root + 1141, 1062, // ug_Cyrl -> root + 1149, 1062, // uz_Arab -> root + 1157, 1062, // uz_Cyrl -> root + 1165, 1062, // vai_Latn -> root + 1174, 1062, // wo_Arab -> root + 1182, 1062, // yo_Arab -> root + 1190, 1062, // yue_Hans -> root + 1199, 1062, // zh_Hant -> root + 1218, 1207, // zh_Hant_MO -> zh_Hant_HK }; diff --git a/yass/third_party/icu/source/common/localematcher.cpp b/yass/third_party/icu/source/common/localematcher.cpp index 5f8c703df7..6fc578af11 100644 --- a/yass/third_party/icu/source/common/localematcher.cpp +++ b/yass/third_party/icu/source/common/localematcher.cpp @@ -307,7 +307,7 @@ LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) { return UND_LSR; } else { - return likelySubtags.makeMaximizedLsrFrom(locale, errorCode); + return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode); } } diff --git a/yass/third_party/icu/source/common/locid.cpp b/yass/third_party/icu/source/common/locid.cpp index 70a794ae07..64ff63f361 100644 --- a/yass/third_party/icu/source/common/locid.cpp +++ b/yass/third_party/icu/source/common/locid.cpp @@ -563,7 +563,7 @@ private: LocalMemory& replacementIndexes, int32_t &length, void (*checkType)(const char* type), - void (*checkReplacement)(const UnicodeString& replacement), + void (*checkReplacement)(const UChar* replacement), UErrorCode &status); // Read the languageAlias data from alias to @@ -700,7 +700,7 @@ AliasDataBuilder::readAlias( LocalMemory& replacementIndexes, int32_t &length, void (*checkType)(const char* type), - void (*checkReplacement)(const UnicodeString& replacement), + void (*checkReplacement)(const UChar* replacement), UErrorCode &status) { if (U_FAILURE(status)) { return; @@ -720,8 +720,8 @@ AliasDataBuilder::readAlias( LocalUResourceBundlePointer res( ures_getNextResource(alias, nullptr, &status)); const char* aliasFrom = ures_getKey(res.getAlias()); - UnicodeString aliasTo = - ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status); + const UChar* aliasTo = + ures_getStringByKey(res.getAlias(), "replacement", nullptr, &status); if (U_FAILURE(status)) return; checkType(aliasFrom); @@ -766,7 +766,7 @@ AliasDataBuilder::readLanguageAlias( #else [](const char*) {}, #endif - [](const UnicodeString&) {}, status); + [](const UChar*) {}, status); } /** @@ -790,12 +790,12 @@ AliasDataBuilder::readScriptAlias( [](const char* type) { U_ASSERT(uprv_strlen(type) == 4); }, - [](const UnicodeString& replacement) { - U_ASSERT(replacement.length() == 4); + [](const UChar* replacement) { + U_ASSERT(u_strlen(replacement) == 4); }, #else [](const char*) {}, - [](const UnicodeString&) { }, + [](const UChar*) { }, #endif status); } @@ -824,7 +824,7 @@ AliasDataBuilder::readTerritoryAlias( #else [](const char*) {}, #endif - [](const UnicodeString&) { }, + [](const UChar*) { }, status); } @@ -851,15 +851,16 @@ AliasDataBuilder::readVariantAlias( U_ASSERT(uprv_strlen(type) != 4 || (type[0] >= '0' && type[0] <= '9')); }, - [](const UnicodeString& replacement) { - U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8); - U_ASSERT(replacement.length() != 4 || - (replacement.charAt(0) >= u'0' && - replacement.charAt(0) <= u'9')); + [](const UChar* replacement) { + int32_t len = u_strlen(replacement); + U_ASSERT(len >= 4 && len <= 8); + U_ASSERT(len != 4 || + (*replacement >= u'0' && + *replacement <= u'9')); }, #else [](const char*) {}, - [](const UnicodeString&) { }, + [](const UChar*) { }, #endif status); } @@ -888,7 +889,7 @@ AliasDataBuilder::readSubdivisionAlias( #else [](const char*) {}, #endif - [](const UnicodeString&) { }, + [](const UChar*) { }, status); } @@ -1066,7 +1067,13 @@ class AliasReplacer { public: AliasReplacer(UErrorCode status) : language(nullptr), script(nullptr), region(nullptr), - extensions(nullptr), variants(status), + extensions(nullptr), + // store value in variants only once + variants(nullptr, + ([](UElement e1, UElement e2) -> UBool { + return 0==uprv_strcmp((const char*)e1.pointer, + (const char*)e2.pointer);}), + status), data(nullptr) { } ~AliasReplacer() { @@ -1652,10 +1659,16 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr && U_SUCCESS(status)) { *end = NULL_CHAR; // null terminate inside variantsBuff - variants.addElement(start, status); + // do not add "" or duplicate data to variants + if (*start && !variants.contains(start)) { + variants.addElement(start, status); + } start = end + 1; } - variants.addElement(start, status); + // do not add "" or duplicate data to variants + if (*start && !variants.contains(start)) { + variants.addElement(start, status); + } } if (U_FAILURE(status)) { return false; } @@ -2079,6 +2092,10 @@ Locale::addLikelySubtags(UErrorCode& status) { void Locale::minimizeSubtags(UErrorCode& status) { + Locale::minimizeSubtags(false, status); +} +void +Locale::minimizeSubtags(bool favorScript, UErrorCode& status) { if (U_FAILURE(status)) { return; } @@ -2086,7 +2103,7 @@ Locale::minimizeSubtags(UErrorCode& status) { CharString minimizedLocaleID; { CharStringByteSink sink(&minimizedLocaleID); - ulocimp_minimizeSubtags(fullName, sink, &status); + ulocimp_minimizeSubtags(fullName, sink, favorScript, &status); } if (U_FAILURE(status)) { @@ -2402,8 +2419,9 @@ Locale::getLocaleCache() } class KeywordEnumeration : public StringEnumeration { -private: +protected: char *keywords; +private: char *current; int32_t length; UnicodeString currUSKey; @@ -2510,6 +2528,17 @@ public: if (resultLength != nullptr) *resultLength = 0; return nullptr; } + virtual int32_t count(UErrorCode &/*status*/) const override { + char *kw = keywords; + int32_t result = 0; + while(*kw) { + if (uloc_toUnicodeLocaleKey(kw) != nullptr) { + result++; + } + kw += uprv_strlen(kw)+1; + } + return result; + } }; // Out-of-line virtual destructor to serve as the "key function". diff --git a/yass/third_party/icu/source/common/loclikely.cpp b/yass/third_party/icu/source/common/loclikely.cpp index d2a05c6364..eedfb8149e 100644 --- a/yass/third_party/icu/source/common/loclikely.cpp +++ b/yass/third_party/icu/source/common/loclikely.cpp @@ -31,82 +31,10 @@ #include "charstr.h" #include "cmemory.h" #include "cstring.h" +#include "loclikelysubtags.h" #include "ulocimp.h" #include "ustr_imp.h" -/** - * These are the canonical strings for unknown languages, scripts and regions. - **/ -static const char* const unknownLanguage = "und"; -static const char* const unknownScript = "Zzzz"; -static const char* const unknownRegion = "ZZ"; - -/** - * This function looks for the localeID in the likelySubtags resource. - * - * @param localeID The tag to find. - * @param buffer A buffer to hold the matching entry - * @param bufferLength The length of the output buffer - * @return A pointer to "buffer" if found, or a null pointer if not. - */ -static const char* U_CALLCONV -findLikelySubtags(const char* localeID, - char* buffer, - int32_t bufferLength, - UErrorCode* err) { - const char* result = nullptr; - - if (!U_FAILURE(*err)) { - int32_t resLen = 0; - const char16_t* s = nullptr; - UErrorCode tmpErr = U_ZERO_ERROR; - icu::LocalUResourceBundlePointer subtags(ures_openDirect(nullptr, "likelySubtags", &tmpErr)); - if (U_SUCCESS(tmpErr)) { - icu::CharString und; - if (localeID != nullptr) { - if (*localeID == '\0') { - localeID = unknownLanguage; - } else if (*localeID == '_') { - und.append(unknownLanguage, *err); - und.append(localeID, *err); - if (U_FAILURE(*err)) { - return nullptr; - } - localeID = und.data(); - } - } - s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr); - - if (U_FAILURE(tmpErr)) { - /* - * If a resource is missing, it's not really an error, it's - * just that we don't have any data for that particular locale ID. - */ - if (tmpErr != U_MISSING_RESOURCE_ERROR) { - *err = tmpErr; - } - } - else if (resLen >= bufferLength) { - /* The buffer should never overflow. */ - *err = U_INTERNAL_PROGRAM_ERROR; - } - else { - u_UCharsToChars(s, buffer, resLen + 1); - if (resLen >= 3 && - uprv_strnicmp(buffer, unknownLanguage, 3) == 0 && - (resLen == 3 || buffer[3] == '_')) { - uprv_memmove(buffer, buffer + 3, resLen - 3 + 1); - } - result = buffer; - } - } else { - *err = tmpErr; - } - } - - return result; -} - /** * Append a tag to a buffer, adding the separator if necessary. The buffer * must be large enough to contain the resulting tag plus any separator @@ -360,57 +288,6 @@ error: } } -/** - * Create a tag string from the supplied parameters. The lang, script and region - * parameters may be nullptr pointers. If they are, their corresponding length parameters - * must be less than or equal to 0. If the lang parameter is an empty string, the - * default value for an unknown language is written to the output buffer. - * - * If the length of the new string exceeds the capacity of the output buffer, - * the function copies as many bytes to the output buffer as it can, and returns - * the error U_BUFFER_OVERFLOW_ERROR. - * - * If an illegal argument is provided, the function returns the error - * U_ILLEGAL_ARGUMENT_ERROR. - * - * @param lang The language tag to use. - * @param langLength The length of the language tag. - * @param script The script tag to use. - * @param scriptLength The length of the script tag. - * @param region The region tag to use. - * @param regionLength The length of the region tag. - * @param trailing Any trailing data to append to the new tag. - * @param trailingLength The length of the trailing data. - * @param sink The output sink receiving the tag string. - * @param err A pointer to a UErrorCode for error reporting. - **/ -static void U_CALLCONV -createTagString( - const char* lang, - int32_t langLength, - const char* script, - int32_t scriptLength, - const char* region, - int32_t regionLength, - const char* trailing, - int32_t trailingLength, - icu::ByteSink& sink, - UErrorCode* err) -{ - createTagStringWithAlternates( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - trailing, - trailingLength, - nullptr, - sink, - err); -} - /** * Parse the language, script, and region subtags from a tag string, and copy the * results into the corresponding output parameters. The buffers are null-terminated, @@ -494,13 +371,6 @@ parseTagString( *scriptLength = subtagLength; if (*scriptLength > 0) { - if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { - /** - * If the script part is the "unknown" script, then don't return it. - **/ - *scriptLength = 0; - } - /* * Move past any separator. */ @@ -517,14 +387,7 @@ parseTagString( *regionLength = subtagLength; - if (*regionLength > 0) { - if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { - /** - * If the region part is the "unknown" region, then don't return it. - **/ - *regionLength = 0; - } - } else if (*position != 0 && *position != '@') { + if (*regionLength <= 0 && *position != 0 && *position != '@') { /* back up over consumed trailing separator */ --position; } @@ -546,264 +409,6 @@ error: goto exit; } -static UBool U_CALLCONV -createLikelySubtagsString( - const char* lang, - int32_t langLength, - const char* script, - int32_t scriptLength, - const char* region, - int32_t regionLength, - const char* variants, - int32_t variantsLength, - icu::ByteSink& sink, - UErrorCode* err) { - /** - * ULOC_FULLNAME_CAPACITY will provide enough capacity - * that we can build a string that contains the language, - * script and region code without worrying about overrunning - * the user-supplied buffer. - **/ - char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; - - if(U_FAILURE(*err)) { - goto error; - } - - /** - * Try the language with the script and region first. - **/ - if (scriptLength > 0 && regionLength > 0) { - - const char* likelySubtags = nullptr; - - icu::CharString tagBuffer; - { - icu::CharStringByteSink sink(&tagBuffer); - createTagString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - nullptr, - 0, - sink, - err); - } - if(U_FAILURE(*err)) { - goto error; - } - - likelySubtags = - findLikelySubtags( - tagBuffer.data(), - likelySubtagsBuffer, - sizeof(likelySubtagsBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - if (likelySubtags != nullptr) { - /* Always use the language tag from the - maximal string, since it may be more - specific than the one provided. */ - createTagStringWithAlternates( - nullptr, - 0, - nullptr, - 0, - nullptr, - 0, - variants, - variantsLength, - likelySubtags, - sink, - err); - return true; - } - } - - /** - * Try the language with just the script. - **/ - if (scriptLength > 0) { - - const char* likelySubtags = nullptr; - - icu::CharString tagBuffer; - { - icu::CharStringByteSink sink(&tagBuffer); - createTagString( - lang, - langLength, - script, - scriptLength, - nullptr, - 0, - nullptr, - 0, - sink, - err); - } - if(U_FAILURE(*err)) { - goto error; - } - - likelySubtags = - findLikelySubtags( - tagBuffer.data(), - likelySubtagsBuffer, - sizeof(likelySubtagsBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - if (likelySubtags != nullptr) { - /* Always use the language tag from the - maximal string, since it may be more - specific than the one provided. */ - createTagStringWithAlternates( - nullptr, - 0, - nullptr, - 0, - region, - regionLength, - variants, - variantsLength, - likelySubtags, - sink, - err); - return true; - } - } - - /** - * Try the language with just the region. - **/ - if (regionLength > 0) { - - const char* likelySubtags = nullptr; - - icu::CharString tagBuffer; - { - icu::CharStringByteSink sink(&tagBuffer); - createTagString( - lang, - langLength, - nullptr, - 0, - region, - regionLength, - nullptr, - 0, - sink, - err); - } - if(U_FAILURE(*err)) { - goto error; - } - - likelySubtags = - findLikelySubtags( - tagBuffer.data(), - likelySubtagsBuffer, - sizeof(likelySubtagsBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - if (likelySubtags != nullptr) { - /* Always use the language tag from the - maximal string, since it may be more - specific than the one provided. */ - createTagStringWithAlternates( - nullptr, - 0, - script, - scriptLength, - nullptr, - 0, - variants, - variantsLength, - likelySubtags, - sink, - err); - return true; - } - } - - /** - * Finally, try just the language. - **/ - { - const char* likelySubtags = nullptr; - - icu::CharString tagBuffer; - { - icu::CharStringByteSink sink(&tagBuffer); - createTagString( - lang, - langLength, - nullptr, - 0, - nullptr, - 0, - nullptr, - 0, - sink, - err); - } - if(U_FAILURE(*err)) { - goto error; - } - - likelySubtags = - findLikelySubtags( - tagBuffer.data(), - likelySubtagsBuffer, - sizeof(likelySubtagsBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - if (likelySubtags != nullptr) { - /* Always use the language tag from the - maximal string, since it may be more - specific than the one provided. */ - createTagStringWithAlternates( - nullptr, - 0, - script, - scriptLength, - region, - regionLength, - variants, - variantsLength, - likelySubtags, - sink, - err); - return true; - } - } - - return false; - -error: - - if (!U_FAILURE(*err)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } - - return false; -} - #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \ int32_t count = 0; \ int32_t i; \ @@ -836,7 +441,6 @@ _uloc_addLikelySubtags(const char* localeID, const char* trailing = ""; int32_t trailingLength = 0; int32_t trailingIndex = 0; - UBool success = false; if(U_FAILURE(*err)) { goto error; @@ -862,6 +466,9 @@ _uloc_addLikelySubtags(const char* localeID, goto error; } + if (langLength > 3) { + goto error; + } /* Find the length of the trailing portion. */ while (_isIDSeparator(localeID[trailingIndex])) { @@ -871,30 +478,42 @@ _uloc_addLikelySubtags(const char* localeID, trailingLength = (int32_t)uprv_strlen(trailing); CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); - - success = - createLikelySubtagsString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, + { + const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err); + if(U_FAILURE(*err)) { + goto error; + } + // We need to keep l on the stack because lsr may point into internal + // memory of l. + icu::Locale l = icu::Locale::createFromName(localeID); + if (l.isBogus()) { + goto error; + } + icu::LSR lsr = likelySubtags->makeMaximizedLsrFrom(l, true, *err); + if(U_FAILURE(*err)) { + goto error; + } + const char* language = lsr.language; + if (uprv_strcmp(language, "und") == 0) { + language = ""; + } + createTagStringWithAlternates( + language, + (int32_t)uprv_strlen(language), + lsr.script, + (int32_t)uprv_strlen(lsr.script), + lsr.region, + (int32_t)uprv_strlen(lsr.region), trailing, trailingLength, + nullptr, sink, err); - - if (!success) { - const int32_t localIDLength = (int32_t)uprv_strlen(localeID); - - /* - * If we get here, we need to return localeID. - */ - sink.Append(localeID, localIDLength); + if(U_FAILURE(*err)) { + goto error; + } } - - return success; + return true; error: @@ -913,6 +532,7 @@ static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*) static void _uloc_minimizeSubtags(const char* localeID, icu::ByteSink& sink, + bool favorScript, UErrorCode* err) { icu::CharString maximizedTagBuffer; @@ -925,7 +545,6 @@ _uloc_minimizeSubtags(const char* localeID, const char* trailing = ""; int32_t trailingLength = 0; int32_t trailingIndex = 0; - UBool successGetMax = false; if(U_FAILURE(*err)) { goto error; @@ -964,213 +583,38 @@ _uloc_minimizeSubtags(const char* localeID, CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); { - icu::CharString base; - { - icu::CharStringByteSink baseSink(&base); - createTagString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - nullptr, - 0, - baseSink, - err); - } - - /** - * First, we need to first get the maximization - * from AddLikelySubtags. - **/ - { - icu::CharStringByteSink maxSink(&maximizedTagBuffer); - successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err); - } - } - - if(U_FAILURE(*err)) { - goto error; - } - - if (!successGetMax) { - /** - * If we got here, return the locale ID parameter unchanged. - **/ - const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); - sink.Append(localeID, localeIDLength); - return; - } - - // In the following, the lang, script, region are referring to those in - // the maximizedTagBuffer, not the one in the localeID. - langLength = sizeof(lang); - scriptLength = sizeof(script); - regionLength = sizeof(region); - parseTagString( - maximizedTagBuffer.data(), - lang, - &langLength, - script, - &scriptLength, - region, - ®ionLength, - err); - if(U_FAILURE(*err)) { - goto error; - } - - /** - * Start first with just the language. - **/ - { - icu::CharString tagBuffer; - { - icu::CharStringByteSink tagSink(&tagBuffer); - createLikelySubtagsString( - lang, - langLength, - nullptr, - 0, - nullptr, - 0, - nullptr, - 0, - tagSink, - err); - } - + const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err); if(U_FAILURE(*err)) { goto error; } - else if (!tagBuffer.isEmpty() && - uprv_strnicmp( - maximizedTagBuffer.data(), - tagBuffer.data(), - tagBuffer.length()) == 0) { - - createTagString( - lang, - langLength, - nullptr, - 0, - nullptr, - 0, - trailing, - trailingLength, - sink, - err); - return; - } - } - - /** - * Next, try the language and region. - **/ - if (regionLength > 0) { - - icu::CharString tagBuffer; - { - icu::CharStringByteSink tagSink(&tagBuffer); - createLikelySubtagsString( - lang, - langLength, - nullptr, - 0, - region, - regionLength, - nullptr, - 0, - tagSink, - err); - } - + icu::LSR lsr = likelySubtags->minimizeSubtags( + {lang, langLength}, + {script, scriptLength}, + {region, regionLength}, + favorScript, + *err); if(U_FAILURE(*err)) { goto error; } - else if (!tagBuffer.isEmpty() && - uprv_strnicmp( - maximizedTagBuffer.data(), - tagBuffer.data(), - tagBuffer.length()) == 0) { - - createTagString( - lang, - langLength, - nullptr, - 0, - region, - regionLength, - trailing, - trailingLength, - sink, - err); - return; + const char* language = lsr.language; + if (uprv_strcmp(language, "und") == 0) { + language = ""; } - } - - /** - * Finally, try the language and script. This is our last chance, - * since trying with all three subtags would only yield the - * maximal version that we already have. - **/ - if (scriptLength > 0) { - icu::CharString tagBuffer; - { - icu::CharStringByteSink tagSink(&tagBuffer); - createLikelySubtagsString( - lang, - langLength, - script, - scriptLength, - nullptr, - 0, - nullptr, - 0, - tagSink, - err); - } - + createTagStringWithAlternates( + language, + (int32_t)uprv_strlen(language), + lsr.script, + (int32_t)uprv_strlen(lsr.script), + lsr.region, + (int32_t)uprv_strlen(lsr.region), + trailing, + trailingLength, + nullptr, + sink, + err); if(U_FAILURE(*err)) { goto error; } - else if (!tagBuffer.isEmpty() && - uprv_strnicmp( - maximizedTagBuffer.data(), - tagBuffer.data(), - tagBuffer.length()) == 0) { - - createTagString( - lang, - langLength, - script, - scriptLength, - nullptr, - 0, - trailing, - trailingLength, - sink, - err); - return; - } - } - - { - /** - * If we got here, return the max + trail. - **/ - createTagString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - trailing, - trailingLength, - sink, - err); return; } @@ -1181,31 +625,6 @@ error: } } -static int32_t -do_canonicalize(const char* localeID, - char* buffer, - int32_t bufferCapacity, - UErrorCode* err) -{ - int32_t canonicalizedSize = uloc_canonicalize( - localeID, - buffer, - bufferCapacity, - err); - - if (*err == U_STRING_NOT_TERMINATED_WARNING || - *err == U_BUFFER_OVERFLOW_ERROR) { - return canonicalizedSize; - } - else if (U_FAILURE(*err)) { - - return -1; - } - else { - return canonicalizedSize; - } -} - U_CAPI int32_t U_EXPORT2 uloc_addLikelySubtags(const char* localeID, char* maximizedLocaleID, @@ -1239,14 +658,13 @@ static UBool _ulocimp_addLikelySubtags(const char* localeID, icu::ByteSink& sink, UErrorCode* status) { - PreflightingLocaleIDBuffer localeBuffer; - do { - localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(), - localeBuffer.getCapacity(), status); - } while (localeBuffer.needToTryAgain(status)); - + icu::CharString localeBuffer; + { + icu::CharStringByteSink localeSink(&localeBuffer); + ulocimp_canonicalize(localeID, localeSink, status); + } if (U_SUCCESS(*status)) { - return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status); + return _uloc_addLikelySubtags(localeBuffer.data(), sink, status); } else { return false; } @@ -1271,7 +689,7 @@ uloc_minimizeSubtags(const char* localeID, icu::CheckedArrayByteSink sink( minimizedLocaleID, minimizedLocaleIDCapacity); - ulocimp_minimizeSubtags(localeID, sink, status); + ulocimp_minimizeSubtags(localeID, sink, false, status); int32_t reslen = sink.NumberOfBytesAppended(); if (U_FAILURE(*status)) { @@ -1291,14 +709,14 @@ uloc_minimizeSubtags(const char* localeID, U_CAPI void U_EXPORT2 ulocimp_minimizeSubtags(const char* localeID, icu::ByteSink& sink, + bool favorScript, UErrorCode* status) { - PreflightingLocaleIDBuffer localeBuffer; - do { - localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(), - localeBuffer.getCapacity(), status); - } while (localeBuffer.needToTryAgain(status)); - - _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status); + icu::CharString localeBuffer; + { + icu::CharStringByteSink localeSink(&localeBuffer); + ulocimp_canonicalize(localeID, localeSink, status); + } + _uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status); } // Pairs of (language subtag, + or -) for finding out fast if common languages @@ -1374,16 +792,26 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, UErrorCode rgStatus = U_ZERO_ERROR; // First check for rg keyword value - int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); - if (U_FAILURE(rgStatus) || rgLen != 6) { + icu::CharString rg; + { + icu::CharStringByteSink sink(&rg); + ulocimp_getKeywordValue(localeID, "rg", sink, &rgStatus); + } + int32_t rgLen = rg.length(); + if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) { rgLen = 0; } else { - // rgBuf guaranteed to be zero terminated here, with text len 6 - char *rgPtr = rgBuf; - for (; *rgPtr!= 0; rgPtr++) { - *rgPtr = uprv_toupper(*rgPtr); + // chop off the subdivision code (which will generally be "zzzz" anyway) + const char* const data = rg.data(); + if (uprv_isASCIILetter(data[0])) { + rgLen = 2; + rgBuf[0] = uprv_toupper(data[0]); + rgBuf[1] = uprv_toupper(data[1]); + } else { + // assume three-digit region code + rgLen = 3; + uprv_memcpy(rgBuf, data, rgLen); } - rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0; } if (rgLen == 0) { diff --git a/yass/third_party/icu/source/common/loclikelysubtags.cpp b/yass/third_party/icu/source/common/loclikelysubtags.cpp index e913c66a35..c2a7011b50 100644 --- a/yass/third_party/icu/source/common/loclikelysubtags.cpp +++ b/yass/third_party/icu/source/common/loclikelysubtags.cpp @@ -11,6 +11,7 @@ #include "unicode/locid.h" #include "unicode/uobject.h" #include "unicode/ures.h" +#include "unicode/uscript.h" #include "charstr.h" #include "cstring.h" #include "loclikelysubtags.h" @@ -23,6 +24,7 @@ #include "uniquecharstr.h" #include "uresdata.h" #include "uresimp.h" +#include "uvector.h" U_NAMESPACE_BEGIN @@ -81,11 +83,18 @@ struct XLikelySubtagsData { // Read all strings in the resource bundle and convert them to invariant char *. LocalMemory languageIndexes, regionIndexes, lsrSubtagIndexes; int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0; + ResourceArray m49Array; + if (likelyTable.findValue("m49", value)) { + m49Array = value.getArray(errorCode); + } else { + errorCode = U_MISSING_RESOURCE_ERROR; + return; + } if (!readStrings(likelyTable, "languageAliases", value, languageIndexes, languagesLength, errorCode) || !readStrings(likelyTable, "regionAliases", value, regionIndexes, regionsLength, errorCode) || - !readStrings(likelyTable, "lsrs", value, + !readLSREncodedStrings(likelyTable, "lsrnum", value, m49Array, lsrSubtagIndexes,lsrSubtagsLength, errorCode)) { return; } @@ -136,7 +145,7 @@ struct XLikelySubtagsData { if (!readStrings(matchTable, "partitions", value, partitionIndexes, partitionsLength, errorCode) || - !readStrings(matchTable, "paradigms", value, + !readLSREncodedStrings(matchTable, "paradigmnum", value, m49Array, paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) { return; } @@ -233,10 +242,96 @@ private: return false; } for (int i = 0; i < length; ++i) { - stringArray.getValue(i, value); // returns true because i < length - rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode); + if (stringArray.getValue(i, value)) { // returns true because i < length + int32_t strLength = 0; + rawIndexes[i] = strings.add(value.getString(strLength, errorCode), errorCode); + if (U_FAILURE(errorCode)) { return false; } + } + } + } + return true; + } + UnicodeString toLanguage(int encoded) { + if (encoded == 0) { + return UNICODE_STRING_SIMPLE(""); + } + if (encoded == 1) { + return UNICODE_STRING_SIMPLE("skip"); + } + encoded &= 0x00ffffff; + encoded %= 27*27*27; + char lang[3]; + lang[0] = 'a' + ((encoded % 27) - 1); + lang[1] = 'a' + (((encoded / 27 ) % 27) - 1); + if (encoded / (27 * 27) == 0) { + return UnicodeString(lang, 2, US_INV); + } + lang[2] = 'a' + ((encoded / (27 * 27)) - 1); + return UnicodeString(lang, 3, US_INV); + } + UnicodeString toScript(int encoded) { + if (encoded == 0) { + return UNICODE_STRING_SIMPLE(""); + } + if (encoded == 1) { + return UNICODE_STRING_SIMPLE("script"); + } + encoded = (encoded >> 24) & 0x000000ff; + const char* script = uscript_getShortName(static_cast(encoded)); + if (script == nullptr) { + return UNICODE_STRING_SIMPLE(""); + } + U_ASSERT(uprv_strlen(script) == 4); + return UnicodeString(script, 4, US_INV); + } + UnicodeString m49IndexToCode(const ResourceArray &m49Array, ResourceValue &value, int index, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return UNICODE_STRING_SIMPLE(""); + } + if (m49Array.getValue(index, value)) { + return value.getUnicodeString(errorCode); + } + // "m49" does not include the index. + errorCode = U_MISSING_RESOURCE_ERROR; + return UNICODE_STRING_SIMPLE(""); + } + + UnicodeString toRegion(const ResourceArray& m49Array, ResourceValue &value, int encoded, UErrorCode &errorCode) { + if (encoded == 0 || encoded == 1) { + return UNICODE_STRING_SIMPLE(""); + } + encoded &= 0x00ffffff; + encoded /= 27 * 27 * 27; + encoded %= 27 * 27; + if (encoded < 27) { + // Selected M49 code index, find the code from "m49" resource. + return m49IndexToCode(m49Array, value, encoded, errorCode); + } + char region[2]; + region[0] = 'A' + ((encoded % 27) - 1); + region[1] = 'A' + (((encoded / 27) % 27) - 1); + return UnicodeString(region, 2, US_INV); + } + + bool readLSREncodedStrings(const ResourceTable &table, const char* key, ResourceValue &value, const ResourceArray& m49Array, + LocalMemory &indexes, int32_t &length, UErrorCode &errorCode) { + if (table.findValue(key, value)) { + const int32_t* vectors = value.getIntVector(length, errorCode); + if (U_FAILURE(errorCode)) { return false; } + if (length == 0) { return true; } + int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length * 3); + if (rawIndexes == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return false; + } + for (int i = 0; i < length; ++i) { + rawIndexes[i*3] = strings.addByValue(toLanguage(vectors[i]), errorCode); + rawIndexes[i*3+1] = strings.addByValue(toScript(vectors[i]), errorCode); + rawIndexes[i*3+2] = strings.addByValue( + toRegion(m49Array, value, vectors[i], errorCode), errorCode); if (U_FAILURE(errorCode)) { return false; } } + length *= 3; } return true; } @@ -245,15 +340,52 @@ private: namespace { XLikelySubtags *gLikelySubtags = nullptr; +UVector *gMacroregions = nullptr; UInitOnce gInitOnce {}; UBool U_CALLCONV cleanup() { delete gLikelySubtags; gLikelySubtags = nullptr; + delete gMacroregions; + gMacroregions = nullptr; gInitOnce.reset(); return true; } +static const char16_t RANGE_MARKER = 0x7E; /* '~' */ +UVector* loadMacroregions(UErrorCode &status) { + LocalPointer newMacroRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); + + LocalUResourceBundlePointer supplementalData(ures_openDirect(nullptr,"supplementalData",&status)); + LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",nullptr,&status)); + LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",nullptr,&status)); + LocalUResourceBundlePointer regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",nullptr,&status)); + + if (U_FAILURE(status)) { + return nullptr; + } + + while (U_SUCCESS(status) && ures_hasNext(regionMacro.getAlias())) { + UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),nullptr,&status); + int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); + char16_t buf[6]; + regionName.extract(buf,6,status); + if ( rangeMarkerLocation > 0 ) { + char16_t endRange = regionName.charAt(rangeMarkerLocation+1); + buf[rangeMarkerLocation] = 0; + while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) { + LocalPointer newRegion(new UnicodeString(buf), status); + newMacroRegions->adoptElement(newRegion.orphan(),status); + buf[rangeMarkerLocation-1]++; + } + } else { + LocalPointer newRegion(new UnicodeString(regionName), status); + newMacroRegions->adoptElement(newRegion.orphan(),status); + } + } + return newMacroRegions.orphan(); +} + } // namespace void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) { @@ -263,10 +395,14 @@ void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) { data.load(errorCode); if (U_FAILURE(errorCode)) { return; } gLikelySubtags = new XLikelySubtags(data); - if (gLikelySubtags == nullptr) { + gMacroregions = loadMacroregions(errorCode); + if (U_FAILURE(errorCode) || gLikelySubtags == nullptr || gMacroregions == nullptr) { + delete gLikelySubtags; + delete gMacroregions; errorCode = U_MEMORY_ALLOCATION_ERROR; return; } + ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup); } @@ -317,15 +453,32 @@ XLikelySubtags::~XLikelySubtags() { delete[] lsrs; } -LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const { +LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, + bool returnInputIfUnmatch, + UErrorCode &errorCode) const { + if (locale.isBogus()) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return LSR("", "", "", LSR::EXPLICIT_LSR); + } const char *name = locale.getName(); if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=") // Private use language tag x-subtag-subtag... which CLDR changes to // und-x-subtag-subtag... return LSR(name, "", "", LSR::EXPLICIT_LSR); } - return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(), - locale.getVariant(), errorCode); + LSR max = makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(), + locale.getVariant(), returnInputIfUnmatch, errorCode); + + if (uprv_strlen(max.language) == 0 && + uprv_strlen(max.script) == 0 && + uprv_strlen(max.region) == 0) { + // No match. ICU API mandate us to + // If the provided ULocale instance is already in the maximal form, or + // there is no data available available for maximization, it will be + // returned. + return LSR(locale.getLanguage(), locale.getScript(), locale.getCountry(), LSR::EXPLICIT_LSR, errorCode); + } + return max; } namespace { @@ -338,7 +491,9 @@ const char *getCanonical(const CharStringMap &aliases, const char *alias) { } // namespace LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region, - const char *variant, UErrorCode &errorCode) const { + const char *variant, + bool returnInputIfUnmatch, + UErrorCode &errorCode) const { // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK. // They should match only themselves, // not other locales with what looks like the same language and script subtags. @@ -378,64 +533,91 @@ LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, c language = getCanonical(languageAliases, language); // (We have no script mappings.) region = getCanonical(regionAliases, region); - return maximize(language, script, region); + return maximize(language, script, region, returnInputIfUnmatch, errorCode); } -LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const { - if (uprv_strcmp(language, "und") == 0) { +LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region, + bool returnInputIfUnmatch, + UErrorCode &errorCode) const { + return maximize({language, (int32_t)uprv_strlen(language)}, + {script, (int32_t)uprv_strlen(script)}, + {region, (int32_t)uprv_strlen(region)}, + returnInputIfUnmatch, + errorCode); +} + +bool XLikelySubtags::isMacroregion(StringPiece& region, UErrorCode& errorCode) const { + // In Java, we use Region class. In C++, since Region is under i18n, + // we read the same data used by Region into gMacroregions avoid dependency + // from common to i18n/region.cpp + if (U_FAILURE(errorCode)) { return false; } + umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode); + if (U_FAILURE(errorCode)) { return false; } + UnicodeString str(UnicodeString::fromUTF8(region)); + return gMacroregions->contains((void *)&str); +} + +LSR XLikelySubtags::maximize(StringPiece language, StringPiece script, StringPiece region, + bool returnInputIfUnmatch, + UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode); + } + if (language.compare("und") == 0) { language = ""; } - if (uprv_strcmp(script, "Zzzz") == 0) { + if (script.compare("Zzzz") == 0) { script = ""; } - if (uprv_strcmp(region, "ZZ") == 0) { + if (region.compare("ZZ") == 0) { region = ""; } - if (*script != 0 && *region != 0 && *language != 0) { - return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized + if (!script.empty() && !region.empty() && !language.empty()) { + return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode); // already maximized } + bool retainLanguage = false; + bool retainScript = false; + bool retainRegion = false; - uint32_t retainOldMask = 0; BytesTrie iter(trie); uint64_t state; int32_t value; // Small optimization: Array lookup for first language letter. int32_t c0; - if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 && - language[1] != 0 && // language.length() >= 2 + if (0 <= (c0 = uprv_lowerOrdinal(language.data()[0])) && c0 <= 25 && + language.length() >= 2 && (state = trieFirstLetterStates[c0]) != 0) { value = trieNext(iter.resetToState64(state), language, 1); } else { value = trieNext(iter, language, 0); } + bool matchLanguage = (value >= 0); + bool matchScript = false; if (value >= 0) { - if (*language != 0) { - retainOldMask |= 4; - } + retainLanguage = !language.empty(); state = iter.getState64(); } else { - retainOldMask |= 4; + retainLanguage = true; iter.resetToState64(trieUndState); // "und" ("*") state = 0; } + if (value >= 0 && !script.empty()) { + matchScript = true; + } if (value > 0) { // Intermediate or final value from just language. if (value == SKIP_SCRIPT) { value = 0; } - if (*script != 0) { - retainOldMask |= 2; - } + retainScript = !script.empty(); } else { value = trieNext(iter, script, 0); if (value >= 0) { - if (*script != 0) { - retainOldMask |= 2; - } + retainScript = !script.empty(); state = iter.getState64(); } else { - retainOldMask |= 2; + retainScript = true; if (state == 0) { iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**") } else { @@ -447,19 +629,19 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha } } + bool matchRegion = false; if (value > 0) { // Final value from just language or language+script. - if (*region != 0) { - retainOldMask |= 1; - } + retainRegion = !region.empty(); } else { value = trieNext(iter, region, 0); if (value >= 0) { - if (*region != 0) { - retainOldMask |= 1; + if (!region.empty() && !isMacroregion(region, errorCode)) { + retainRegion = true; + matchRegion = true; } } else { - retainOldMask |= 1; + retainRegion = true; if (state == 0) { value = defaultLsrIndex; } else { @@ -470,28 +652,33 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha } } U_ASSERT(value < lsrsLength); - const LSR &result = lsrs[value]; + const LSR &matched = lsrs[value]; - if (*language == 0) { - language = "und"; + if (returnInputIfUnmatch && + (!(matchLanguage || matchScript || (matchRegion && language.empty())))) { + return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode); // no matching. + } + if (language.empty()) { + language = StringPiece("und"); } - if (retainOldMask == 0) { + if (!(retainLanguage || retainScript || retainRegion)) { // Quickly return a copy of the lookup-result LSR // without new allocation of the subtags. - return LSR(result.language, result.script, result.region, result.flags); + return LSR(matched.language, matched.script, matched.region, matched.flags); } - if ((retainOldMask & 4) == 0) { - language = result.language; + if (!retainLanguage) { + language = matched.language; } - if ((retainOldMask & 2) == 0) { - script = result.script; + if (!retainScript) { + script = matched.script; } - if ((retainOldMask & 1) == 0) { - region = result.region; + if (!retainRegion) { + region = matched.region; } + int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0); // retainOldMask flags = LSR explicit-subtag flags - return LSR(language, script, region, retainOldMask); + return LSR(language, script, region, retainMask, errorCode); } int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const { @@ -627,57 +814,97 @@ int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) { default: return -1; } } - -// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code -// in loclikely.cpp to this new code, including activating this -// minimizeSubtags() function. The LocaleMatcher does not minimize. -#if 0 -LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn, - const char *regionIn, ULocale.Minimize fieldToFavor, - UErrorCode &errorCode) const { - LSR result = maximize(languageIn, scriptIn, regionIn); - - // We could try just a series of checks, like: - // LSR result2 = addLikelySubtags(languageIn, "", ""); - // if result.equals(result2) return result2; - // However, we can optimize 2 of the cases: - // (languageIn, "", "") - // (languageIn, "", regionIn) - - // value00 = lookup(result.language, "", "") - BytesTrie iter = new BytesTrie(trie); - int value = trieNext(iter, result.language, 0); - U_ASSERT(value >= 0); - if (value == 0) { - value = trieNext(iter, "", 0); - U_ASSERT(value >= 0); - if (value == 0) { - value = trieNext(iter, "", 0); +int32_t XLikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) { + UStringTrieResult result; + uint8_t c; + if (s.length() == i) { + result = iter.next(u'*'); + } else { + c = s.data()[i]; + for (;;) { + c = uprv_invCharToAscii(c); + // EBCDIC: If s[i] is not an invariant character, + // then c is now 0 and will simply not match anything, which is harmless. + if (i+1 != s.length()) { + if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) { + return -1; + } + c = s.data()[++i]; + } else { + // last character of this subtag + result = iter.next(c | 0x80); + break; + } } } - U_ASSERT(value > 0); - LSR value00 = lsrs[value]; - boolean favorRegionOk = false; - if (result.script.equals(value00.script)) { //script is default - if (result.region.equals(value00.region)) { - return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS); - } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) { - return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS); - } else { - favorRegionOk = true; - } + switch (result) { + case USTRINGTRIE_NO_MATCH: return -1; + case USTRINGTRIE_NO_VALUE: return 0; + case USTRINGTRIE_INTERMEDIATE_VALUE: + U_ASSERT(iter.getValue() == SKIP_SCRIPT); + return SKIP_SCRIPT; + case USTRINGTRIE_FINAL_VALUE: return iter.getValue(); + default: return -1; } - - // The last case is not as easy to optimize. - // Maybe do later, but for now use the straightforward code. - LSR result2 = maximize(languageIn, scriptIn, ""); - if (result2.equals(result)) { - return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS); - } else if (favorRegionOk) { - return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS); - } - return result; } -#endif + +LSR XLikelySubtags::minimizeSubtags(StringPiece language, StringPiece script, + StringPiece region, + bool favorScript, + UErrorCode &errorCode) const { + LSR max = maximize(language, script, region, true, errorCode); + if (U_FAILURE(errorCode)) { + return max; + } + // If no match, return it. + if (uprv_strlen(max.language) == 0 && + uprv_strlen(max.script) == 0 && + uprv_strlen(max.region) == 0) { + // No match. ICU API mandate us to + // "If this Locale is already in the minimal form, or not valid, or + // there is no data available for minimization, the Locale will be + // unchanged." + return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode); + } + // try language + LSR test = maximize(max.language, "", "", true, errorCode); + if (U_FAILURE(errorCode)) { + return max; + } + if (test.isEquivalentTo(max)) { + return LSR(max.language, "", "", LSR::DONT_CARE_FLAGS, errorCode); + } + + if (!favorScript) { + // favor Region + // try language and region + test = maximize(max.language, "", max.region, true, errorCode); + if (U_FAILURE(errorCode)) { + return max; + } + if (test.isEquivalentTo(max)) { + return LSR(max.language, "", max.region, LSR::DONT_CARE_FLAGS, errorCode); + } + } + // try language and script + test = maximize(max.language, max.script, "", true, errorCode); + if (U_FAILURE(errorCode)) { + return max; + } + if (test.isEquivalentTo(max)) { + return LSR(max.language, max.script, "", LSR::DONT_CARE_FLAGS, errorCode); + } + if (favorScript) { + // try language and region + test = maximize(max.language, "", max.region, true, errorCode); + if (U_FAILURE(errorCode)) { + return max; + } + if (test.isEquivalentTo(max)) { + return LSR(max.language, "", max.region, LSR::DONT_CARE_FLAGS, errorCode); + } + } + return LSR(max.language, max.script, max.region, LSR::DONT_CARE_FLAGS, errorCode); +} U_NAMESPACE_END diff --git a/yass/third_party/icu/source/common/loclikelysubtags.h b/yass/third_party/icu/source/common/loclikelysubtags.h index 14a01a5eac..ebd9c15306 100644 --- a/yass/third_party/icu/source/common/loclikelysubtags.h +++ b/yass/third_party/icu/source/common/loclikelysubtags.h @@ -11,6 +11,7 @@ #include "unicode/utypes.h" #include "unicode/bytestrie.h" #include "unicode/locid.h" +#include "unicode/stringpiece.h" #include "unicode/uobject.h" #include "unicode/ures.h" #include "charstrmap.h" @@ -47,7 +48,9 @@ public: static const XLikelySubtags *getSingleton(UErrorCode &errorCode); // VisibleForTesting - LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const; + LSR makeMaximizedLsrFrom(const Locale &locale, + bool returnInputIfUnmatch, + UErrorCode &errorCode) const; /** * Tests whether lsr is "more likely" than other. @@ -61,13 +64,9 @@ public: */ int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const; - // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code - // in loclikely.cpp to this new code, including activating this - // minimizeSubtags() function. The LocaleMatcher does not minimize. -#if 0 - LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn, - ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const; -#endif + LSR minimizeSubtags(StringPiece language, StringPiece script, StringPiece region, + bool favorScript, + UErrorCode &errorCode) const; // visible for LocaleDistance const LocaleDistanceData &getDistanceData() const { return distanceData; } @@ -80,16 +79,25 @@ private: static void initLikelySubtags(UErrorCode &errorCode); LSR makeMaximizedLsr(const char *language, const char *script, const char *region, - const char *variant, UErrorCode &errorCode) const; + const char *variant, + bool returnInputIfUnmatch, + UErrorCode &errorCode) const; /** * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN". */ - LSR maximize(const char *language, const char *script, const char *region) const; + LSR maximize(const char *language, const char *script, const char *region, + bool returnInputIfUnmatch, + UErrorCode &errorCode) const; + LSR maximize(StringPiece language, StringPiece script, StringPiece region, + bool returnInputIfUnmatch, + UErrorCode &errorCode) const; int32_t getLikelyIndex(const char *language, const char *script) const; + bool isMacroregion(StringPiece& region, UErrorCode &errorCode) const; static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i); + static int32_t trieNext(BytesTrie &iter, StringPiece s, int32_t i); UResourceBundle *langInfoBundle; // We could store the strings by value, except that if there were few enough strings, diff --git a/yass/third_party/icu/source/common/locmap.cpp b/yass/third_party/icu/source/common/locmap.cpp index 7a0e90e8bd..e41cfd1027 100644 --- a/yass/third_party/icu/source/common/locmap.cpp +++ b/yass/third_party/icu/source/common/locmap.cpp @@ -1170,7 +1170,7 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status) // conversion functionality when available. #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API int32_t len; - char baseName[ULOC_FULLNAME_CAPACITY] = {}; + icu::CharString baseName; const char * mylocaleID = localeID; // Check any for keywords. @@ -1189,19 +1189,23 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status) else { // If the locale ID contains keywords other than collation, just use the base name. - len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status); - - if (U_SUCCESS(*status) && len > 0) { - baseName[len] = 0; - mylocaleID = baseName; + icu::CharStringByteSink sink(&baseName); + ulocimp_getBaseName(localeID, sink, status); + } + if (U_SUCCESS(*status) && !baseName.isEmpty()) + { + mylocaleID = baseName.data(); } } } - char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form - (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), false, status); + icu::CharString asciiBCP47Tag; + { + icu::CharStringByteSink sink(&asciiBCP47Tag); + ulocimp_toLanguageTag(mylocaleID, sink, false, status); + } if (U_SUCCESS(*status)) { diff --git a/yass/third_party/icu/source/common/locresdata.cpp b/yass/third_party/icu/source/common/locresdata.cpp index 7a0969dff5..c9d1cdddde 100644 --- a/yass/third_party/icu/source/common/locresdata.cpp +++ b/yass/third_party/icu/source/common/locresdata.cpp @@ -24,6 +24,8 @@ #include "unicode/putil.h" #include "unicode/uloc.h" #include "unicode/ures.h" +#include "bytesinkutil.h" +#include "charstr.h" #include "cstring.h" #include "ulocimp.h" #include "uresimp.h" @@ -156,16 +158,18 @@ _uloc_getOrientationHelper(const char* localeId, ULayoutType result = ULOC_LAYOUT_UNKNOWN; if (!U_FAILURE(*status)) { - int32_t length = 0; - char localeBuffer[ULOC_FULLNAME_CAPACITY]; - - uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status); + icu::CharString localeBuffer; + { + icu::CharStringByteSink sink(&localeBuffer); + ulocimp_canonicalize(localeId, sink, status); + } if (!U_FAILURE(*status)) { + int32_t length = 0; const char16_t* const value = uloc_getTableStringWithFallback( nullptr, - localeBuffer, + localeBuffer.data(), "layout", nullptr, key, diff --git a/yass/third_party/icu/source/common/lsr.cpp b/yass/third_party/icu/source/common/lsr.cpp index 39eb46df27..bd231ecdb5 100644 --- a/yass/third_party/icu/source/common/lsr.cpp +++ b/yass/third_party/icu/source/common/lsr.cpp @@ -31,6 +31,26 @@ LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t } } +LSR::LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f, + UErrorCode &errorCode) : + language(nullptr), script(nullptr), region(nullptr), + regionIndex(indexForRegion(r.data())), flags(f) { + if (U_SUCCESS(errorCode)) { + CharString data; + data.append(lang, errorCode).append('\0', errorCode); + int32_t scriptOffset = data.length(); + data.append(scr, errorCode).append('\0', errorCode); + int32_t regionOffset = data.length(); + data.append(r, errorCode); + owned = data.cloneData(errorCode); + if (U_SUCCESS(errorCode)) { + language = owned; + script = owned + scriptOffset; + region = owned + regionOffset; + } + } +} + LSR::LSR(LSR &&other) noexcept : language(other.language), script(other.script), region(other.region), owned(other.owned), regionIndex(other.regionIndex), flags(other.flags), diff --git a/yass/third_party/icu/source/common/lsr.h b/yass/third_party/icu/source/common/lsr.h index a2f7c8bb15..313286e93d 100644 --- a/yass/third_party/icu/source/common/lsr.h +++ b/yass/third_party/icu/source/common/lsr.h @@ -7,6 +7,7 @@ #ifndef __LSR_H__ #define __LSR_H__ +#include "unicode/stringpiece.h" #include "unicode/utypes.h" #include "unicode/uobject.h" #include "cstring.h" @@ -45,6 +46,8 @@ struct LSR final : public UMemory { */ LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f, UErrorCode &errorCode); + LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f, + UErrorCode &errorCode); LSR(LSR &&other) noexcept; LSR(const LSR &other) = delete; inline ~LSR() { diff --git a/yass/third_party/icu/source/common/norm2_nfc_data.h b/yass/third_party/icu/source/common/norm2_nfc_data.h index ebe3e6ba90..3dada06c57 100644 --- a/yass/third_party/icu/source/common/norm2_nfc_data.h +++ b/yass/third_party/icu/source/common/norm2_nfc_data.h @@ -10,7 +10,7 @@ #ifdef INCLUDED_FROM_NORMALIZER2_CPP static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0}; -static const UVersionInfo norm2_nfc_data_dataVersion={0xf,0,0,0}; +static const UVersionInfo norm2_nfc_data_dataVersion={0xf,1,0,0}; static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={ 0x50,0x4cb8,0x8920,0x8a20,0x8a20,0x8a20,0x8a20,0x8a20,0xc0,0x300,0xae2,0x29e0,0x3c66,0xfc00,0x1288,0x3b9c, diff --git a/yass/third_party/icu/source/common/norm2allmodes.h b/yass/third_party/icu/source/common/norm2allmodes.h index 6347fba9cb..a2cfc89c1a 100644 --- a/yass/third_party/icu/source/common/norm2allmodes.h +++ b/yass/third_party/icu/source/common/norm2allmodes.h @@ -391,6 +391,7 @@ struct Norm2AllModes : public UMemory { static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); + static const Norm2AllModes *getNFKC_SCFInstance(UErrorCode &errorCode); Normalizer2Impl *impl; ComposeNormalizer2 comp; diff --git a/yass/third_party/icu/source/common/normalizer2impl.h b/yass/third_party/icu/source/common/normalizer2impl.h index 2cca33d349..f5ede24fc2 100644 --- a/yass/third_party/icu/source/common/normalizer2impl.h +++ b/yass/third_party/icu/source/common/normalizer2impl.h @@ -789,7 +789,8 @@ unorm_getFCD16(UChar32 c); * * Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms. * ICU ships with data files for standard Unicode Normalization Forms - * NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm). + * NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm), + * NFKC_Casefold (nfkc_cf.nrm) and NFKC_Simple_Casefold (nfkc_scf.nrm). * Custom (application-specific) data can be built into additional .nrm files * with the gennorm2 build tool. * ICU ships with one such file, uts46.nrm, for the implementation of UTS #46. diff --git a/yass/third_party/icu/source/common/propname_data.h b/yass/third_party/icu/source/common/propname_data.h index 1e247874b6..7bdbe8ec43 100644 --- a/yass/third_party/icu/source/common/propname_data.h +++ b/yass/third_party/icu/source/common/propname_data.h @@ -11,101 +11,102 @@ U_NAMESPACE_BEGIN -const int32_t PropNameData::indexes[8]={0x20,0x1660,0x5294,0xacd0,0xacd0,0xacd0,0x31,0}; +const int32_t PropNameData::indexes[8]={0x20,0x1690,0x5337,0xae61,0xae61,0xae61,0x31,0}; -const int32_t PropNameData::valueMaps[1424]={ -6,0,0x48,0,0xf1,0x368,0xf1,0x37e,0xf1,0x393,0xf1,0x3a9,0xf1,0x3b4,0xf1,0x3d5, -0xf1,0x3e5,0xf1,0x3f4,0xf1,0x402,0xf1,0x426,0xf1,0x43d,0xf1,0x455,0xf1,0x46c,0xf1,0x47b, -0xf1,0x48a,0xf1,0x49b,0xf1,0x4a9,0xf1,0x4bb,0xf1,0x4d5,0xf1,0x4f0,0xf1,0x505,0xf1,0x522, -0xf1,0x533,0xf1,0x53e,0xf1,0x55d,0xf1,0x573,0xf1,0x584,0xf1,0x594,0xf1,0x5af,0xf1,0x5c8, -0xf1,0x5d9,0xf1,0x5f3,0xf1,0x606,0xf1,0x616,0xf1,0x630,0xf1,0x649,0xf1,0x660,0xf1,0x674, -0xf1,0x68a,0xf1,0x69e,0xf1,0x6b4,0xf1,0x6ce,0xf1,0x6e6,0xf1,0x702,0xf1,0x70a,0xf1,0x712, -0xf1,0x71a,0xf1,0x722,0xf1,0x72b,0xf1,0x738,0xf1,0x74b,0xf1,0x768,0xf1,0x785,0xf1,0x7a2, -0xf1,0x7c0,0xf1,0x7de,0xf1,0x802,0xf1,0x80f,0xf1,0x829,0xf1,0x83e,0xf1,0x859,0xf1,0x870, -0xf1,0x887,0xf1,0x8a9,0xf1,0x8c8,0xf1,0x8e1,0xf1,0x90e,0xf1,0x947,0xf1,0x978,0xf1,0x9a7, -0xf1,0x9d6,0xf1,0x1000,0x1019,0x9eb,0x16d,0xc0b,0x188,0x3279,0xf7,0x3298,0x2d4,0x33d6,0x2ea,0x3430, -0x2f4,0x368d,0x316,0x3fb8,0x382,0x4028,0x38c,0x42c2,0x3bb,0x4300,0x3c3,0x4e45,0x48f,0x4ec3,0x499,0x4ee8, -0x49f,0x4f02,0x4a5,0x4f23,0x4ac,0x4f3d,0xf7,0x4f62,0xf7,0x4f88,0x4b3,0x5032,0x4c9,0x50ab,0x4dc,0x515d, -0x4f7,0x5194,0x4fe,0x5374,0x512,0x57f4,0x53a,0x2000,0x2001,0x5853,0x542,0x3000,0x3001,0x58df,0,0x4000, -0x400e,0x58f1,0,0x58fa,0,0x5914,0,0x5925,0,0x5936,0,0x594c,0,0x5955,0,0x5972, -0,0x5990,0,0x59ae,0,0x59cc,0,0x59e2,0,0x59f6,0,0x5a0c,0,0x7000,0x7001,0x5a25, -0,0x844,0x12,0,1,0x12,0x20,0x862,0x4a,0,1,6,7,8,9,0xa, -0xb,0xc,0xd,0xe,0xf,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a, -0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x54,0x5b,0x67,0x6b,0x76,0x7a, -0x81,0x82,0x84,0x85,0xc8,0xca,0xd6,0xd8,0xda,0xdc,0xde,0xe0,0xe2,0xe4,0xe6,0xe8, -0xe9,0xea,0xf0,0x2e,0x40,0x4c,0x5e,0x68,0x79,0x84,0x91,0x9e,0xab,0xb8,0xc5,0xd2, -0xdf,0xec,0xf9,0x106,0x113,0x120,0x12d,0x13a,0x147,0x154,0x161,0x16e,0x17b,0x188,0x195,0x1a2, -0x1af,0x1bc,0x1c9,0x1d6,0x1e3,0x1f0,0x1fd,0x20c,0x21b,0x22a,0x239,0x248,0x257,0x266,0x275,0x28f, -0x2a3,0x2b7,0x2d2,0x2e1,0x2ea,0x2fa,0x302,0x30b,0x31a,0x323,0x333,0x344,0x355,0xa03,1,0, -0x17,0x9fa,0xa0b,0xa1c,0xa30,0xa47,0xa5f,0xa71,0xa86,0xa9d,0xab2,0xac2,0xad4,0xaf1,0xb0d,0xb1f, -0xb3c,0xb58,0xb74,0xb89,0xb9e,0xbb8,0xbd3,0xbee,0xba5,1,0,0x148,0xc16,0xc23,0xc36,0xc5e, -0xc7c,0xc9a,0xcb2,0xcdd,0xd07,0xd1f,0xd32,0xd45,0xd54,0xd63,0xd72,0xd81,0xd98,0xda9,0xdbc,0xdcf, -0xddc,0xde9,0xdf8,0xe09,0xe1e,0xe2f,0xe3a,0xe43,0xe54,0xe65,0xe78,0xe8a,0xe9d,0xeb0,0xeef,0xefc, -0xf09,0xf16,0xf2b,0xf5b,0xf75,0xf96,0xfc1,0xfe4,0x1042,0x1069,0x1084,0x1093,0x10ba,0x10e2,0x1105,0x1128, -0x1152,0x116b,0x118a,0x11ad,0x11d1,0x11e4,0x11fe,0x1228,0x1240,0x1268,0x1291,0x12a4,0x12b7,0x12ca,0x12f1,0x1300, -0x1320,0x134e,0x136c,0x139a,0x13b6,0x13d1,0x13ea,0x1403,0x1424,0x1454,0x1473,0x1495,0x14c9,0x14f6,0x153b,0x155c, -0x1586,0x15a7,0x15d0,0x15e3,0x1616,0x162d,0x163c,0x164d,0x1678,0x168f,0x16c0,0x16ee,0x1731,0x173c,0x1775,0x1786, -0x1797,0x17a4,0x17b7,0x17f1,0x1815,0x1839,0x1873,0x18ab,0x18d6,0x18ee,0x191a,0x1946,0x1953,0x1962,0x197f,0x19a1, -0x19cf,0x19ef,0x1a16,0x1a3d,0x1a5c,0x1a6f,0x1a80,0x1a91,0x1ab6,0x1adb,0x1b02,0x1b36,0x1b63,0x1b81,0x1b94,0x1bad, -0x1be6,0x1bf5,0x1c15,0x1c37,0x1c59,0x1c70,0x1c87,0x1cb4,0x1ccd,0x1ce6,0x1d17,0x1d41,0x1d5c,0x1d6f,0x1d8e,0x1d97, -0x1daa,0x1dc8,0x1de6,0x1df9,0x1e10,0x1e25,0x1e5a,0x1e7e,0x1e93,0x1ea2,0x1eb5,0x1ed9,0x1ee2,0x1f06,0x1f1d,0x1f30, -0x1f3f,0x1f4a,0x1f6b,0x1f83,0x1f92,0x1fa1,0x1fb0,0x1fc7,0x1fdc,0x1ff1,0x202a,0x203d,0x2059,0x2064,0x2071,0x209f, -0x20c3,0x20e6,0x20f9,0x211b,0x212e,0x2149,0x216c,0x218f,0x21b4,0x21c5,0x21f4,0x2221,0x2238,0x2253,0x2262,0x228d, -0x22c5,0x22ff,0x232d,0x233e,0x234b,0x236f,0x237e,0x239a,0x23b4,0x23d1,0x2409,0x241e,0x244b,0x246a,0x2498,0x24b8, -0x24ec,0x24fb,0x2525,0x2548,0x2573,0x257e,0x258f,0x25aa,0x25ce,0x25db,0x25f0,0x2617,0x2642,0x2679,0x268c,0x269d, -0x26cd,0x26de,0x26ed,0x2702,0x2720,0x2733,0x2746,0x275d,0x277a,0x2785,0x278e,0x27b0,0x27c5,0x27ea,0x2801,0x282a, -0x2845,0x285a,0x2873,0x2894,0x28c9,0x28da,0x290b,0x292f,0x2940,0x2959,0x2964,0x2991,0x29b3,0x29e1,0x2a14,0x2a23, -0x2a34,0x2a51,0x2a93,0x2aba,0x2ac7,0x2adc,0x2b00,0x2b26,0x2b5f,0x2b70,0x2b94,0x2b9f,0x2bac,0x2bbb,0x2be0,0x2c0e, -0x2c2a,0x2c47,0x2c54,0x2c65,0x2c83,0x2ca6,0x2cc3,0x2cd0,0x2cf0,0x2d0d,0x2d2e,0x2d57,0x2d68,0x2d87,0x2da0,0x2db9, -0x2dca,0x2e13,0x2e24,0x2e3d,0x2e6c,0x2e99,0x2ebe,0x2f00,0x2f1c,0x2f2b,0x2f42,0x2f70,0x2f89,0x2fb2,0x2fcc,0x3007, -0x3025,0x3034,0x3054,0x306f,0x3093,0x30af,0x30cd,0x30eb,0x3102,0x3111,0x311c,0x3159,0x316c,0x3196,0x31b6,0x31e4, -0x3208,0x3230,0x3255,0x3260,0x1fa9,1,0,0x12,0x32af,0x32bf,0x32d2,0x32e2,0x32f2,0x3301,0x3311,0x3323, -0x3336,0x3348,0x3358,0x3368,0x3377,0x3386,0x3396,0x33a3,0x33b2,0x33c6,0x2067,1,0,6,0x33eb,0x33f6, -0x3403,0x3410,0x341d,0x3428,0x20ab,1,0,0x1e,0x3445,0x3454,0x3469,0x347e,0x3493,0x34a7,0x34b8,0x34cc, -0x34df,0x34f0,0x3509,0x351b,0x352c,0x3540,0x3553,0x356b,0x357d,0x3588,0x3598,0x35a6,0x35bb,0x35d0,0x35e6,0x3600, -0x3616,0x3626,0x363a,0x364e,0x365f,0x3677,0x22d6,1,0,0x68,0x369f,0x36c2,0x36cb,0x36d8,0x36e3,0x36ec, -0x36f7,0x3700,0x3719,0x371e,0x3727,0x3744,0x374d,0x375a,0x3763,0x3787,0x378e,0x3797,0x37aa,0x37b5,0x37be,0x37c9, -0x37e2,0x37eb,0x37fa,0x3805,0x380e,0x3819,0x3822,0x3829,0x3832,0x383d,0x3846,0x385f,0x3868,0x3875,0x3880,0x3891, -0x389c,0x38b1,0x38c8,0x38d1,0x38da,0x38f3,0x38fe,0x3907,0x3910,0x3927,0x3944,0x394f,0x3960,0x396b,0x3972,0x397f, -0x398c,0x39b9,0x39ce,0x39d7,0x39f2,0x3a15,0x3a36,0x3a57,0x3a7c,0x3aa3,0x3ac4,0x3ae7,0x3b08,0x3b2f,0x3b50,0x3b75, -0x3b94,0x3bb3,0x3bd2,0x3bef,0x3c10,0x3c31,0x3c54,0x3c79,0x3c98,0x3cb7,0x3cd8,0x3cff,0x3d24,0x3d43,0x3d64,0x3d87, -0x3da2,0x3dbb,0x3dd6,0x3def,0x3e0c,0x3e27,0x3e44,0x3e63,0x3e80,0x3e9d,0x3ebc,0x3ed9,0x3ef4,0x3f11,0x3f2e,0x3f61, -0x3f88,0x3f9b,0x2639,1,0,6,0x3fc9,0x3fd8,0x3fe8,0x3ff8,0x4008,0x4019,0x2697,1,0,0x2b, -0x4037,0x4043,0x4051,0x4060,0x406f,0x407f,0x4090,0x40a4,0x40b9,0x40cf,0x40e2,0x40f6,0x4106,0x410f,0x411a,0x412a, -0x4146,0x4158,0x4166,0x4175,0x4181,0x4196,0x41aa,0x41bd,0x41cb,0x41df,0x41ed,0x41f7,0x4209,0x4215,0x4223,0x4233, -0x423a,0x4241,0x4248,0x424f,0x4256,0x426c,0x428d,0x870,0x429f,0x42aa,0x42b9,0x28f0,1,0,4,0x42d3, -0x42de,0x42ea,0x42f4,0x2916,1,0,0xc8,0x430b,0x4318,0x432d,0x433a,0x4349,0x4357,0x4366,0x4375,0x4387, -0x4396,0x43a4,0x43b5,0x43c4,0x43d3,0x43e0,0x43ec,0x43fb,0x440a,0x4414,0x4421,0x442e,0x443d,0x444b,0x445a,0x4466, -0x4470,0x447c,0x448c,0x449c,0x44aa,0x44b6,0x44c7,0x44d3,0x44df,0x44ed,0x44fa,0x4506,0x4513,0xe2f,0x4520,0x452e, -0x4548,0x4551,0x455f,0x456d,0x4579,0x4588,0x4596,0x45a4,0x45b0,0x45bf,0x45cd,0x45db,0x45e8,0x45f7,0x4612,0x4621, -0x4632,0x4643,0x4656,0x4668,0x4677,0x4689,0x4698,0x46a4,0x46af,0x1f3f,0x46bc,0x46c7,0x46d2,0x46dd,0x46e8,0x4703, -0x470e,0x4719,0x4724,0x4737,0x474b,0x4756,0x4765,0x4774,0x477f,0x478a,0x4797,0x47a6,0x47b4,0x47bf,0x47da,0x47e4, -0x47f5,0x4806,0x4815,0x4826,0x4831,0x483c,0x4847,0x4852,0x485d,0x4868,0x4873,0x487d,0x4888,0x4898,0x48a3,0x48b1, -0x48be,0x48c9,0x48d8,0x48e5,0x48f2,0x4901,0x490e,0x491f,0x4931,0x4941,0x494c,0x495f,0x4976,0x4984,0x4991,0x499c, -0x49a9,0x49ba,0x49d6,0x49ec,0x49f7,0x4a14,0x4a24,0x4a33,0x4a3e,0x4a49,0x2059,0x4a55,0x4a60,0x4a78,0x4a88,0x4a97, -0x4aa5,0x4ab3,0x4abe,0x4ac9,0x4add,0x4af4,0x4b0c,0x4b1c,0x4b2c,0x4b3c,0x4b4e,0x4b59,0x4b64,0x4b6e,0x4b7a,0x4b88, -0x4b9b,0x4ba7,0x4bb4,0x4bbf,0x4bdb,0x4be8,0x4bf6,0x4c0f,0x2959,0x4c1e,0x277a,0x4c2b,0x4c39,0x4c4b,0x4c59,0x4c65, -0x4c75,0x2b94,0x4c83,0x4c8f,0x4c9a,0x4ca5,0x4cb0,0x4cc4,0x4cd2,0x4ce9,0x4cf5,0x4d09,0x4d17,0x4d29,0x4d3f,0x4d4d, -0x4d5f,0x4d6d,0x4d8a,0x4d9c,0x4da9,0x4dba,0x4dcc,0x4de6,0x4df3,0x4e06,0x4e17,0x3111,0x4e24,0x3255,0x4e33,0x3370, -1,0,6,0x4e5f,0x4e72,0x4e82,0x4e90,0x4ea1,0x4eb1,0x33cc,0x12,0,1,0x4edb,0x4ee1,0x33d9, -0x12,0,1,0x4edb,0x4ee1,0x33e6,1,0,3,0x4edb,0x4ee1,0x4f1a,0x33fc,1,0,3, -0x4edb,0x4ee1,0x4f1a,0x3412,1,0,0x12,0x4fa4,0x4fae,0x4fba,0x4fc1,0x4fcc,0x4fd1,0x4fd8,0x4fdf,0x4fe8, -0x4fed,0x4ff2,0x5002,0x870,0x429f,0x500e,0x42aa,0x501e,0x42b9,0x34bb,1,0,0xf,0x4fa4,0x5045,0x504f, -0x5059,0x5064,0x4175,0x506e,0x507a,0x5082,0x5089,0x5093,0x4fba,0x4fc1,0x4fd1,0x509d,0x3542,1,0,0x17, -0x4fa4,0x50ba,0x5059,0x50c6,0x50d3,0x50e1,0x4175,0x50ec,0x4fba,0x50fd,0x4fd1,0x510c,0x511a,0x870,0x428d,0x5126, -0x5137,0x429f,0x500e,0x42aa,0x501e,0x42b9,0x5148,0x365f,1,0,3,0x517b,0x5183,0x518b,0x3678,1, -0,0x10,0x51b4,0x51bb,0x51ca,0x51eb,0x520e,0x5219,0x5238,0x524f,0x525c,0x5265,0x5284,0x52b7,0x52d2,0x5301, -0x531e,0x5343,0x3711,1,0,0x24,0x5392,0x539f,0x53b2,0x53bf,0x53ec,0x5411,0x5426,0x5445,0x5466,0x5493, -0x54cc,0x54ef,0x5512,0x553f,0x5574,0x559b,0x55c4,0x55fb,0x562a,0x564b,0x5670,0x567f,0x56a2,0x56b9,0x56c6,0x56d5, -0x56f2,0x570b,0x572e,0x5753,0x576c,0x5781,0x5790,0x57a1,0x57ae,0x57cf,0x38e1,1,0,4,0x580d,0x5818, -0x5830,0x5848,0x391d,0x36,1,2,4,8,0xe,0x10,0x20,0x3e,0x40,0x80,0x100,0x1c0, -0x200,0x400,0x800,0xe00,0x1000,0x2000,0x4000,0x7000,0x8000,0x10000,0x20000,0x40000,0x78001,0x80000,0x100000,0x200000, -0x400000,0x800000,0x1000000,0x2000000,0x4000000,0x8000000,0xf000000,0x10000000,0x20000000,0x30f80000,0x3445,0x3454,0x3469,0x347e,0x5881,0x3493, -0x34a7,0x5877,0x34b8,0x34cc,0x34df,0x5892,0x34f0,0x3509,0x351b,0x58a9,0x352c,0x3540,0x3553,0x58d2,0x356b,0x357d, -0x3588,0x3598,0x586e,0x35a6,0x35bb,0x35d0,0x35e6,0x3600,0x3616,0x3626,0x363a,0x364e,0x58c8,0x365f,0x3677,0x58b3 +const int32_t PropNameData::valueMaps[1436]={ +6,0,0x4b,0,0xf7,0x368,0xf7,0x37e,0xf7,0x393,0xf7,0x3a9,0xf7,0x3b4,0xf7,0x3d5, +0xf7,0x3e5,0xf7,0x3f4,0xf7,0x402,0xf7,0x426,0xf7,0x43d,0xf7,0x455,0xf7,0x46c,0xf7,0x47b, +0xf7,0x48a,0xf7,0x49b,0xf7,0x4a9,0xf7,0x4bb,0xf7,0x4d5,0xf7,0x4f0,0xf7,0x505,0xf7,0x522, +0xf7,0x533,0xf7,0x53e,0xf7,0x55d,0xf7,0x573,0xf7,0x584,0xf7,0x594,0xf7,0x5af,0xf7,0x5c8, +0xf7,0x5d9,0xf7,0x5f3,0xf7,0x606,0xf7,0x616,0xf7,0x630,0xf7,0x649,0xf7,0x660,0xf7,0x674, +0xf7,0x68a,0xf7,0x69e,0xf7,0x6b4,0xf7,0x6ce,0xf7,0x6e6,0xf7,0x702,0xf7,0x70a,0xf7,0x712, +0xf7,0x71a,0xf7,0x722,0xf7,0x72b,0xf7,0x738,0xf7,0x74b,0xf7,0x768,0xf7,0x785,0xf7,0x7a2, +0xf7,0x7c0,0xf7,0x7de,0xf7,0x802,0xf7,0x80f,0xf7,0x829,0xf7,0x83e,0xf7,0x859,0xf7,0x870, +0xf7,0x887,0xf7,0x8a9,0xf7,0x8c8,0xf7,0x8e1,0xf7,0x90e,0xf7,0x947,0xf7,0x978,0xf7,0x9a7, +0xf7,0x9d6,0xf7,0x9eb,0xf7,0xa04,0xf7,0xa2f,0xf7,0x1000,0x1019,0xa60,0x173,0xc80,0x18e,0x331c, +0xfd,0x333b,0x2db,0x3479,0x2f1,0x34d3,0x2fb,0x3730,0x31d,0x405b,0x389,0x40cb,0x393,0x43b0,0x3c7,0x43ee, +0x3cf,0x4f33,0x49b,0x4fb1,0x4a5,0x4fd6,0x4ab,0x4ff0,0x4b1,0x5011,0x4b8,0x502b,0xfd,0x5050,0xfd,0x5076, +0x4bf,0x5120,0x4d5,0x5199,0x4e8,0x524b,0x503,0x5282,0x50a,0x5462,0x51e,0x58e2,0x546,0x2000,0x2001,0x5941, +0x54e,0x3000,0x3001,0x59cd,0,0x4000,0x400e,0x59df,0,0x59e8,0,0x5a02,0,0x5a13,0,0x5a24, +0,0x5a3a,0,0x5a43,0,0x5a60,0,0x5a7e,0,0x5a9c,0,0x5aba,0,0x5ad0,0,0x5ae4, +0,0x5afa,0,0x7000,0x7001,0x5b13,0,0x87a,0x12,0,1,0x12,0x20,0x898,0x4a,0, +1,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf,0x10,0x11,0x12,0x13,0x14, +0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24, +0x54,0x5b,0x67,0x6b,0x76,0x7a,0x81,0x82,0x84,0x85,0xc8,0xca,0xd6,0xd8,0xda,0xdc, +0xde,0xe0,0xe2,0xe4,0xe6,0xe8,0xe9,0xea,0xf0,0x2e,0x40,0x4c,0x5e,0x68,0x79,0x84, +0x91,0x9e,0xab,0xb8,0xc5,0xd2,0xdf,0xec,0xf9,0x106,0x113,0x120,0x12d,0x13a,0x147,0x154, +0x161,0x16e,0x17b,0x188,0x195,0x1a2,0x1af,0x1bc,0x1c9,0x1d6,0x1e3,0x1f0,0x1fd,0x20c,0x21b,0x22a, +0x239,0x248,0x257,0x266,0x275,0x28f,0x2a3,0x2b7,0x2d2,0x2e1,0x2ea,0x2fa,0x302,0x30b,0x31a,0x323, +0x333,0x344,0x355,0xa39,1,0,0x17,0xa6f,0xa80,0xa91,0xaa5,0xabc,0xad4,0xae6,0xafb,0xb12, +0xb27,0xb37,0xb49,0xb66,0xb82,0xb94,0xbb1,0xbcd,0xbe9,0xbfe,0xc13,0xc2d,0xc48,0xc63,0xbdb,1, +0,0x149,0xc8b,0xc98,0xcab,0xcd3,0xcf1,0xd0f,0xd27,0xd52,0xd7c,0xd94,0xda7,0xdba,0xdc9,0xdd8, +0xde7,0xdf6,0xe0d,0xe1e,0xe31,0xe44,0xe51,0xe5e,0xe6d,0xe7e,0xe93,0xea4,0xeaf,0xeb8,0xec9,0xeda, +0xeed,0xeff,0xf12,0xf25,0xf64,0xf71,0xf7e,0xf8b,0xfa0,0xfd0,0xfea,0x100b,0x1036,0x1059,0x10b7,0x10de, +0x10f9,0x1108,0x112f,0x1157,0x117a,0x119d,0x11c7,0x11e0,0x11ff,0x1222,0x1246,0x1259,0x1273,0x129d,0x12b5,0x12dd, +0x1306,0x1319,0x132c,0x133f,0x1366,0x1375,0x1395,0x13c3,0x13e1,0x140f,0x142b,0x1446,0x145f,0x1478,0x1499,0x14c9, +0x14e8,0x150a,0x153e,0x156b,0x15b0,0x15d1,0x15fb,0x161c,0x1645,0x1658,0x168b,0x16a2,0x16b1,0x16c2,0x16ed,0x1704, +0x1735,0x1763,0x17a6,0x17b1,0x17ea,0x17fb,0x180c,0x1819,0x182c,0x1866,0x188a,0x18ae,0x18e8,0x1920,0x194b,0x1963, +0x198f,0x19bb,0x19c8,0x19d7,0x19f4,0x1a16,0x1a44,0x1a64,0x1a8b,0x1ab2,0x1ad1,0x1ae4,0x1af5,0x1b06,0x1b2b,0x1b50, +0x1b77,0x1bab,0x1bd8,0x1bf6,0x1c09,0x1c22,0x1c5b,0x1c6a,0x1c8a,0x1cac,0x1cce,0x1ce5,0x1cfc,0x1d29,0x1d42,0x1d5b, +0x1d8c,0x1db6,0x1dd1,0x1de4,0x1e03,0x1e0c,0x1e1f,0x1e3d,0x1e5b,0x1e6e,0x1e85,0x1e9a,0x1ecf,0x1ef3,0x1f08,0x1f17, +0x1f2a,0x1f4e,0x1f57,0x1f7b,0x1f92,0x1fa5,0x1fb4,0x1fbf,0x1fe0,0x1ff8,0x2007,0x2016,0x2025,0x203c,0x2051,0x2066, +0x209f,0x20b2,0x20ce,0x20d9,0x20e6,0x2114,0x2138,0x215b,0x216e,0x2190,0x21a3,0x21be,0x21e1,0x2204,0x2229,0x223a, +0x2269,0x2296,0x22ad,0x22c8,0x22d7,0x2302,0x233a,0x2374,0x23a2,0x23b3,0x23c0,0x23e4,0x23f3,0x240f,0x2429,0x2446, +0x247e,0x2493,0x24c0,0x24df,0x250d,0x252d,0x2561,0x2570,0x259a,0x25bd,0x25e8,0x25f3,0x2604,0x261f,0x2643,0x2650, +0x2665,0x268c,0x26b7,0x26ee,0x2701,0x2712,0x2742,0x2753,0x2762,0x2777,0x2795,0x27a8,0x27bb,0x27d2,0x27ef,0x27fa, +0x2803,0x2825,0x283a,0x285f,0x2876,0x289f,0x28ba,0x28cf,0x28e8,0x2909,0x293e,0x294f,0x2980,0x29a4,0x29b5,0x29ce, +0x29d9,0x2a06,0x2a28,0x2a56,0x2a89,0x2a98,0x2aa9,0x2ac6,0x2b08,0x2b2f,0x2b3c,0x2b51,0x2b75,0x2b9b,0x2bd4,0x2be5, +0x2c09,0x2c14,0x2c21,0x2c30,0x2c55,0x2c83,0x2c9f,0x2cbc,0x2cc9,0x2cda,0x2cf8,0x2d1b,0x2d38,0x2d45,0x2d65,0x2d82, +0x2da3,0x2dcc,0x2ddd,0x2dfc,0x2e15,0x2e2e,0x2e3f,0x2e88,0x2e99,0x2eb2,0x2ee1,0x2f0e,0x2f33,0x2f75,0x2f91,0x2fa0, +0x2fb7,0x2fe5,0x2ffe,0x3027,0x3041,0x307c,0x309a,0x30a9,0x30c9,0x30e4,0x3108,0x3124,0x3142,0x3160,0x3177,0x3186, +0x3191,0x31ce,0x31e1,0x320b,0x322b,0x3259,0x327d,0x32a5,0x32ca,0x32d5,0x32ee,0x1fe5,1,0,0x12,0x3352, +0x3362,0x3375,0x3385,0x3395,0x33a4,0x33b4,0x33c6,0x33d9,0x33eb,0x33fb,0x340b,0x341a,0x3429,0x3439,0x3446,0x3455, +0x3469,0x20a3,1,0,6,0x348e,0x3499,0x34a6,0x34b3,0x34c0,0x34cb,0x20e7,1,0,0x1e,0x34e8, +0x34f7,0x350c,0x3521,0x3536,0x354a,0x355b,0x356f,0x3582,0x3593,0x35ac,0x35be,0x35cf,0x35e3,0x35f6,0x360e,0x3620, +0x362b,0x363b,0x3649,0x365e,0x3673,0x3689,0x36a3,0x36b9,0x36c9,0x36dd,0x36f1,0x3702,0x371a,0x2312,1,0, +0x68,0x3742,0x3765,0x376e,0x377b,0x3786,0x378f,0x379a,0x37a3,0x37bc,0x37c1,0x37ca,0x37e7,0x37f0,0x37fd,0x3806, +0x382a,0x3831,0x383a,0x384d,0x3858,0x3861,0x386c,0x3885,0x388e,0x389d,0x38a8,0x38b1,0x38bc,0x38c5,0x38cc,0x38d5, +0x38e0,0x38e9,0x3902,0x390b,0x3918,0x3923,0x3934,0x393f,0x3954,0x396b,0x3974,0x397d,0x3996,0x39a1,0x39aa,0x39b3, +0x39ca,0x39e7,0x39f2,0x3a03,0x3a0e,0x3a15,0x3a22,0x3a2f,0x3a5c,0x3a71,0x3a7a,0x3a95,0x3ab8,0x3ad9,0x3afa,0x3b1f, +0x3b46,0x3b67,0x3b8a,0x3bab,0x3bd2,0x3bf3,0x3c18,0x3c37,0x3c56,0x3c75,0x3c92,0x3cb3,0x3cd4,0x3cf7,0x3d1c,0x3d3b, +0x3d5a,0x3d7b,0x3da2,0x3dc7,0x3de6,0x3e07,0x3e2a,0x3e45,0x3e5e,0x3e79,0x3e92,0x3eaf,0x3eca,0x3ee7,0x3f06,0x3f23, +0x3f40,0x3f5f,0x3f7c,0x3f97,0x3fb4,0x3fd1,0x4004,0x402b,0x403e,0x2675,1,0,6,0x406c,0x407b,0x408b, +0x409b,0x40ab,0x40bc,0x26d3,1,0,0x30,0x40da,0x40e6,0x40f4,0x4103,0x4112,0x4122,0x4133,0x4147,0x415c, +0x4172,0x4185,0x4199,0x41a9,0x41b2,0x41bd,0x41cd,0x41e9,0x41fb,0x4209,0x4218,0x4224,0x4239,0x424d,0x4260,0x426e, +0x4282,0x4290,0x429a,0x42ac,0x42b8,0x42c6,0x42d6,0x42dd,0x42e4,0x42eb,0x42f2,0x42f9,0x430f,0x4330,0x870,0x4342, +0x434d,0x435c,0x4365,0x4370,0x4383,0x4394,0x43a5,0x2963,1,0,4,0x43c1,0x43cc,0x43d8,0x43e2,0x2989, +1,0,0xc8,0x43f9,0x4406,0x441b,0x4428,0x4437,0x4445,0x4454,0x4463,0x4475,0x4484,0x4492,0x44a3,0x44b2, +0x44c1,0x44ce,0x44da,0x44e9,0x44f8,0x4502,0x450f,0x451c,0x452b,0x4539,0x4548,0x4554,0x455e,0x456a,0x457a,0x458a, +0x4598,0x45a4,0x45b5,0x45c1,0x45cd,0x45db,0x45e8,0x45f4,0x4601,0xea4,0x460e,0x461c,0x4636,0x463f,0x464d,0x465b, +0x4667,0x4676,0x4684,0x4692,0x469e,0x46ad,0x46bb,0x46c9,0x46d6,0x46e5,0x4700,0x470f,0x4720,0x4731,0x4744,0x4756, +0x4765,0x4777,0x4786,0x4792,0x479d,0x1fb4,0x47aa,0x47b5,0x47c0,0x47cb,0x47d6,0x47f1,0x47fc,0x4807,0x4812,0x4825, +0x4839,0x4844,0x4853,0x4862,0x486d,0x4878,0x4885,0x4894,0x48a2,0x48ad,0x48c8,0x48d2,0x48e3,0x48f4,0x4903,0x4914, +0x491f,0x492a,0x4935,0x4940,0x494b,0x4956,0x4961,0x496b,0x4976,0x4986,0x4991,0x499f,0x49ac,0x49b7,0x49c6,0x49d3, +0x49e0,0x49ef,0x49fc,0x4a0d,0x4a1f,0x4a2f,0x4a3a,0x4a4d,0x4a64,0x4a72,0x4a7f,0x4a8a,0x4a97,0x4aa8,0x4ac4,0x4ada, +0x4ae5,0x4b02,0x4b12,0x4b21,0x4b2c,0x4b37,0x20ce,0x4b43,0x4b4e,0x4b66,0x4b76,0x4b85,0x4b93,0x4ba1,0x4bac,0x4bb7, +0x4bcb,0x4be2,0x4bfa,0x4c0a,0x4c1a,0x4c2a,0x4c3c,0x4c47,0x4c52,0x4c5c,0x4c68,0x4c76,0x4c89,0x4c95,0x4ca2,0x4cad, +0x4cc9,0x4cd6,0x4ce4,0x4cfd,0x29ce,0x4d0c,0x27ef,0x4d19,0x4d27,0x4d39,0x4d47,0x4d53,0x4d63,0x2c09,0x4d71,0x4d7d, +0x4d88,0x4d93,0x4d9e,0x4db2,0x4dc0,0x4dd7,0x4de3,0x4df7,0x4e05,0x4e17,0x4e2d,0x4e3b,0x4e4d,0x4e5b,0x4e78,0x4e8a, +0x4e97,0x4ea8,0x4eba,0x4ed4,0x4ee1,0x4ef4,0x4f05,0x3186,0x4f12,0x32ca,0x4f21,0x33e3,1,0,6,0x4f4d, +0x4f60,0x4f70,0x4f7e,0x4f8f,0x4f9f,0x343f,0x12,0,1,0x4fc9,0x4fcf,0x344c,0x12,0,1,0x4fc9, +0x4fcf,0x3459,1,0,3,0x4fc9,0x4fcf,0x5008,0x346f,1,0,3,0x4fc9,0x4fcf,0x5008,0x3485, +1,0,0x12,0x5092,0x509c,0x50a8,0x50af,0x50ba,0x50bf,0x50c6,0x50cd,0x50d6,0x50db,0x50e0,0x50f0,0x870, +0x4342,0x50fc,0x434d,0x510c,0x435c,0x352e,1,0,0xf,0x5092,0x5133,0x513d,0x5147,0x5152,0x4218,0x515c, +0x5168,0x5170,0x5177,0x5181,0x50a8,0x50af,0x50bf,0x518b,0x35b5,1,0,0x17,0x5092,0x51a8,0x5147,0x51b4, +0x51c1,0x51cf,0x4218,0x51da,0x50a8,0x51eb,0x50bf,0x51fa,0x5208,0x870,0x4330,0x5214,0x5225,0x4342,0x50fc,0x434d, +0x510c,0x435c,0x5236,0x36d2,1,0,3,0x5269,0x5271,0x5279,0x36eb,1,0,0x10,0x52a2,0x52a9, +0x52b8,0x52d9,0x52fc,0x5307,0x5326,0x533d,0x534a,0x5353,0x5372,0x53a5,0x53c0,0x53ef,0x540c,0x5431,0x3784,1, +0,0x24,0x5480,0x548d,0x54a0,0x54ad,0x54da,0x54ff,0x5514,0x5533,0x5554,0x5581,0x55ba,0x55dd,0x5600,0x562d, +0x5662,0x5689,0x56b2,0x56e9,0x5718,0x5739,0x575e,0x576d,0x5790,0x57a7,0x57b4,0x57c3,0x57e0,0x57f9,0x581c,0x5841, +0x585a,0x586f,0x587e,0x588f,0x589c,0x58bd,0x3954,1,0,4,0x58fb,0x5906,0x591e,0x5936,0x3990,0x36, +1,2,4,8,0xe,0x10,0x20,0x3e,0x40,0x80,0x100,0x1c0,0x200,0x400,0x800,0xe00, +0x1000,0x2000,0x4000,0x7000,0x8000,0x10000,0x20000,0x40000,0x78001,0x80000,0x100000,0x200000,0x400000,0x800000,0x1000000,0x2000000, +0x4000000,0x8000000,0xf000000,0x10000000,0x20000000,0x30f80000,0x34e8,0x34f7,0x350c,0x3521,0x596f,0x3536,0x354a,0x5965,0x355b,0x356f, +0x3582,0x5980,0x3593,0x35ac,0x35be,0x5997,0x35cf,0x35e3,0x35f6,0x59c0,0x360e,0x3620,0x362b,0x363b,0x595c,0x3649, +0x365e,0x3673,0x3689,0x36a3,0x36b9,0x36c9,0x36dd,0x36f1,0x59b6,0x3702,0x371a,0x59a1 }; -const uint8_t PropNameData::bytesTries[15412]={ +const uint8_t PropNameData::bytesTries[15527]={ 0,0x15,0x6d,0xc3,0xc7,0x73,0xc2,0x12,0x76,0x7a,0x76,0x6a,0x77,0xa2,0x52,0x78, 1,0x64,0x50,0x69,0x10,0x64,1,0x63,0x30,0x73,0x62,0x13,0x74,0x61,0x72,0x74, 0x63,0x60,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x61,0x13,0x69,0x67,0x69,0x74, @@ -166,913 +167,920 @@ const uint8_t PropNameData::bytesTries[15412]={ 0x30,0x77,0x10,0x73,0x77,0x11,0x79,0x6e,0x75,0x12,0x65,0x72,0x6e,1,0x73,0x38, 0x77,0x18,0x68,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x77,0x14,0x79,0x6e,0x74, 0x61,0x78,0x75,0x10,0x6d,0x9f,1,0x6d,0x3c,0x75,0x1a,0x6f,0x74,0x61,0x74,0x69, -0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x53,0x12,0x61,0x72,0x6b,0x53,0x66,0xc1,0xf8,0x69, -0xc1,0x3c,0x69,0xa2,0x6f,0x6a,0xa4,9,0x6c,4,0x62,0xc3,8,0x63,0x8c,0x65, +0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x53,0x12,0x61,0x72,0x6b,0x53,0x66,0xc2,0x2e,0x69, +0xc1,0x72,0x69,0xa2,0x6f,0x6a,0xa4,0x3f,0x6c,4,0x62,0xc3,8,0x63,0x8c,0x65, 0x98,0x69,0xa2,0x56,0x6f,2,0x65,0x4b,0x67,0x4c,0x77,0x11,0x65,0x72,0x4c,0x13, 0x63,0x61,0x73,0x65,0x4c,0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,4, 0x11,0x69,0x63,0x1f,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x65,0x78,0x63,0x65,0x70, 0x74,0x69,0x6f,0x6e,0x4b,0xd8,0x40,4,0x11,0x63,0x63,0xc3,0x10,0x18,0x61,0x64, 0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69,0x6e, 0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x10,0x16,0x6e,0x65,0x62,0x72,0x65, -0x61,0x6b,0xc3,8,2,0x64,0x4a,0x6e,0xa2,0x5b,0x73,1,0x63,0xd9,0x40,3, -0x6f,0x16,0x63,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0xd9,0x40,3,2,0x63,0x80,0x65, -0x90,0x73,0x40,1,0x62,0x52,0x74,0x46,1,0x61,0x40,0x72,0x1c,0x69,0x6e,0x61, -0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x47,0x11,0x72,0x74,0x41,0x44, -0x1c,0x69,0x6e,0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x45,0x3e, -0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3f,0x10,0x6f,0x42,0x16,0x67,0x72,0x61, -0x70,0x68,0x69,0x63,0x43,2,0x64,0x2e,0x70,0x86,0x73,0x10,0x63,0xc3,0x17,0x11, -0x69,0x63,1,0x70,0x46,0x73,0x1e,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x63,0x61, -0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x17,0x10,0x6f,0x1f,0x73,0x69,0x74,0x69,0x6f, -0x6e,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x16,0x10,0x63,0xc3, -0x16,2,0x67,0xc3,6,0x6f,0x26,0x74,0xc3,7,0x11,0x69,0x6e,1,0x63,0x4a, -0x69,0x11,0x6e,0x67,1,0x67,0x2e,0x74,0x12,0x79,0x70,0x65,0xc3,7,0x13,0x72, -0x6f,0x75,0x70,0xc3,6,0x48,0x15,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x49,0x66,0x86, -0x67,0xa2,0x4a,0x68,3,0x61,0x36,0x65,0x58,0x73,0x68,0x79,0x13,0x70,0x68,0x65, -0x6e,0x3d,0x1f,0x6e,0x67,0x75,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x74, -0x79,0x70,0x65,0xc3,0xb,0x10,0x78,0x3a,0x14,0x64,0x69,0x67,0x69,0x74,0x3b,0x10, -0x74,0xc3,0xb,0x16,0x75,0x6c,0x6c,0x63,0x6f,0x6d,0x70,0x1f,0x6f,0x73,0x69,0x74, -0x69,0x6f,0x6e,0x65,0x78,0x63,0x6c,0x75,0x73,0x69,0x6f,0x6e,0x33,2,0x63,0xa2, -0x44,0x65,0xa2,0x4b,0x72,3,0x61,0x34,0x62,0x84,0x65,0x8a,0x6c,0x12,0x69,0x6e, -0x6b,0x39,0x11,0x70,0x68,0x7c,0x12,0x65,0x6d,0x65,3,0x62,0x5e,0x63,0x30,0x65, -0x48,0x6c,0x12,0x69,0x6e,0x6b,0x39,0x1a,0x6c,0x75,0x73,0x74,0x65,0x72,0x62,0x72, -0x65,0x61,0x6b,0xc3,0x12,0x14,0x78,0x74,0x65,0x6e,0x64,0x37,0x12,0x61,0x73,0x65, -0x35,0x11,0x78,0x74,0x37,0xc2,5,1,0x62,0xc3,0x12,0x6d,0xd9,0x20,0,0x1c, -0x6e,0x65,0x72,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc2,5,0x13, -0x6d,0x61,0x73,0x6b,0xd9,0x20,0,0x61,0xa2,0xa2,0x62,0xa2,0xd0,0x63,0xa4,0x4f, -0x64,0xa6,0x1c,0x65,5,0x6d,0x75,0x6d,0x6e,0x70,0xa2,0x6b,0x78,0x10,0x74,0x30, -1,0x65,0x2c,0x70,0x12,0x69,0x63,0x74,0xa1,0x12,0x6e,0x64,0x65,1,0x64,0x24, -0x72,0x31,0x1b,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,0xa1, -0x10,0x6f,1,0x64,0x97,0x6a,0x10,0x69,0x92,3,0x63,0x44,0x6b,0x54,0x6d,0x70, -0x70,0x1a,0x72,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x95,0x17,0x6f, -0x6d,0x70,0x6f,0x6e,0x65,0x6e,0x74,0x9b,0x1c,0x65,0x79,0x63,0x61,0x70,0x73,0x65, -0x71,0x75,0x65,0x6e,0x63,0x65,0xa3,0x42,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72, -0x96,0x13,0x62,0x61,0x73,0x65,0x99,0x12,0x72,0x65,0x73,0x95,0x61,0x30,0x62,0x4e, -0x63,0x12,0x6f,0x6d,0x70,0x9b,0xc2,4,0x1b,0x73,0x74,0x61,0x73,0x69,0x61,0x6e, -0x77,0x69,0x64,0x74,0x68,0xc3,4,0x12,0x61,0x73,0x65,0x99,3,0x67,0x44,0x68, -0x4a,0x6c,0x4e,0x73,0x1a,0x63,0x69,0x69,0x68,0x65,0x78,0x64,0x69,0x67,0x69,0x74, -0x23,0x10,0x65,0xd9,0x40,0,0x11,0x65,0x78,0x23,1,0x6e,0x38,0x70,0x11,0x68, -0x61,0x20,0x14,0x62,0x65,0x74,0x69,0x63,0x21,0x11,0x75,0x6d,0x79,5,0x6c,0x22, -0x6c,0x36,0x6d,0x52,0x70,1,0x62,0xd9,0x40,0xd,0x74,0xc3,0x15,2,0x61,0x32, -0x6b,0xc3,1,0x6f,0x11,0x63,0x6b,0xc3,1,0x11,0x6e,0x6b,0x7b,0x10,0x67,0xd9, -0x40,1,0x61,0xa2,0x4f,0x63,0xc3,0,0x69,0x11,0x64,0x69,2,0x63,0x54,0x6d, -0x74,0x70,0x1b,0x61,0x69,0x72,0x65,0x64,0x62,0x72,0x61,0x63,0x6b,0x65,0x74,0xd8, -0x40,0xd,0x13,0x74,0x79,0x70,0x65,0xc3,0x15,0x24,1,0x6c,0x30,0x6f,0x14,0x6e, -0x74,0x72,0x6f,0x6c,0x25,0x12,0x61,0x73,0x73,0xc3,0,0x26,0x14,0x69,0x72,0x72, -0x6f,0x72,1,0x65,0x38,0x69,0x16,0x6e,0x67,0x67,0x6c,0x79,0x70,0x68,0xd9,0x40, -1,0x10,0x64,0x27,0x17,0x73,0x69,0x63,0x65,0x6d,0x6f,0x6a,0x69,0xa3,0x41,6, -0x68,0x7c,0x68,0x54,0x69,0x85,0x6f,0xa2,0x6f,0x77,4,0x63,0x30,0x6b,0x36,0x6c, -0x87,0x74,0x8b,0x75,0x89,1,0x66,0x8d,0x6d,0x8f,0x11,0x63,0x66,0x91,0x18,0x61, -0x6e,0x67,0x65,0x73,0x77,0x68,0x65,0x6e,4,0x63,0x44,0x6c,0x6c,0x6e,0x7e,0x74, -0x98,0x75,0x18,0x70,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x89,0x12,0x61,0x73, -0x65,1,0x66,0x30,0x6d,0x14,0x61,0x70,0x70,0x65,0x64,0x8f,0x14,0x6f,0x6c,0x64, -0x65,0x64,0x8d,0x18,0x6f,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x87,0x1c,0x66, -0x6b,0x63,0x63,0x61,0x73,0x65,0x66,0x6f,0x6c,0x64,0x65,0x64,0x91,0x18,0x69,0x74, -0x6c,0x65,0x63,0x61,0x73,0x65,0x64,0x8b,0x13,0x6d,0x70,0x65,0x78,0x33,0x61,0x2e, -0x63,0xa2,0x48,0x66,0xd9,0x40,2,1,0x6e,0x72,0x73,0x10,0x65,3,0x64,0x83, -0x66,0x3a,0x69,0x4a,0x73,0x17,0x65,0x6e,0x73,0x69,0x74,0x69,0x76,0x65,0x65,0x15, -0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,2,0x17,0x67,0x6e,0x6f,0x72,0x61,0x62, -0x6c,0x65,0x85,0x13,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69, -0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,2,0x10,0x63,0xc3,2,3, -0x61,0x30,0x65,0x34,0x69,0xa2,0x41,0x74,0xc3,3,0x11,0x73,0x68,0x29,2,0x63, -0x3a,0x66,0x58,0x70,0x2c,0x16,0x72,0x65,0x63,0x61,0x74,0x65,0x64,0x2d,0x1d,0x6f, -0x6d,0x70,0x6f,0x73,0x69,0x74,0x69,0x6f,0x6e,0x74,0x79,0x70,0x65,0xc3,3,0x15, -0x61,0x75,0x6c,0x74,0x69,0x67,0x1f,0x6e,0x6f,0x72,0x61,0x62,0x6c,0x65,0x63,0x6f, -0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x2b,0x2a,0x10,0x61,0x2e,0x15,0x63,0x72,0x69, -0x74,0x69,0x63,0x2f,3,0x66,0x34,0x6e,0x3e,0x74,0x42,0x79,0x22,0x11,0x65,0x73, -0x23,0x20,0x13,0x61,0x6c,0x73,0x65,0x21,0x20,0x10,0x6f,0x21,0x22,0x12,0x72,0x75, -0x65,0x23,0xb,0x6b,0x5b,0x6f,0x23,0x6f,0x3c,0x72,0x4c,0x76,1,0x69,0x24,0x72, -0x33,0x13,0x72,0x61,0x6d,0x61,0x33,0x10,0x76,0x22,0x14,0x65,0x72,0x6c,0x61,0x79, -0x23,0xa2,0xe2,0x13,0x69,0x67,0x68,0x74,0xa3,0xe2,0x6b,0x58,0x6c,0x74,0x6e,3, -0x6b,0x2f,0x6f,0x30,0x72,0x21,0x75,0x12,0x6b,0x74,0x61,0x2f,0x19,0x74,0x72,0x65, -0x6f,0x72,0x64,0x65,0x72,0x65,0x64,0x21,1,0x61,0x24,0x76,0x31,0x18,0x6e,0x61, -0x76,0x6f,0x69,0x63,0x69,0x6e,0x67,0x31,0xa2,0xe0,0x12,0x65,0x66,0x74,0xa3,0xe0, -0x64,0x45,0x64,0x4e,0x68,0x88,0x69,1,0x6f,0x26,0x73,0xa3,0xf0,0x1a,0x74,0x61, -0x73,0x75,0x62,0x73,0x63,0x72,0x69,0x70,0x74,0xa3,0xf0,2,0x61,0xa3,0xea,0x62, -0xa3,0xe9,0x6f,0x13,0x75,0x62,0x6c,0x65,1,0x61,0x30,0x62,0x13,0x65,0x6c,0x6f, -0x77,0xa3,0xe9,0x13,0x62,0x6f,0x76,0x65,0xa3,0xea,0x12,0x61,0x6e,0x72,0x2c,0x15, -0x65,0x61,0x64,0x69,0x6e,0x67,0x2d,0x61,0xa2,0x7b,0x62,0xa2,0xd4,0x63,0x11,0x63, -0x63,4,0x31,0x3c,0x32,0xa2,0x42,0x33,0xa2,0x56,0x38,0xa2,0x64,0x39,0x10,0x31, -0xa3,0x5b,9,0x35,0xa,0x35,0x3f,0x36,0x41,0x37,0x43,0x38,0x45,0x39,0x47,0x30, -0x30,0x31,0x3c,0x32,0x42,0x33,0x4e,0x34,0x3d,0x34,1,0x33,0xa3,0x67,0x37,0xa3, -0x6b,0x36,0x10,0x38,0xa3,0x76,0x38,1,0x32,0xa3,0x7a,0x39,0xa3,0x81,0x3a,2, -0x30,0xa3,0x82,0x32,0xa3,0x84,0x33,0xa3,0x85,9,0x35,0xa,0x35,0x53,0x36,0x55, -0x37,0x57,0x38,0x59,0x39,0x5b,0x30,0x49,0x31,0x4b,0x32,0x4d,0x33,0x4f,0x34,0x51, -6,0x33,8,0x33,0x63,0x34,0x65,0x35,0x67,0x36,0x69,0x30,0x5d,0x31,0x5f,0x32, -0x61,0x10,0x34,0xa3,0x54,0xa2,0xe6,3,0x62,0xa0,0x6c,0xa3,0xe4,0x72,0xa3,0xe8, -0x74,2,0x61,0x74,0x62,0x7c,0x74,0x14,0x61,0x63,0x68,0x65,0x64,1,0x61,0x3e, -0x62,0x13,0x65,0x6c,0x6f,0x77,0xa2,0xca,0x13,0x6c,0x65,0x66,0x74,0xa3,0xc8,0x13, -0x62,0x6f,0x76,0x65,0xa2,0xd6,0x14,0x72,0x69,0x67,0x68,0x74,0xa3,0xd8,0xa2,0xd6, -0x10,0x72,0xa3,0xd8,0xa2,0xca,0x10,0x6c,0xa3,0xc8,0x12,0x6f,0x76,0x65,0xa2,0xe6, -1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xe8,0x12,0x65,0x66,0x74,0xa3, -0xe4,0xa2,0xdc,2,0x65,0x2c,0x6c,0xa3,0xda,0x72,0xa3,0xde,0x12,0x6c,0x6f,0x77, -0xa2,0xdc,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xde,0x12,0x65,0x66, -0x74,0xa3,0xda,0xb,0x6e,0xc0,0xca,0x72,0x5f,0x72,0x46,0x73,0xa2,0x48,0x77,1, -0x68,0x24,0x73,0x33,0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x33,0x22,1, -0x69,0x30,0x6c,2,0x65,0x3d,0x69,0x4b,0x6f,0x3f,0x18,0x67,0x68,0x74,0x74,0x6f, -0x6c,0x65,0x66,0x74,0x22,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,0x72,0x72, -0x69,0x64,0x65,0x3f,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x3d,0x15,0x73, -0x6f,0x6c,0x61,0x74,0x65,0x4b,0x30,0x1e,0x65,0x67,0x6d,0x65,0x6e,0x74,0x73,0x65, -0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x31,0x6e,0xa2,0x41,0x6f,0xa2,0x53,0x70,2, -0x61,0x66,0x64,0x86,0x6f,0x1b,0x70,0x64,0x69,0x72,0x65,0x63,0x74,0x69,0x6f,0x6e, -0x61,0x6c,1,0x66,0x32,0x69,0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4d,0x14,0x6f, -0x72,0x6d,0x61,0x74,0x41,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70, -0x61,0x72,0x61,0x74,0x6f,0x72,0x2f,1,0x66,0x41,0x69,0x4d,1,0x6f,0x28,0x73, -0x10,0x6d,0x43,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b, -0x43,1,0x6e,0x35,0x74,0x19,0x68,0x65,0x72,0x6e,0x65,0x75,0x74,0x72,0x61,0x6c, -0x35,0x65,0x88,0x65,0x98,0x66,0xa2,0x6a,0x6c,0x20,1,0x65,0x30,0x72,2,0x65, -0x37,0x69,0x49,0x6f,0x39,0x18,0x66,0x74,0x74,0x6f,0x72,0x69,0x67,0x68,0x74,0x20, -2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,0x72,0x72,0x69,0x64,0x65,0x39,0x17, -0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x37,0x15,0x73,0x6f,0x6c,0x61,0x74,0x65, -0x49,3,0x6e,0x25,0x73,0x27,0x74,0x29,0x75,0x15,0x72,0x6f,0x70,0x65,0x61,0x6e, -2,0x6e,0x3c,0x73,0x46,0x74,0x18,0x65,0x72,0x6d,0x69,0x6e,0x61,0x74,0x6f,0x72, -0x29,0x14,0x75,0x6d,0x62,0x65,0x72,0x25,0x17,0x65,0x70,0x61,0x72,0x61,0x74,0x6f, -0x72,0x27,1,0x69,0x28,0x73,0x10,0x69,0x47,0x1f,0x72,0x73,0x74,0x73,0x74,0x72, -0x6f,0x6e,0x67,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x47,0x61,0x4e,0x62,0x84,0x63, -1,0x6f,0x24,0x73,0x2d,0x1c,0x6d,0x6d,0x6f,0x6e,0x73,0x65,0x70,0x61,0x72,0x61, -0x74,0x6f,0x72,0x2d,2,0x6c,0x3b,0x6e,0x2b,0x72,0x13,0x61,0x62,0x69,0x63,1, -0x6c,0x30,0x6e,0x14,0x75,0x6d,0x62,0x65,0x72,0x2b,0x14,0x65,0x74,0x74,0x65,0x72, -0x3b,0x2e,1,0x6e,0x45,0x6f,0x1c,0x75,0x6e,0x64,0x61,0x72,0x79,0x6e,0x65,0x75, -0x74,0x72,0x61,0x6c,0x45,0,0x16,0x6d,0xc9,0x20,0x74,0xc2,0x30,0x77,0x89,0x77, -0x86,0x79,0xa2,0x46,0x7a,1,0x61,0x58,0x6e,0x1a,0x61,0x6d,0x65,0x6e,0x6e,0x79, -0x6d,0x75,0x73,0x69,0x63,0xa4,0x40,0x19,0x61,0x6c,0x6e,0x6f,0x74,0x61,0x74,0x69, -0x6f,0x6e,0xa5,0x40,0x1c,0x6e,0x61,0x62,0x61,0x7a,0x61,0x72,0x73,0x71,0x75,0x61, -0x72,0x65,0xa5,0x18,0x10,0x61,1,0x6e,0x36,0x72,0x16,0x61,0x6e,0x67,0x63,0x69, -0x74,0x69,0xa3,0xfc,0x12,0x63,0x68,0x6f,0xa5,0x2c,1,0x65,0x88,0x69,2,0x6a, -0x3c,0x72,0x68,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73,0xa3,0x48,0x12, -0x69,0x6e,0x67,0xa2,0x74,0x1e,0x68,0x65,0x78,0x61,0x67,0x72,0x61,0x6d,0x73,0x79, -0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x74,0x16,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,0xa3, -0x49,0x13,0x7a,0x69,0x64,0x69,0xa5,0x34,0x74,0xa2,0x65,0x75,0xa4,0x4f,0x76,3, -0x61,0x3c,0x65,0x80,0x69,0xa2,0x50,0x73,0xa2,0x6c,0x12,0x73,0x75,0x70,0xa3,0x7d, -1,0x69,0xa3,0x9f,0x72,0x1e,0x69,0x61,0x74,0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65, -0x63,0x74,0x6f,0x72,0x73,0xa2,0x6c,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65, -0x6e,0x74,0xa3,0x7d,1,0x64,0x3c,0x72,0x19,0x74,0x69,0x63,0x61,0x6c,0x66,0x6f, -0x72,0x6d,0x73,0xa3,0x91,0x14,0x69,0x63,0x65,0x78,0x74,0xa2,0xaf,0x16,0x65,0x6e, -0x73,0x69,0x6f,0x6e,0x73,0xa3,0xaf,0x15,0x74,0x68,0x6b,0x75,0x71,0x69,0xa5,0x3f, -5,0x69,0x3f,0x69,0x5a,0x6f,0x8c,0x72,0x1c,0x61,0x6e,0x73,0x70,0x6f,0x72,0x74, -0x61,0x6e,0x64,0x6d,0x61,0x70,0xa2,0xcf,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73, -0xa3,0xcf,2,0x62,0x34,0x66,0x3c,0x72,0x13,0x68,0x75,0x74,0x61,0xa3,0xfb,0x13, -0x65,0x74,0x61,0x6e,0x57,0x14,0x69,0x6e,0x61,0x67,0x68,0xa3,0x90,0x11,0x74,0x6f, -0xa5,0x3d,0x61,0x3e,0x65,0xa2,0xa0,0x68,0x10,0x61,1,0x61,0x24,0x69,0x53,0x11, -0x6e,0x61,0x3d,4,0x67,0x8e,0x69,0xa2,0x49,0x6b,0xa2,0x72,0x6d,0xa2,0x74,0x6e, -0x10,0x67,1,0x73,0x68,0x75,0x10,0x74,0xa4,0x10,1,0x63,0x40,0x73,0x11,0x75, -0x70,0xa4,0x33,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x33,0x18,0x6f,0x6d, -0x70,0x6f,0x6e,0x65,0x6e,0x74,0x73,0xa5,0x11,0x10,0x61,0xa5,0x3c,2,0x61,0x2a, -0x62,0x32,0x73,0xa3,0x60,0x12,0x6c,0x6f,0x67,0xa3,0x62,0x13,0x61,0x6e,0x77,0x61, -0xa3,0x65,3,0x6c,0x52,0x74,0x56,0x76,0x5e,0x78,0x16,0x75,0x61,0x6e,0x6a,0x69, -0x6e,0x67,0xa2,0x7c,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x7c,0x10,0x65, -0xa3,0x70,0x12,0x68,0x61,0x6d,0xa3,0xae,0x12,0x69,0x65,0x74,0xa3,0xb7,0x11,0x72, -0x69,0xa3,0xdc,0x11,0x69,0x6c,0x48,0x12,0x73,0x75,0x70,0xa4,0x2b,0x16,0x70,0x6c, -0x65,0x6d,0x65,0x6e,0x74,0xa5,0x2b,0x13,0x6c,0x75,0x67,0x75,0x4b,2,0x63,0x8c, -0x67,0xa2,0x41,0x6e,0x1f,0x69,0x66,0x69,0x65,0x64,0x63,0x61,0x6e,0x61,0x64,0x69, -0x61,0x6e,0x61,0x62,0x6f,0x1f,0x72,0x69,0x67,0x69,0x6e,0x61,0x6c,0x73,0x79,0x6c, -0x6c,0x61,0x62,0x69,0x63,0x73,0x62,0x17,0x65,0x78,0x74,0x65,0x6e,0x64,0x65,0x64, -0xa2,0xad,0x10,0x61,0xa5,0x3e,0x11,0x61,0x73,0x62,0x12,0x65,0x78,0x74,0xa2,0xad, -0x10,0x61,0xa5,0x3e,0x15,0x61,0x72,0x69,0x74,0x69,0x63,0xa3,0x78,0x70,0xc3,0x4b, -0x70,0xa6,0x61,0x72,0xa8,0x1d,0x73,7,0x6f,0xc1,0xbe,0x6f,0xa2,0x69,0x70,0xa2, -0x85,0x75,0xa2,0xa4,0x79,2,0x6c,0x50,0x6d,0x62,0x72,0x12,0x69,0x61,0x63,0x3a, -0x12,0x73,0x75,0x70,0xa4,0x17,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x17, -0x17,0x6f,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,0xa3,0x8f,0x13,0x62,0x6f,0x6c,0x73, -1,0x61,0x4c,0x66,0x10,0x6f,0x1f,0x72,0x6c,0x65,0x67,0x61,0x63,0x79,0x63,0x6f, -0x6d,0x70,0x75,0x74,0x69,0x6e,0x67,0xa5,0x32,0x1f,0x6e,0x64,0x70,0x69,0x63,0x74, -0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x78,0x74,1,0x61,0xa5,0x2a,0x65,0x14, -0x6e,0x64,0x65,0x64,0x61,0xa5,0x2a,2,0x67,0x34,0x72,0x3e,0x79,0x13,0x6f,0x6d, -0x62,0x6f,0xa5,0x16,0x13,0x64,0x69,0x61,0x6e,0xa5,0x23,0x17,0x61,0x73,0x6f,0x6d, -0x70,0x65,0x6e,0x67,0xa3,0xda,1,0x61,0x32,0x65,0x14,0x63,0x69,0x61,0x6c,0x73, -0xa3,0x56,0x12,0x63,0x69,0x6e,0x1f,0x67,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72, -0x6c,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,2,0x6e,0x48,0x70,0x76,0x74,0x1d,0x74, -0x6f,0x6e,0x73,0x69,0x67,0x6e,0x77,0x72,0x69,0x74,0x69,0x6e,0x67,0xa5,6,0x15, -0x64,0x61,0x6e,0x65,0x73,0x65,0xa2,0x9b,0x12,0x73,0x75,0x70,0xa2,0xdb,0x16,0x70, -0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xdb,4,0x61,0xa2,0xa8,0x65,0x5c,0x6d,0x9e, -0x70,0xa2,0x4b,0x73,0x13,0x79,0x6d,0x62,0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70, -0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa5,5,0x10,0x72,1,0x61, -0x4e,0x73,0x12,0x63,0x72,0x69,0x1f,0x70,0x74,0x73,0x61,0x6e,0x64,0x73,0x75,0x62, -0x73,0x63,0x72,0x69,0x70,0x74,0x73,0x73,0x14,0x6e,0x64,0x73,0x75,0x62,0x73,0x1b, -0x61,0x74,0x68,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,1,0x6c, -0x40,0x75,1,0x61,0x6e,0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3, -0x8e,0x15,0x65,0x6d,0x65,0x6e,0x74,0x61,1,0x6c,0x50,0x72,0x1e,0x79,0x70,0x72, -0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0x61,0x72,0x65,0x61,1,0x61,0xa3,0x6d, -0x62,0xa3,0x6e,3,0x61,0x5c,0x6d,0x78,0x70,0xa2,0x41,0x73,0x13,0x79,0x6d,0x62, -0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70, -0x68,0x73,0xa5,5,0x14,0x72,0x72,0x6f,0x77,0x73,2,0x61,0xa3,0x67,0x62,0xa3, -0x68,0x63,0xa3,0xfa,0x13,0x61,0x74,0x68,0x65,0x1f,0x6d,0x61,0x74,0x69,0x63,0x61, -0x6c,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,0x19,0x75,0x6e,0x63, -0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x8e,0x61,0x88,0x68,0xa2,0x48,0x69,0xa2, -0x71,0x6d,0x12,0x61,0x6c,0x6c,1,0x66,0x46,0x6b,0x15,0x61,0x6e,0x61,0x65,0x78, -0x74,0xa4,0x29,0x15,0x65,0x6e,0x73,0x69,0x6f,0x6e,0xa5,0x29,0x12,0x6f,0x72,0x6d, -1,0x73,0xa3,0x54,0x76,0x16,0x61,0x72,0x69,0x61,0x6e,0x74,0x73,0xa3,0x54,1, -0x6d,0x36,0x75,0x16,0x72,0x61,0x73,0x68,0x74,0x72,0x61,0xa3,0xa1,0x15,0x61,0x72, -0x69,0x74,0x61,0x6e,0xa3,0xac,1,0x61,0x52,0x6f,0x13,0x72,0x74,0x68,0x61,0x1f, -0x6e,0x64,0x66,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73, -0xa3,0xf7,1,0x72,0x2e,0x76,0x12,0x69,0x61,0x6e,0xa3,0x79,0x12,0x61,0x64,0x61, -0xa3,0xd9,1,0x64,0x50,0x6e,0x13,0x68,0x61,0x6c,0x61,0x50,0x1d,0x61,0x72,0x63, -0x68,0x61,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0xf9,0x13,0x64,0x68, -0x61,0x6d,0xa3,0xf8,5,0x72,0x35,0x72,0x44,0x73,0x64,0x75,1,0x61,0xa3,0x4e, -0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x71,0x17,0x69,0x76,0x61,0x74, -0x65,0x75,0x73,0x65,0xa2,0x4e,0x13,0x61,0x72,0x65,0x61,0xa3,0x4e,0x1b,0x61,0x6c, -0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3,0xf6,0x61,0x40,0x68,0x82, -0x6c,0x19,0x61,0x79,0x69,0x6e,0x67,0x63,0x61,0x72,0x64,0x73,0xa3,0xcc,2,0x68, -0x38,0x6c,0x4a,0x75,0x15,0x63,0x69,0x6e,0x68,0x61,0x75,0xa3,0xf5,0x17,0x61,0x77, -0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0xf3,0x15,0x6d,0x79,0x72,0x65,0x6e,0x65,0xa3, -0xf4,1,0x61,0x8e,0x6f,1,0x65,0x74,0x6e,0x16,0x65,0x74,0x69,0x63,0x65,0x78, -0x74,0xa2,0x72,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3,0x8d,0x15,0x6e,0x73,0x69, -0x6f,0x6e,0x73,0xa2,0x72,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74, -0xa3,0x8d,0x15,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x97,1,0x67,0x3e,0x69,0x13, -0x73,0x74,0x6f,0x73,0xa2,0xa6,0x13,0x64,0x69,0x73,0x63,0xa3,0xa6,0x12,0x73,0x70, -0x61,0xa3,0x96,1,0x65,0x5c,0x75,1,0x6d,0x2a,0x6e,0x11,0x69,0x63,0x67,0x10, -0x69,0xa2,0xc0,0x1d,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f, -0x6c,0x73,0xa3,0xc0,0x13,0x6a,0x61,0x6e,0x67,0xa3,0xa3,0x6d,0xa2,0xf0,0x6e,0xa8, -0x23,0x6f,6,0x70,0x63,0x70,0x56,0x72,0x8a,0x73,0xa2,0x4c,0x74,0x10,0x74,0x1f, -0x6f,0x6d,0x61,0x6e,0x73,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73, -0xa5,0x28,0x18,0x74,0x69,0x63,0x61,0x6c,0x63,0x68,0x61,0x72,0x1f,0x61,0x63,0x74, -0x65,0x72,0x72,0x65,0x63,0x6f,0x67,0x6e,0x69,0x74,0x69,0x6f,0x6e,0x85,1,0x69, -0x46,0x6e,0x1e,0x61,0x6d,0x65,0x6e,0x74,0x61,0x6c,0x64,0x69,0x6e,0x67,0x62,0x61, -0x74,0x73,0xa3,0xf2,0x11,0x79,0x61,0x47,1,0x61,0x30,0x6d,0x13,0x61,0x6e,0x79, -0x61,0xa3,0x7a,0x11,0x67,0x65,0xa5,0xf,0x63,0xa2,0x7b,0x67,0xa2,0x7b,0x6c,1, -0x63,0xa2,0x6c,0x64,6,0x70,0x42,0x70,0x3a,0x73,0x5a,0x74,0x88,0x75,0x14,0x79, -0x67,0x68,0x75,0x72,0xa5,0x3b,0x11,0x65,0x72,1,0x6d,0x2e,0x73,0x12,0x69,0x61, -0x6e,0xa3,0x8c,0x11,0x69,0x63,0xa3,0xf1,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74, -0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0xbb,0x13,0x64,0x69,0x61,0x6e,0xa5, -0x22,0x14,0x75,0x72,0x6b,0x69,0x63,0xa3,0xbf,0x68,0x42,0x69,0x54,0x6e,0x1a,0x6f, -0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0xf0,0x17,0x75,0x6e,0x67, -0x61,0x72,0x69,0x61,0x6e,0xa5,4,0x14,0x74,0x61,0x6c,0x69,0x63,0xa3,0x58,0x13, -0x68,0x69,0x6b,0x69,0xa3,0x9d,0x10,0x72,0x85,0x12,0x68,0x61,0x6d,0x65,6,0x6f, -0x86,0x6f,0x6c,0x72,0xa2,0x61,0x75,0xa2,0x62,0x79,0x14,0x61,0x6e,0x6d,0x61,0x72, -0x58,0x12,0x65,0x78,0x74,2,0x61,0xa3,0xb6,0x62,0xa3,0xee,0x65,0x13,0x6e,0x64, -0x65,0x64,1,0x61,0xa3,0xb6,0x62,0xa3,0xee,1,0x64,0x52,0x6e,0x15,0x67,0x6f, -0x6c,0x69,0x61,0x6e,0x6a,0x12,0x73,0x75,0x70,0xa4,0xd,0x16,0x70,0x6c,0x65,0x6d, -0x65,0x6e,0x74,0xa5,0xd,0x10,0x69,0xa2,0xec,0x13,0x66,0x69,0x65,0x72,1,0x6c, -0x3c,0x74,0x19,0x6f,0x6e,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x73,0xa3,0x8a,0x15, -0x65,0x74,0x74,0x65,0x72,0x73,0x2d,0x10,0x6f,0xa3,0xed,1,0x6c,0x44,0x73,0x11, -0x69,0x63,0xa2,0x5c,0x18,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5c, -0x13,0x74,0x61,0x6e,0x69,0xa5,3,0x61,0xa2,0x9b,0x65,0xa4,0x4c,0x69,1,0x61, -0xa2,0x8f,0x73,0x10,0x63,5,0x70,0x18,0x70,0xa2,0x71,0x73,0x36,0x74,0x17,0x65, -0x63,0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x8f, -0x61,0xa2,0x66,0x65,0x46,0x6d,0x19,0x61,0x74,0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c, -0x73,1,0x61,0xa3,0x66,0x62,0xa3,0x69,0x17,0x6c,0x6c,0x61,0x6e,0x65,0x6f,0x75, -0x73,2,0x6d,0x3a,0x73,0x6c,0x74,0x17,0x65,0x63,0x68,0x6e,0x69,0x63,0x61,0x6c, -0x81,0x11,0x61,0x74,0x1f,0x68,0x65,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,0x73,0x79, -0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,0xa3,0x69,0x15,0x79,0x6d,0x62, -0x6f,0x6c,0x73,0x8e,0x12,0x61,0x6e,0x64,1,0x61,0x3c,0x70,0x19,0x69,0x63,0x74, -0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa3,0xcd,0x14,0x72,0x72,0x6f,0x77,0x73,0xa3, -0x73,0x10,0x6f,0xa3,0xd8,7,0x72,0x6f,0x72,0x44,0x73,0x4e,0x74,0x62,0x79,0x19, -0x61,0x6e,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0xa5,0x20,0x13,0x63,0x68,0x65, -0x6e,0xa5,0xc,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f,0x6e,0x64,0x69,0xa5,0x14,0x10, -0x68,2,0x61,0x3a,0x65,0x4a,0x6f,0x17,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73, -0x7f,0x16,0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0xa3,0x5d,0x16,0x6d,0x61,0x74,0x69, -0x63,0x61,0x6c,1,0x61,0x36,0x6f,0x17,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73, -0x7f,0x11,0x6c,0x70,0x1f,0x68,0x61,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x73,0x79, -0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5d,0x68,0x50,0x6b,0x7e,0x6c,0x88,0x6e,1,0x64, -0x34,0x69,0x15,0x63,0x68,0x61,0x65,0x61,0x6e,0xa3,0xea,0x12,0x61,0x69,0x63,0xa3, -0xc6,1,0x61,0x3e,0x6a,0x12,0x6f,0x6e,0x67,0xa2,0xaa,0x14,0x74,0x69,0x6c,0x65, -0x73,0xa3,0xaa,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xe9,0x13,0x61,0x73,0x61,0x72,0xa5, -0x1f,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x4f,3,0x64,0x6c,0x65,0x7e,0x6e,0xa2, -0x47,0x72,0x14,0x6f,0x69,0x74,0x69,0x63,1,0x63,0x3c,0x68,0x19,0x69,0x65,0x72, -0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xd7,0x15,0x75,0x72,0x73,0x69,0x76,0x65, -0xa3,0xd6,0x17,0x65,0x66,0x61,0x69,0x64,0x72,0x69,0x6e,0xa5,0x21,0x17,0x74,0x65, -0x69,0x6d,0x61,0x79,0x65,0x6b,0xa2,0xb8,0x12,0x65,0x78,0x74,0xa2,0xd5,0x16,0x65, -0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0xd5,0x18,0x64,0x65,0x6b,0x69,0x6b,0x61,0x6b, -0x75,0x69,0xa3,0xeb,6,0x6b,0x3b,0x6b,0x56,0x6f,0x5a,0x75,0x64,0x79,0x11,0x69, -0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61,0x63,0x68,0x75,0x65,0x68,0x6d,0x6f, -0x6e,0x67,0xa5,0x27,0x10,0x6f,0xa3,0x92,0x14,0x62,0x6c,0x6f,0x63,0x6b,0x21,1, -0x6d,0x2c,0x73,0x11,0x68,0x75,0xa5,0x15,0x17,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d, -0x73,0x7b,0x61,0x44,0x62,0x21,0x65,0x10,0x77,1,0x61,0xa5,0xe,0x74,0x14,0x61, -0x69,0x6c,0x75,0x65,0xa3,0x8b,2,0x62,0x3c,0x67,0x4a,0x6e,0x17,0x64,0x69,0x6e, -0x61,0x67,0x61,0x72,0x69,0xa5,0x26,0x15,0x61,0x74,0x61,0x65,0x61,0x6e,0xa3,0xef, -0x16,0x6d,0x75,0x6e,0x64,0x61,0x72,0x69,0xa5,0x47,0x67,0xc4,0x5d,0x6a,0xc1,0xe4, -0x6a,0xa2,0xdf,0x6b,0xa2,0xf8,0x6c,4,0x61,0x54,0x65,0xa2,0x6b,0x69,0xa2,0x82, -0x6f,0xa2,0xc1,0x79,1,0x63,0x2e,0x64,0x12,0x69,0x61,0x6e,0xa3,0xa9,0x12,0x69, -0x61,0x6e,0xa3,0xa7,1,0x6f,0x55,0x74,0x11,0x69,0x6e,1,0x31,0x96,0x65,0x11, -0x78,0x74,6,0x64,0x21,0x64,0xa3,0x95,0x65,0x2c,0x66,0xa5,0x39,0x67,0xa5,0x3a, -0xa2,0xe7,0x13,0x6e,0x64,0x65,0x64,6,0x64,0xc,0x64,0xa3,0x95,0x65,0xa3,0xe7, -0x66,0xa5,0x39,0x67,0xa5,0x3a,0x61,0x2a,0x62,0x29,0x63,0xa3,0x94,0x26,0x18,0x64, -0x64,0x69,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x6d,0x24,0x12,0x73,0x75,0x70,0x24,0x16, -0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x25,1,0x70,0x42,0x74,0x1d,0x74,0x65,0x72, -0x6c,0x69,0x6b,0x65,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x79,0x12,0x63,0x68,0x61, -0xa3,0x9c,2,0x6d,0x4e,0x6e,0x54,0x73,0x10,0x75,0xa2,0xb0,0x12,0x73,0x75,0x70, -0xa4,0x31,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x31,0x11,0x62,0x75,0xa3, -0x6f,0x12,0x65,0x61,0x72,1,0x61,0xa3,0xe8,0x62,1,0x69,0x38,0x73,0x17,0x79, -0x6c,0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x75,0x17,0x64,0x65,0x6f,0x67,0x72,0x61, -0x6d,0x73,0xa3,0x76,0x1a,0x77,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73, -0xa3,0x4d,0x10,0x61,1,0x6d,0x32,0x76,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0xb5, -0x10,0x6f,0x5c,0x12,0x65,0x78,0x74,1,0x61,0xa3,0xb4,0x62,0xa3,0xb9,1,0x61, -0xa2,0x43,0x68,4,0x61,0x40,0x69,0x50,0x6d,0x6e,0x6f,0x86,0x75,0x15,0x64,0x61, -0x77,0x61,0x64,0x69,0xa3,0xe6,0x16,0x72,0x6f,0x73,0x68,0x74,0x68,0x69,0xa3,0x89, -0x1d,0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74,0xa5, -0x30,0x11,0x65,0x72,0x68,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x71,0x12, -0x6a,0x6b,0x69,0xa3,0xe5,5,0x74,0x35,0x74,0x34,0x77,0x7a,0x79,0x13,0x61,0x68, -0x6c,0x69,0xa3,0xa2,0x14,0x61,0x6b,0x61,0x6e,0x61,0x9e,1,0x65,0x4c,0x70,0x10, -0x68,0x1f,0x6f,0x6e,0x65,0x74,0x69,0x63,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f, -0x6e,0x73,0xa3,0x6b,0x11,0x78,0x74,0xa3,0x6b,0x10,0x69,0xa5,0x46,0x69,0xa2,0x4e, -0x6b,0xa2,0x51,0x6e,3,0x61,0x34,0x62,0x84,0x67,0x8a,0x6e,0x12,0x61,0x64,0x61, -0x4d,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,0xcb,0x16,0x70,0x6c,0x65,0x6d,0x65, -0x6e,0x74,0xa3,0xcb,0x11,0x78,0x74,2,0x61,0xa5,0x13,0x62,0xa5,0x38,0x65,0x13, -0x6e,0x64,0x65,0x64,1,0x61,0xa5,0x13,0x62,0xa5,0x38,0x11,0x75,0x6e,0xa3,0x42, -0x11,0x78,0x69,0x96,0x17,0x72,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,0x97,0x12,0x74, -0x68,0x69,0xa3,0xc1,0x1c,0x74,0x6f,0x76,0x69,0x6b,0x6e,0x75,0x6d,0x65,0x72,0x61, -0x6c,0x73,0xa5,0x45,0x67,0xa2,0xb5,0x68,0xa4,0x84,0x69,3,0x64,0x4c,0x6d,0xa2, -0x55,0x6e,0xa2,0x62,0x70,0x13,0x61,0x65,0x78,0x74,0x2a,0x16,0x65,0x6e,0x73,0x69, -0x6f,0x6e,0x73,0x2b,1,0x63,0x99,0x65,0x17,0x6f,0x67,0x72,0x61,0x70,0x68,0x69, -0x63,1,0x64,0x56,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa4,0xb,0x1d,0x61, -0x6e,0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa5,0xb,0x13, -0x65,0x73,0x63,0x72,0x1f,0x69,0x70,0x74,0x69,0x6f,0x6e,0x63,0x68,0x61,0x72,0x61, -0x63,0x74,0x65,0x72,0x73,0x99,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61, -0x6d,0x61,0x69,0x63,0xa3,0xba,1,0x64,0x62,0x73,0x1b,0x63,0x72,0x69,0x70,0x74, -0x69,0x6f,0x6e,0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61, -0x6e,0xa3,0xbd,0x13,0x6c,0x61,0x76,0x69,0xa3,0xbe,0x11,0x69,0x63,1,0x6e,0x3e, -0x73,0x1a,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa5,0x1e,0x19, -0x75,0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0xa3,0xb2,4,0x65,0x74,0x6c, -0xa2,0x82,0x6f,0xa2,0x9a,0x72,0xa2,0x9e,0x75,2,0x6a,0x34,0x6e,0x3e,0x72,0x14, -0x6d,0x75,0x6b,0x68,0x69,0x43,0x14,0x61,0x72,0x61,0x74,0x69,0x45,0x18,0x6a,0x61, -0x6c,0x61,0x67,0x6f,0x6e,0x64,0x69,0xa5,0x1c,1,0x6e,0xa2,0x46,0x6f,1,0x6d, -0x6e,0x72,0x13,0x67,0x69,0x61,0x6e,0x5a,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2, -0x87,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x87,0x11,0x78,0x74,0xa4,0x1b, -0x14,0x65,0x6e,0x64,0x65,0x64,0xa5,0x1b,0x1a,0x65,0x74,0x72,0x69,0x63,0x73,0x68, -0x61,0x70,0x65,0x73,0x8c,0x12,0x65,0x78,0x74,0xa2,0xe3,0x14,0x65,0x6e,0x64,0x65, -0x64,0xa3,0xe3,0x1e,0x65,0x72,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74, -0x69,0x6f,0x6e,0x71,0x17,0x61,0x67,0x6f,0x6c,0x69,0x74,0x69,0x63,0xa2,0x88,0x12, -0x73,0x75,0x70,0xa4,0xa,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xa,0x13, -0x74,0x68,0x69,0x63,0xa3,0x59,1,0x61,0x5c,0x65,0x11,0x65,0x6b,0x30,1,0x61, -0x38,0x65,0x11,0x78,0x74,0x6e,0x14,0x65,0x6e,0x64,0x65,0x64,0x6f,0x17,0x6e,0x64, -0x63,0x6f,0x70,0x74,0x69,0x63,0x31,0x13,0x6e,0x74,0x68,0x61,0xa3,0xe4,2,0x61, -0xa2,0x48,0x65,0xa2,0xdf,0x69,1,0x67,0x30,0x72,0x14,0x61,0x67,0x61,0x6e,0x61, -0x9d,0x10,0x68,1,0x70,0x3a,0x73,0x18,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65, -0x73,0xa3,0x4b,1,0x72,0x3c,0x75,0x19,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74, -0x65,0x73,0xa3,0x4c,0x11,0x69,0x76,0x1f,0x61,0x74,0x65,0x75,0x73,0x65,0x73,0x75, -0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4c,2,0x6c,0x32,0x6e,0x9a,0x74, -0x12,0x72,0x61,0x6e,0xa5,2,0x10,0x66,2,0x61,0x58,0x6d,0x70,0x77,0x14,0x69, -0x64,0x74,0x68,0x61,0x1f,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68, -0x66,0x6f,0x72,0x6d,0x73,0xa3,0x57,0x1a,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x66,0x6f, -0x72,0x6d,0x73,0xa3,0x57,0x13,0x61,0x72,0x6b,0x73,0xa3,0x52,2,0x67,0x34,0x69, -0xa2,0x45,0x75,0x12,0x6e,0x6f,0x6f,0xa3,0x63,0x11,0x75,0x6c,0xa2,0x4a,2,0x63, -0x3c,0x6a,0x5e,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73,0xa3,0x4a,0x1f, -0x6f,0x6d,0x70,0x61,0x74,0x69,0x62,0x69,0x6c,0x69,0x74,0x79,0x6a,0x61,0x6d,0x6f, -0xa3,0x41,0x12,0x61,0x6d,0x6f,0x5c,0x17,0x65,0x78,0x74,0x65,0x6e,0x64,0x65,0x64, -1,0x61,0xa3,0xb4,0x62,0xa3,0xb9,0x19,0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67, -0x79,0x61,0xa5,0x1d,0x13,0x62,0x72,0x65,0x77,0x37,0x61,0xa4,0xc,0x62,0xa6,0x59, -0x63,0xa8,0x2e,0x64,0xac,0xe3,0x65,5,0x6d,0xa9,0x6d,0x94,0x6e,0xa2,0x41,0x74, -0x15,0x68,0x69,0x6f,0x70,0x69,0x63,0x5e,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2, -0x86,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x86,0x11,0x78,0x74,0xa2,0x85, -2,0x61,0xa3,0xc8,0x62,0xa5,0x37,0x65,0x13,0x6e,0x64,0x65,0x64,0xa2,0x85,1, -0x61,0xa3,0xc8,0x62,0xa5,0x37,0x16,0x6f,0x74,0x69,0x63,0x6f,0x6e,0x73,0xa3,0xce, -0x15,0x63,0x6c,0x6f,0x73,0x65,0x64,2,0x61,0x5a,0x63,0x9e,0x69,0x1c,0x64,0x65, -0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,0x73,0x75,0x70,0xa2,0xc4,0x16,0x70,0x6c, -0x65,0x6d,0x65,0x6e,0x74,0xa3,0xc4,0x16,0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0x86, -1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3,0xc3,0x13,0x72,0x69,0x63,0x73,0x86,0x18, -0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xc3,0x11,0x6a,0x6b,0xa2,0x44, -0x1f,0x6c,0x65,0x74,0x74,0x65,0x72,0x73,0x61,0x6e,0x64,0x6d,0x6f,0x6e,0x74,0x68, -0x73,0xa3,0x44,0x61,0x4a,0x67,0x76,0x6c,1,0x62,0x30,0x79,0x13,0x6d,0x61,0x69, -0x63,0xa5,0x25,0x13,0x61,0x73,0x61,0x6e,0xa3,0xe2,0x13,0x72,0x6c,0x79,0x64,0x1f, -0x79,0x6e,0x61,0x73,0x74,0x69,0x63,0x63,0x75,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d, -0xa5,1,0x1f,0x79,0x70,0x74,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c, -0x79,0x70,0x68,1,0x66,0x26,0x73,0xa3,0xc2,0x1c,0x6f,0x72,0x6d,0x61,0x74,0x63, -0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73,0xa5,0x24,7,0x6e,0xc0,0xf2,0x6e,0x3e,0x72, -0xa2,0x5d,0x73,0xa2,0xe5,0x76,0x14,0x65,0x73,0x74,0x61,0x6e,0xa3,0xbc,1,0x61, -0x92,0x63,0x13,0x69,0x65,0x6e,0x74,1,0x67,0x34,0x73,0x15,0x79,0x6d,0x62,0x6f, -0x6c,0x73,0xa3,0xa5,0x13,0x72,0x65,0x65,0x6b,1,0x6d,0x34,0x6e,0x15,0x75,0x6d, -0x62,0x65,0x72,0x73,0xa3,0x7f,0x13,0x75,0x73,0x69,0x63,0xa2,0x7e,0x19,0x61,0x6c, -0x6e,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x7e,0x10,0x74,0x1f,0x6f,0x6c,0x69, -0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xfe,2, -0x61,0x32,0x6d,0xa2,0x7e,0x72,0x12,0x6f,0x77,0x73,0x7d,0x12,0x62,0x69,0x63,0x38, -3,0x65,0x4a,0x6d,0x80,0x70,0xa2,0x50,0x73,0x11,0x75,0x70,0xa2,0x80,0x16,0x70, -0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x80,0x11,0x78,0x74,3,0x61,0xa3,0xd2,0x62, -0xa5,0x35,0x63,0xa5,0x41,0x65,0x13,0x6e,0x64,0x65,0x64,2,0x61,0xa3,0xd2,0x62, -0xa5,0x35,0x63,0xa5,0x41,0x12,0x61,0x74,0x68,0xa2,0xd3,0x18,0x65,0x6d,0x61,0x74, -0x69,0x63,0x61,0x6c,0x61,0x1f,0x6c,0x70,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x73, -0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xd3,1,0x66,0x42,0x72,0x1e,0x65,0x73,0x65, -0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x66,0x6f,0x72,0x6d,0x73,1,0x61,0xa3,0x51, -0x62,0xa3,0x55,0x14,0x65,0x6e,0x69,0x61,0x6e,0x35,0x12,0x63,0x69,0x69,0x23,0x64, -0x9e,0x65,0xa2,0x42,0x68,0xa2,0x4d,0x6c,1,0x63,0x62,0x70,0x17,0x68,0x61,0x62, -0x65,0x74,0x69,0x63,0x70,1,0x66,0xa3,0x50,0x72,0x1e,0x65,0x73,0x65,0x6e,0x74, -0x61,0x74,0x69,0x6f,0x6e,0x66,0x6f,0x72,0x6d,0x73,0xa3,0x50,0x16,0x68,0x65,0x6d, -0x69,0x63,0x61,0x6c,0xa2,0xd0,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xd0, -0x12,0x6c,0x61,0x6d,0xa5,7,0x1a,0x67,0x65,0x61,0x6e,0x6e,0x75,0x6d,0x62,0x65, -0x72,0x73,0xa3,0x77,0x11,0x6f,0x6d,0xa3,0xfd,7,0x6f,0x71,0x6f,0x64,0x72,0xa2, -0x41,0x75,0xa2,0x58,0x79,0x1b,0x7a,0x61,0x6e,0x74,0x69,0x6e,0x65,0x6d,0x75,0x73, -0x69,0x63,0xa2,0x5b,0x18,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5b, -1,0x70,0x34,0x78,0x16,0x64,0x72,0x61,0x77,0x69,0x6e,0x67,0x89,0x14,0x6f,0x6d, -0x6f,0x66,0x6f,0xa0,0x12,0x65,0x78,0x74,0xa2,0x43,0x14,0x65,0x6e,0x64,0x65,0x64, -0xa3,0x43,0x10,0x61,1,0x68,0x40,0x69,0x12,0x6c,0x6c,0x65,0x92,0x17,0x70,0x61, -0x74,0x74,0x65,0x72,0x6e,0x73,0x93,0x11,0x6d,0x69,0xa3,0xc9,1,0x67,0x2c,0x68, -0x11,0x69,0x64,0xa3,0x64,0x14,0x69,0x6e,0x65,0x73,0x65,0xa3,0x81,0x61,0x48,0x65, -0xa2,0x4e,0x68,0xa2,0x52,0x6c,0x1a,0x6f,0x63,0x6b,0x65,0x6c,0x65,0x6d,0x65,0x6e, -0x74,0x73,0x8b,3,0x6c,0x34,0x6d,0x40,0x73,0x66,0x74,0x11,0x61,0x6b,0xa3,0xc7, -0x14,0x69,0x6e,0x65,0x73,0x65,0xa3,0x93,0x11,0x75,0x6d,0xa2,0xb1,0x12,0x73,0x75, -0x70,0xa2,0xca,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xca,1,0x69,0x30, -0x73,0x13,0x61,0x76,0x61,0x68,0xa3,0xdd,0x15,0x63,0x6c,0x61,0x74,0x69,0x6e,0x23, -0x14,0x6e,0x67,0x61,0x6c,0x69,0x41,0x16,0x61,0x69,0x6b,0x73,0x75,0x6b,0x69,0xa5, -8,5,0x6f,0xc1,0x60,0x6f,0xa2,0x69,0x75,0xa4,0x24,0x79,1,0x70,0xa2,0x44, -0x72,0x14,0x69,0x6c,0x6c,0x69,0x63,0x32,1,0x65,0x4c,0x73,0x11,0x75,0x70,0xa2, -0x61,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa2,0x61,0x12,0x61,0x72,0x79,0xa3, -0x61,0x11,0x78,0x74,4,0x61,0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x64,0xa5, -0x43,0x65,0x13,0x6e,0x64,0x65,0x64,3,0x61,0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5, -9,0x64,0xa5,0x43,0x10,0x72,1,0x69,0x34,0x6f,0x15,0x6d,0x69,0x6e,0x6f,0x61, -0x6e,0xa5,0x36,0x1a,0x6f,0x74,0x73,0x79,0x6c,0x6c,0x61,0x62,0x61,0x72,0x79,0xa3, -0x7b,3,0x6d,0x5a,0x6e,0xa2,0x95,0x70,0xa2,0xa0,0x75,0x17,0x6e,0x74,0x69,0x6e, -0x67,0x72,0x6f,0x64,0xa2,0x9a,0x17,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0xa3, -0x9a,2,0x62,0x3a,0x6d,0xa2,0x5f,0x70,0x15,0x61,0x74,0x6a,0x61,0x6d,0x6f,0xa3, -0x41,0x14,0x69,0x6e,0x69,0x6e,0x67,2,0x64,0x46,0x68,0x9e,0x6d,0x1d,0x61,0x72, -0x6b,0x73,0x66,0x6f,0x72,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x77,0x1e,0x69,0x61, -0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x6d,0x61,0x72,0x6b,0x73,0x2e,2,0x65, -0x40,0x66,0xa6,0x4c,0x73,0x18,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3, -0x83,0x16,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,0xa3,0xe0,0x17,0x61,0x6c,0x66,0x6d, -0x61,0x72,0x6b,0x73,0xa3,0x52,0x11,0x6f,0x6e,0x1f,0x69,0x6e,0x64,0x69,0x63,0x6e, -0x75,0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0xa3,0xb2,0x1b,0x74,0x72,0x6f, -0x6c,0x70,0x69,0x63,0x74,0x75,0x72,0x65,0x73,0x83,0x12,0x74,0x69,0x63,0xa2,0x84, -0x1b,0x65,0x70,0x61,0x63,0x74,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0xdf,1, -0x6e,0x3e,0x72,0x1b,0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73, -0x75,0x15,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa2,0x98,0x16,0x6e,0x75,0x6d,0x62,0x65, -0x72,0x73,0xa2,0x99,0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74, -0x69,0x6f,0x6e,0xa3,0x99,0x61,0xa2,0xe4,0x68,0xa4,0xe,0x6a,0x10,0x6b,0xa2,0x47, -4,0x63,0x8c,0x65,0xa2,0x80,0x72,0xa2,0x9b,0x73,0xa2,0xad,0x75,0x1f,0x6e,0x69, -0x66,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x47, -0x18,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,0x6e,7,0x65,0x6b,0x65,0xa5,0, -0x66,0xa5,0x12,0x67,0xa5,0x2e,0x68,0xa5,0x42,0x14,0x6f,0x6d,0x70,0x61,0x74,0xa2, -0x45,1,0x66,0x96,0x69,1,0x62,0x44,0x64,0x17,0x65,0x6f,0x67,0x72,0x61,0x70, -0x68,0x73,0xa2,0x4f,0x12,0x73,0x75,0x70,0xa3,0x5f,0x14,0x69,0x6c,0x69,0x74,0x79, -0xa2,0x45,1,0x66,0x54,0x69,0x18,0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73, -0xa2,0x4f,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x5f,0x13, -0x6f,0x72,0x6d,0x73,0xa3,0x53,0x11,0x78,0x74,7,0x65,0xc,0x65,0xa5,0,0x66, -0xa5,0x12,0x67,0xa5,0x2e,0x68,0xa5,0x42,0x61,0xa3,0x46,0x62,0xa3,0x5e,0x63,0xa3, -0xc5,0x64,0xa3,0xd1,0x19,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,0x73,0x75,0x70,0x94, -0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x95,1,0x74,0x50,0x79,0x14,0x6d,0x62, -0x6f,0x6c,0x73,0x9a,0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74, -0x69,0x6f,0x6e,0x9b,0x14,0x72,0x6f,0x6b,0x65,0x73,0xa3,0x82,2,0x6e,0x48,0x72, -0x64,0x75,0x1d,0x63,0x61,0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,0x61,0x6e,0x69,0x61, -0x6e,0xa3,0xde,0x1d,0x61,0x64,0x69,0x61,0x6e,0x73,0x79,0x6c,0x6c,0x61,0x62,0x69, -0x63,0x73,0x63,0x12,0x69,0x61,0x6e,0xa3,0xa8,2,0x61,0x3a,0x65,0x4c,0x6f,0x16, -0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa5,0x2d,1,0x6b,0x26,0x6d,0xa3,0xa4,0x11, -0x6d,0x61,0xa3,0xd4,1,0x72,0x38,0x73,0x17,0x73,0x73,0x79,0x6d,0x62,0x6f,0x6c, -0x73,0xa5,0x19,0x13,0x6f,0x6b,0x65,0x65,0x60,0x12,0x73,0x75,0x70,0xa2,0xff,0x16, -0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xff,3,0x65,0x3e,0x69,0x8e,0x6f,0xa2, -0x71,0x75,0x15,0x70,0x6c,0x6f,0x79,0x61,0x6e,0xa3,0xe1,1,0x73,0x60,0x76,0x16, -0x61,0x6e,0x61,0x67,0x61,0x72,0x69,0x3e,0x12,0x65,0x78,0x74,0xa2,0xb3,1,0x61, -0xa5,0x44,0x65,0x13,0x6e,0x64,0x65,0x64,0xa2,0xb3,0x10,0x61,0xa5,0x44,0x13,0x65, -0x72,0x65,0x74,0xa3,0x5a,2,0x61,0x3a,0x6e,0x82,0x76,0x16,0x65,0x73,0x61,0x6b, -0x75,0x72,0x75,0xa5,0x2f,0x18,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x73,0x2e, -2,0x65,0x30,0x66,0x36,0x73,0x11,0x75,0x70,0xa3,0x83,0x11,0x78,0x74,0xa3,0xe0, -0x18,0x6f,0x72,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x77,0x14,0x67,0x62,0x61,0x74, -0x73,0x91,1,0x67,0x3e,0x6d,0x12,0x69,0x6e,0x6f,0xa2,0xab,0x14,0x74,0x69,0x6c, -0x65,0x73,0xa3,0xab,0x11,0x72,0x61,0xa5,0x1a,8,0x6d,0x5f,0x6d,0x3a,0x6e,0x48, -0x73,0x7a,0x76,0xa2,0x4b,0x77,0x12,0x69,0x64,0x65,0x43,0x11,0x65,0x64,0x32,0x12, -0x69,0x61,0x6c,0x33,2,0x61,0x40,0x62,0x37,0x6f,1,0x62,0x28,0x6e,0x10,0x65, -0x21,0x13,0x72,0x65,0x61,0x6b,0x37,0x10,0x72,0x34,0x12,0x72,0x6f,0x77,0x35,2, -0x6d,0x38,0x71,0x46,0x75,1,0x62,0x3d,0x70,0x3e,0x11,0x65,0x72,0x3f,1,0x61, -0x24,0x6c,0x39,0x11,0x6c,0x6c,0x39,1,0x72,0x3b,0x75,0x12,0x61,0x72,0x65,0x3b, -0x12,0x65,0x72,0x74,0x40,0x13,0x69,0x63,0x61,0x6c,0x41,0x63,0x58,0x65,0x92,0x66, -0x96,0x69,1,0x6e,0x36,0x73,0x10,0x6f,0x30,0x14,0x6c,0x61,0x74,0x65,0x64,0x31, -0x11,0x69,0x74,0x2e,0x12,0x69,0x61,0x6c,0x2f,2,0x61,0x36,0x69,0x48,0x6f,0x10, -0x6d,0x24,0x12,0x70,0x61,0x74,0x25,0x10,0x6e,0x22,0x15,0x6f,0x6e,0x69,0x63,0x61, -0x6c,0x23,0x13,0x72,0x63,0x6c,0x65,0x27,0x11,0x6e,0x63,0x27,2,0x69,0x3a,0x6f, -0x44,0x72,0x10,0x61,0x2c,0x14,0x63,0x74,0x69,0x6f,0x6e,0x2d,0x10,0x6e,0x28,0x11, -0x61,0x6c,0x29,0x11,0x6e,0x74,0x2b,4,0x61,0x3a,0x66,0x4c,0x68,0x5e,0x6e,0x70, -0x77,0x2a,0x12,0x69,0x64,0x65,0x2b,0x22,0x17,0x6d,0x62,0x69,0x67,0x75,0x6f,0x75, -0x73,0x23,0x26,0x17,0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68,0x27,0x24,0x17,0x61, -0x6c,0x66,0x77,0x69,0x64,0x74,0x68,0x25,0x20,1,0x61,0x30,0x65,0x14,0x75,0x74, -0x72,0x61,0x6c,0x21,0x28,0x13,0x72,0x72,0x6f,0x77,0x29,0xd,0x6e,0xc0,0xfb,0x73, -0x6d,0x73,0x3a,0x74,0x98,0x75,0xa2,0x49,0x7a,2,0x6c,0x3b,0x70,0x3d,0x73,0x39, -5,0x6f,0x28,0x6f,0x57,0x70,0x34,0x75,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65, -0x45,0x11,0x61,0x63,1,0x65,0x32,0x69,0x15,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x31, -0x18,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x39,0x63,0x53,0x6b,0x55,0x6d, -0x51,0x1d,0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72, -0x27,1,0x6e,0x40,0x70,0x1c,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74, -0x74,0x65,0x72,0x23,0x17,0x61,0x73,0x73,0x69,0x67,0x6e,0x65,0x64,0x21,0x6e,0x8a, -0x6f,0xa2,0x47,0x70,8,0x66,0x14,0x66,0x5b,0x69,0x59,0x6f,0x4f,0x72,0x24,0x73, -0x49,0x17,0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0x43,0x61,0x2c,0x63,0x4d,0x64, -0x47,0x65,0x4b,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72, -0x61,0x74,0x6f,0x72,0x3d,2,0x64,0x33,0x6c,0x35,0x6f,0x36,0x1b,0x6e,0x73,0x70, -0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x2d,1,0x70,0x7c,0x74,0x12,0x68, -0x65,0x72,3,0x6c,0x38,0x6e,0x42,0x70,0x4c,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c, -0x57,0x14,0x65,0x74,0x74,0x65,0x72,0x2b,0x14,0x75,0x6d,0x62,0x65,0x72,0x37,0x19, -0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x1c,0x65,0x6e,0x70,0x75, -0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x49,0x66,0x9e,0x66,0x88,0x69,0xa2, -0x4b,0x6c,0xa2,0x5c,0x6d,4,0x61,0x60,0x63,0x31,0x65,0x2f,0x6e,0x2d,0x6f,0x15, -0x64,0x69,0x66,0x69,0x65,0x72,1,0x6c,0x30,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c, -0x55,0x14,0x65,0x74,0x74,0x65,0x72,0x29,0x17,0x74,0x68,0x73,0x79,0x6d,0x62,0x6f, -0x6c,0x51,1,0x69,0x2e,0x6f,0x13,0x72,0x6d,0x61,0x74,0x41,0x1d,0x6e,0x61,0x6c, -0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x5b,0x10,0x6e,0x1f,0x69, -0x74,0x69,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59, -6,0x6d,0x18,0x6d,0x29,0x6f,0x28,0x74,0x27,0x75,0x23,0x2a,0x1c,0x77,0x65,0x72, -0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x65,0x28,0x69,0x3c,0x6c, -0x25,0x19,0x74,0x74,0x65,0x72,0x6e,0x75,0x6d,0x62,0x65,0x72,0x35,0x1a,0x6e,0x65, -0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x3b,0x63,0x44,0x64,0xa2,0x60,0x65, -0x1b,0x6e,0x63,0x6c,0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x2f,6,0x6e, -0x39,0x6e,0x46,0x6f,0x4e,0x73,0x45,0x75,0x1b,0x72,0x72,0x65,0x6e,0x63,0x79,0x73, -0x79,0x6d,0x62,0x6f,0x6c,0x53,0x20,0x12,0x74,0x72,0x6c,0x3f,0x42,0x10,0x6e,1, -0x6e,0x2c,0x74,0x12,0x72,0x6f,0x6c,0x3f,0x1f,0x65,0x63,0x74,0x6f,0x72,0x70,0x75, -0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4d,0x63,0x3f,0x66,0x41,0x6c,0x1d, -0x6f,0x73,0x65,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4b,2, -0x61,0x30,0x65,0x4a,0x69,0x12,0x67,0x69,0x74,0x33,0x1c,0x73,0x68,0x70,0x75,0x6e, -0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x47,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e, -0x75,0x6d,0x62,0x65,0x72,0x33,0,0x13,0x6e,0xc1,0xf,0x74,0x76,0x74,0x4c,0x76, -0x9a,0x77,0xa2,0x48,0x79,0xa2,0x49,0x7a,1,0x61,0x2c,0x68,0x12,0x61,0x69,0x6e, -0x8b,0x11,0x69,0x6e,0x85,2,0x61,0x36,0x65,0x3c,0x68,0x14,0x69,0x6e,0x79,0x65, -0x68,0xa3,0x66,1,0x68,0x71,0x77,0x73,1,0x68,0x28,0x74,0x10,0x68,0x77,0x16, -0x6d,0x61,0x72,0x62,0x75,0x74,0x61,0x74,0x13,0x67,0x6f,0x61,0x6c,0x3d,0x1a,0x65, -0x72,0x74,0x69,0x63,0x61,0x6c,0x74,0x61,0x69,0x6c,0xa3,0x67,0x11,0x61,0x77,0x79, -1,0x65,0x32,0x75,0x11,0x64,0x68,0x80,0x11,0x68,0x65,0x83,0x10,0x68,0x7a,1, -0x62,0x34,0x77,0x16,0x69,0x74,0x68,0x74,0x61,0x69,0x6c,0x7f,0x14,0x61,0x72,0x72, -0x65,0x65,0x7d,0x6e,0xa2,0x4c,0x70,0xa2,0x69,0x71,0xa2,0x69,0x72,0xa2,0x6f,0x73, -5,0x74,0x22,0x74,0x38,0x77,0x4c,0x79,0x16,0x72,0x69,0x61,0x63,0x77,0x61,0x77, -0x6f,0x18,0x72,0x61,0x69,0x67,0x68,0x74,0x77,0x61,0x77,0xa3,0x55,0x15,0x61,0x73, -0x68,0x6b,0x61,0x66,0x6d,0x61,0x2e,0x65,0x38,0x68,0x11,0x69,0x6e,0x6b,0x10,0x64, -0x62,0x11,0x68,0x65,0x65,1,0x65,0x2e,0x6d,0x13,0x6b,0x61,0x74,0x68,0x69,0x10, -0x6e,0x67,2,0x6f,0x2c,0x75,0x50,0x79,0x10,0x61,0x91,1,0x6a,0x28,0x6f,0x10, -0x6e,0x55,0x1a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x67,0x72,0x6f,0x75,0x70,0x21,0x10, -0x6e,0x57,0x10,0x65,0x59,0x10,0x61,1,0x66,0x5b,0x70,0x10,0x68,0x5d,1,0x65, -0x38,0x6f,0x18,0x68,0x69,0x6e,0x67,0x79,0x61,0x79,0x65,0x68,0x93,1,0x68,0x5f, -0x76,0x16,0x65,0x72,0x73,0x65,0x64,0x70,0x65,0x61,0x67,0xc1,0xc7,0x67,0xa4,0x52, -0x68,0xa4,0x59,0x6b,0xa4,0x99,0x6c,0xa4,0xb2,0x6d,2,0x61,0x2e,0x65,0xa4,0x3e, -0x69,0x10,0x6d,0x53,1,0x6c,0xa2,0xe7,0x6e,0x16,0x69,0x63,0x68,0x61,0x65,0x61, -0x6e,0,0x12,0x6e,0x76,0x73,0x51,0x73,0x3e,0x74,0x5c,0x77,0xa0,0x79,0xa2,0x42, -0x7a,0x13,0x61,0x79,0x69,0x6e,0xa3,0x54,0x10,0x61,1,0x64,0x2e,0x6d,0x12,0x65, -0x6b,0x68,0xa3,0x4c,0x11,0x68,0x65,0xa3,0x4b,3,0x61,0x38,0x65,0x3c,0x68,0x4a, -0x77,0x13,0x65,0x6e,0x74,0x79,0xa3,0x51,0x10,0x77,0xa3,0x4d,1,0x6e,0xa3,0x4e, -0x74,0x10,0x68,0xa3,0x4f,0x14,0x61,0x6d,0x65,0x64,0x68,0xa3,0x50,0x11,0x61,0x77, -0xa3,0x52,0x12,0x6f,0x64,0x68,0xa3,0x53,0x6e,0x3a,0x6f,0x40,0x70,0x46,0x71,0x4a, -0x72,0x12,0x65,0x73,0x68,0xa3,0x4a,0x11,0x75,0x6e,0xa3,0x46,0x11,0x6e,0x65,0xa3, -0x47,0x10,0x65,0xa3,0x48,0x12,0x6f,0x70,0x68,0xa3,0x49,0x67,0x33,0x67,0x38,0x68, -0x40,0x6b,0x5e,0x6c,0x66,0x6d,0x11,0x65,0x6d,0xa3,0x45,0x13,0x69,0x6d,0x65,0x6c, -0xa1,1,0x65,0x32,0x75,0x14,0x6e,0x64,0x72,0x65,0x64,0xa3,0x42,0x11,0x74,0x68, -0xa3,0x41,0x12,0x61,0x70,0x68,0xa3,0x43,0x14,0x61,0x6d,0x65,0x64,0x68,0xa3,0x44, -0x61,0x34,0x62,0x4a,0x64,0x50,0x66,0x12,0x69,0x76,0x65,0x9f,1,0x6c,0x2a,0x79, -0x11,0x69,0x6e,0x97,0x12,0x65,0x70,0x68,0x95,0x12,0x65,0x74,0x68,0x99,1,0x61, -0x30,0x68,0x14,0x61,0x6d,0x65,0x64,0x68,0x9d,0x13,0x6c,0x65,0x74,0x68,0x9b,0x15, -0x61,0x79,0x61,0x6c,0x61,0x6d,6,0x6e,0x2c,0x6e,0x34,0x72,0x5e,0x73,0x62,0x74, -0x11,0x74,0x61,0xa3,0x63,2,0x67,0x2e,0x6e,0x32,0x79,0x10,0x61,0xa3,0x60,0x10, -0x61,0xa3,0x5d,1,0x61,0xa3,0x5e,0x6e,0x10,0x61,0xa3,0x5f,0x10,0x61,0xa3,0x61, -0x11,0x73,0x61,0xa3,0x62,0x62,0x3c,0x6a,0x42,0x6c,0x10,0x6c,1,0x61,0xa3,0x5b, -0x6c,0x10,0x61,0xa3,0x5c,0x11,0x68,0x61,0xa3,0x59,0x10,0x61,0xa3,0x5a,0x11,0x65, -0x6d,0x51,0x10,0x61,1,0x66,0x37,0x6d,0x11,0x61,0x6c,0x39,1,0x61,0x40,0x65, -0x3e,1,0x68,0x28,0x74,0x10,0x68,0x45,0x40,0x13,0x67,0x6f,0x61,0x6c,0x43,2, -0x68,0x3b,0x6d,0x5c,0x6e,0x1a,0x69,0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67,0x79, -0x61,1,0x6b,0x2a,0x70,0x10,0x61,0xa3,0x65,0x15,0x69,0x6e,0x6e,0x61,0x79,0x61, -0xa3,0x64,0x1a,0x7a,0x61,0x6f,0x6e,0x68,0x65,0x68,0x67,0x6f,0x61,0x6c,0x3d,2, -0x61,0x3a,0x68,0x44,0x6e,0x17,0x6f,0x74,0x74,0x65,0x64,0x68,0x65,0x68,0x4b,1, -0x66,0x47,0x70,0x10,0x68,0x49,0x12,0x61,0x70,0x68,0x89,0x11,0x61,0x6d,0x4c,0x12, -0x61,0x64,0x68,0x4f,0x61,0x6e,0x62,0xa2,0x54,0x64,0xa2,0x70,0x65,0x31,0x66,2, -0x61,0x3e,0x65,0x4a,0x69,0x19,0x6e,0x61,0x6c,0x73,0x65,0x6d,0x6b,0x61,0x74,0x68, -0x35,0x15,0x72,0x73,0x69,0x79,0x65,0x68,0x8f,0x86,0x10,0x68,0x33,2,0x66,0x3c, -0x69,0x70,0x6c,1,0x61,0x28,0x65,0x10,0x66,0x27,0x11,0x70,0x68,0x25,0x14,0x72, -0x69,0x63,0x61,0x6e,2,0x66,0x30,0x6e,0x36,0x71,0x11,0x61,0x66,0xa3,0x58,0x11, -0x65,0x68,0xa3,0x56,0x12,0x6f,0x6f,0x6e,0xa3,0x57,0x10,0x6e,0x23,1,0x65,0x4a, -0x75,0x10,0x72,0x1f,0x75,0x73,0x68,0x61,0x73,0x6b,0x69,0x79,0x65,0x68,0x62,0x61, -0x72,0x72,0x65,0x65,0x8d,1,0x68,0x29,0x74,0x10,0x68,0x2b,0x11,0x61,0x6c,0x2c, -0x16,0x61,0x74,0x68,0x72,0x69,0x73,0x68,0x2f,7,0x6e,0x2e,0x6e,0x2c,0x72,0x3e, -0x74,0x56,0x75,0x21,0x18,0x6f,0x6e,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x21,0x28, -0x1a,0x69,0x67,0x68,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x29,0x2a,0x19,0x72, -0x61,0x6e,0x73,0x70,0x61,0x72,0x65,0x6e,0x74,0x2b,0x63,0x23,0x64,0x40,0x6a,0x56, -0x6c,0x26,0x19,0x65,0x66,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x27,0x24,0x19, -0x75,0x61,0x6c,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x25,0x19,0x6f,0x69,0x6e,0x63, -0x61,0x75,0x73,0x69,0x6e,0x67,0x23,0,0x13,0x6e,0xc0,0xd0,0x73,0x49,0x73,0x48, -0x75,0x78,0x77,0x84,0x78,0x9c,0x7a,0x10,0x77,0x58,1,0x6a,0x75,0x73,0x13,0x70, -0x61,0x63,0x65,0x59,4,0x61,0x51,0x67,0x53,0x70,0x28,0x75,0x30,0x79,0x57,0x54, -0x12,0x61,0x63,0x65,0x55,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x53,0x15,0x6e, -0x6b,0x6e,0x6f,0x77,0x6e,0x21,1,0x6a,0x5d,0x6f,0x17,0x72,0x64,0x6a,0x6f,0x69, -0x6e,0x65,0x72,0x5d,0x10,0x78,0x21,0x6e,0x60,0x6f,0xa2,0x41,0x70,0xa2,0x50,0x71, -0xa2,0x6e,0x72,1,0x65,0x24,0x69,0x6f,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69, -0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x6f,4,0x65,0x3e,0x6c,0x5b,0x6f,0x46, -0x73,0x45,0x75,0x46,0x14,0x6d,0x65,0x72,0x69,0x63,0x47,0x15,0x78,0x74,0x6c,0x69, -0x6e,0x65,0x5b,0x17,0x6e,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x45,0x10,0x70,0x48, -0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x49,1, -0x6f,0x3e,0x72,0x4c,0x1a,0x65,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63, -0x4d,0x4a,0x1b,0x73,0x74,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x4b, -0x10,0x75,0x4e,0x16,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x68,0x7b,0x68,0x50, -0x69,0x86,0x6a,0xa2,0x61,0x6c,0xa2,0x65,0x6d,0x1c,0x61,0x6e,0x64,0x61,0x74,0x6f, -0x72,0x79,0x62,0x72,0x65,0x61,0x6b,0x2d,4,0x32,0x5f,0x33,0x61,0x65,0x34,0x6c, -0x6d,0x79,0x3a,0x13,0x70,0x68,0x65,0x6e,0x3b,0x19,0x62,0x72,0x65,0x77,0x6c,0x65, -0x74,0x74,0x65,0x72,0x6d,2,0x64,0x28,0x6e,0x3c,0x73,0x41,0x3c,0x18,0x65,0x6f, -0x67,0x72,0x61,0x70,0x68,0x69,0x63,0x3d,0x3e,1,0x66,0x3e,0x73,0x11,0x65,0x70, -1,0x61,0x22,0x65,0x14,0x72,0x61,0x62,0x6c,0x65,0x3f,0x18,0x69,0x78,0x6e,0x75, -0x6d,0x65,0x72,0x69,0x63,0x41,2,0x6c,0x63,0x74,0x65,0x76,0x67,1,0x66,0x43, -0x69,0x15,0x6e,0x65,0x66,0x65,0x65,0x64,0x43,0x61,0x40,0x62,0x70,0x63,0xa2,0x55, -0x65,0xa2,0xdb,0x67,0x10,0x6c,0x38,0x11,0x75,0x65,0x39,2,0x69,0x23,0x6c,0x34, -0x6d,0x16,0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x24,0x17,0x70,0x68,0x61,0x62, -0x65,0x74,0x69,0x63,0x25,4,0x32,0x27,0x61,0x29,0x62,0x2b,0x6b,0x2d,0x72,0x12, -0x65,0x61,0x6b,2,0x61,0x36,0x62,0x3e,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73, -0x57,0x13,0x66,0x74,0x65,0x72,0x29,1,0x65,0x2a,0x6f,0x11,0x74,0x68,0x27,0x13, -0x66,0x6f,0x72,0x65,0x2b,7,0x6d,0x51,0x6d,0x33,0x6f,0x28,0x70,0x69,0x72,0x35, -1,0x6d,0x76,0x6e,1,0x64,0x3c,0x74,0x1a,0x69,0x6e,0x67,0x65,0x6e,0x74,0x62, -0x72,0x65,0x61,0x6b,0x2f,0x15,0x69,0x74,0x69,0x6f,0x6e,0x61,0x1f,0x6c,0x6a,0x61, -0x70,0x61,0x6e,0x65,0x73,0x65,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x6b,1,0x62, -0x3a,0x70,0x19,0x6c,0x65,0x78,0x63,0x6f,0x6e,0x74,0x65,0x78,0x74,0x51,0x18,0x69, -0x6e,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x33,0x61,0x6a,0x62,0x2f,0x6a,0x6b,0x6c, -0x30,0x13,0x6f,0x73,0x65,0x70,1,0x61,0x38,0x75,0x18,0x6e,0x63,0x74,0x75,0x61, -0x74,0x69,0x6f,0x6e,0x31,0x18,0x72,0x65,0x6e,0x74,0x68,0x65,0x73,0x69,0x73,0x69, -0x1b,0x72,0x72,0x69,0x61,0x67,0x65,0x72,0x65,0x74,0x75,0x72,0x6e,0x35,2,0x62, -0x3e,0x6d,0x46,0x78,0x36,0x18,0x63,0x6c,0x61,0x6d,0x61,0x74,0x69,0x6f,0x6e,0x37, -0x70,0x12,0x61,0x73,0x65,0x71,0x72,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x73, -1,0x64,0x42,0x6e,1,0x6f,0x32,0x75,0x26,0x14,0x6d,0x65,0x72,0x69,0x63,0x27, -0x11,0x6e,0x65,0x21,1,0x65,0x2e,0x69,0x24,0x12,0x67,0x69,0x74,0x25,0x22,0x14, -0x63,0x69,0x6d,0x61,0x6c,0x23,0,0x18,0x6e,0xc4,0x6f,0x74,0xc1,0x91,0x77,0x96, -0x77,0xa2,0x4c,0x78,0xa2,0x70,0x79,0xa2,0x7a,0x7a,6,0x73,0x1e,0x73,0x34,0x78, -0x42,0x79,0x48,0x7a,0x11,0x7a,0x7a,0xa3,0x67,0x10,0x79,1,0x65,0xa3,0xae,0x6d, -0xa3,0x81,0x11,0x78,0x78,0xa3,0x66,0x11,0x79,0x79,0x21,0x61,0x30,0x69,0x58,0x6d, -0x11,0x74,0x68,0xa3,0x80,0x10,0x6e,1,0x61,0x26,0x62,0xa3,0xb1,0x1a,0x62,0x61, -0x7a,0x61,0x72,0x73,0x71,0x75,0x61,0x72,0x65,0xa3,0xb1,0x11,0x6e,0x68,0x23,2, -0x61,0x30,0x63,0x5a,0x6f,0x11,0x6c,0x65,0xa3,0x9b,1,0x6e,0x3c,0x72,0x10,0x61, -0xa2,0x92,0x15,0x6e,0x67,0x63,0x69,0x74,0x69,0xa3,0x92,0x12,0x63,0x68,0x6f,0xa3, -0xbc,0x11,0x68,0x6f,0xa3,0xbc,1,0x70,0x2c,0x73,0x11,0x75,0x78,0xa3,0x65,0x11, -0x65,0x6f,0x9b,1,0x65,0x2c,0x69,0x72,0x11,0x69,0x69,0x73,0x11,0x7a,0x69,0xa2, -0xc0,0x11,0x64,0x69,0xa3,0xc0,0x74,0x66,0x75,0xa2,0xde,0x76,1,0x61,0x48,0x69, -1,0x73,0x38,0x74,0x10,0x68,0xa2,0xc5,0x13,0x6b,0x75,0x71,0x69,0xa3,0xc5,0x10, -0x70,0xa3,0x64,0x10,0x69,0xa2,0x63,0x10,0x69,0xa3,0x63,7,0x68,0x3e,0x68,0x34, -0x69,0x48,0x6e,0x86,0x6f,0x11,0x74,0x6f,0xa3,0xc4,0x10,0x61,1,0x61,0x24,0x69, -0x6d,0x6a,0x11,0x6e,0x61,0x6b,2,0x62,0x3a,0x66,0x4a,0x72,0x10,0x68,0xa2,0x9e, -0x12,0x75,0x74,0x61,0xa3,0x9e,1,0x65,0x24,0x74,0x6f,0x12,0x74,0x61,0x6e,0x6f, -0x14,0x69,0x6e,0x61,0x67,0x68,0x99,0x11,0x73,0x61,0xa3,0xc3,0x61,0x36,0x65,0xa2, -0x65,0x66,0xa2,0x71,0x67,0x11,0x6c,0x67,0x75,6,0x6c,0x28,0x6c,0x32,0x6d,0x38, -0x6e,0x44,0x76,0x10,0x74,0xa3,0x7f,1,0x65,0x89,0x75,0x97,1,0x69,0x24,0x6c, -0x67,0x10,0x6c,0x67,0x10,0x67,0xa2,0x9a,1,0x73,0x2a,0x75,0x10,0x74,0xa3,0x9a, -0x10,0x61,0xa3,0xc3,0x67,0x36,0x69,0x52,0x6b,0x10,0x72,0xa2,0x99,0x10,0x69,0xa3, -0x99,1,0x61,0x30,0x62,0x7a,0x13,0x61,0x6e,0x77,0x61,0x7b,0x12,0x6c,0x6f,0x67, -0x75,2,0x6c,0x32,0x74,0x34,0x76,0x12,0x69,0x65,0x74,0xa3,0x7f,0x10,0x65,0x89, -0x12,0x68,0x61,0x6d,0xa3,0x6a,1,0x6c,0x2a,0x6e,0x10,0x67,0xa3,0x62,0x10,0x75, -0x68,0x11,0x67,0x75,0x69,0x11,0x6e,0x67,0x99,1,0x67,0x32,0x6e,0x14,0x6b,0x6e, -0x6f,0x77,0x6e,0xa3,0x67,0x11,0x61,0x72,0x8a,0x13,0x69,0x74,0x69,0x63,0x8b,0x71, -0xc1,0x13,0x71,0xa2,0xde,0x72,0xa2,0xe3,0x73,6,0x69,0x8a,0x69,0x72,0x6f,0xa2, -0x4c,0x75,0xa2,0x75,0x79,1,0x6c,0x46,0x72,4,0x63,0x65,0x65,0xa3,0x5f,0x69, -0x2c,0x6a,0xa3,0x60,0x6e,0xa3,0x61,0x11,0x61,0x63,0x65,0x10,0x6f,0x94,0x16,0x74, -0x69,0x6e,0x61,0x67,0x72,0x69,0x95,2,0x64,0x3c,0x67,0x4c,0x6e,1,0x64,0xa3, -0x91,0x68,0x62,0x12,0x61,0x6c,0x61,0x63,0x10,0x64,0xa2,0xa6,0x12,0x68,0x61,0x6d, -0xa3,0xa6,0x17,0x6e,0x77,0x72,0x69,0x74,0x69,0x6e,0x67,0xa3,0x70,2,0x67,0x3a, -0x72,0x52,0x79,0x10,0x6f,0xa2,0xb0,0x12,0x6d,0x62,0x6f,0xa3,0xb0,1,0x64,0x26, -0x6f,0xa3,0xb8,0xa2,0xb7,0x12,0x69,0x61,0x6e,0xa3,0xb7,0x10,0x61,0xa2,0x98,0x16, -0x73,0x6f,0x6d,0x70,0x65,0x6e,0x67,0xa3,0x98,0x11,0x6e,0x64,0xa2,0x71,0x14,0x61, -0x6e,0x65,0x73,0x65,0xa3,0x71,0x61,0x5c,0x67,0xa2,0x43,0x68,1,0x61,0x2a,0x72, -0x10,0x64,0xa3,0x97,2,0x72,0x28,0x76,0x30,0x77,0x87,0x12,0x61,0x64,0x61,0xa3, -0x97,0x12,0x69,0x61,0x6e,0x87,2,0x6d,0x40,0x72,0x58,0x75,0x10,0x72,0xa2,0x6f, -0x15,0x61,0x73,0x68,0x74,0x72,0x61,0xa3,0x6f,1,0x61,0x26,0x72,0xa3,0x7e,0x14, -0x72,0x69,0x74,0x61,0x6e,0xa3,0x7e,1,0x61,0xa3,0x5e,0x62,0xa3,0x85,0x11,0x6e, -0x77,0xa3,0x70,0x11,0x61,0x61,1,0x63,0x2f,0x69,0x23,3,0x65,0x3e,0x6a,0x48, -0x6f,0x4e,0x75,0x10,0x6e,1,0x69,0x24,0x72,0x61,0x10,0x63,0x61,0x13,0x6a,0x61, -0x6e,0x67,0xa3,0x6e,0x11,0x6e,0x67,0xa3,0x6e,1,0x68,0x2a,0x72,0x10,0x6f,0xa3, -0x5d,0x10,0x67,0xa3,0xb6,0x6e,0xa2,0x83,0x6f,0xa4,1,0x70,5,0x6c,0x1e,0x6c, -0x44,0x72,0x4a,0x73,0x1b,0x61,0x6c,0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76, -0x69,0xa3,0x7b,0x11,0x72,0x64,0xa3,0x5c,0x11,0x74,0x69,0xa3,0x7d,0x61,0x7c,0x65, -0xa2,0x54,0x68,3,0x61,0x3e,0x6c,0x4e,0x6e,0x5e,0x6f,0x16,0x65,0x6e,0x69,0x63, -0x69,0x61,0x6e,0xa3,0x5b,0x10,0x67,0xa2,0x5a,0x12,0x73,0x70,0x61,0xa3,0x5a,2, -0x69,0xa3,0x7a,0x70,0xa3,0x7b,0x76,0xa3,0x7c,0x10,0x78,0xa3,0x5b,2,0x68,0x3e, -0x6c,0x50,0x75,0x10,0x63,0xa2,0xa5,0x14,0x69,0x6e,0x68,0x61,0x75,0xa3,0xa5,0x17, -0x61,0x77,0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0x4b,0x10,0x6d,0xa2,0x90,0x14,0x79, -0x72,0x65,0x6e,0x65,0xa3,0x90,0x11,0x72,0x6d,0xa3,0x59,6,0x6b,0x36,0x6b,0x56, -0x73,0x6e,0x75,0x74,0x79,0x11,0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61, -0x63,0x68,0x75,0x65,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0xba,1,0x67,0x2e,0x6f,0xa2, -0x57,0x10,0x6f,0xa3,0x57,0x10,0x62,0xa3,0x84,0x11,0x68,0x75,0xa3,0x96,0x12,0x73, -0x68,0x75,0xa3,0x96,0x61,0x42,0x62,0x9e,0x65,0x10,0x77,1,0x61,0xa3,0xaa,0x74, -0x14,0x61,0x69,0x6c,0x75,0x65,0x97,3,0x62,0x32,0x67,0x40,0x6e,0x56,0x72,0x10, -0x62,0xa3,0x8e,0x15,0x61,0x74,0x61,0x65,0x61,0x6e,0xa3,0x8f,0x10,0x6d,0xa2,0xc7, -0x15,0x75,0x6e,0x64,0x61,0x72,0x69,0xa3,0xc7,0x10,0x64,0xa2,0xbb,0x16,0x69,0x6e, -0x61,0x67,0x61,0x72,0x69,0xa3,0xbb,0x11,0x61,0x74,0xa3,0x8f,4,0x67,0x3c,0x6c, -0x4e,0x72,0xa2,0x8e,0x73,0xa2,0x9c,0x75,0x11,0x67,0x72,0xa3,0xc2,1,0x61,0x2a, -0x68,0x11,0x61,0x6d,0x5b,0x10,0x6d,0x5b,1,0x63,0xa2,0x6a,0x64,6,0x70,0x41, -0x70,0x3a,0x73,0x58,0x74,0x86,0x75,0x14,0x79,0x67,0x68,0x75,0x72,0xa3,0xc2,0x11, -0x65,0x72,1,0x6d,0x2c,0x73,0x12,0x69,0x61,0x6e,0x9b,0x11,0x69,0x63,0xa3,0x59, -0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e, -0xa3,0x85,0x13,0x64,0x69,0x61,0x6e,0xa3,0xb8,0x14,0x75,0x72,0x6b,0x69,0x63,0xa3, -0x58,0x68,0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69, -0x61,0x6e,0xa3,0x8e,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa3,0x4c,0x14, -0x74,0x61,0x6c,0x69,0x63,0x5d,1,0x68,0x26,0x6b,0xa3,0x6d,0x12,0x69,0x6b,0x69, -0xa3,0x6d,2,0x69,0x2c,0x6b,0x30,0x79,0x10,0x61,0x5f,0x11,0x79,0x61,0x5f,0x10, -0x68,0xa3,0x58,2,0x61,0x36,0x67,0x3c,0x6d,0x10,0x61,0x84,0x12,0x6e,0x79,0x61, -0x85,0x11,0x67,0x65,0xa3,0xab,0x10,0x65,0xa3,0xab,0x68,0xc3,0x15,0x6b,0xc2,0x2c, -0x6b,0xa4,0x17,0x6c,0xa4,0xba,0x6d,8,0x6f,0x46,0x6f,0x48,0x72,0x74,0x74,0x80, -0x75,0x86,0x79,1,0x61,0x28,0x6d,0x10,0x72,0x59,0x13,0x6e,0x6d,0x61,0x72,0x59, -2,0x64,0x2e,0x6e,0x32,0x6f,0x10,0x6e,0xa3,0x72,0x10,0x69,0xa3,0xa3,0x10,0x67, -0x56,0x14,0x6f,0x6c,0x69,0x61,0x6e,0x57,0x10,0x6f,0xa2,0x95,0x10,0x6f,0xa3,0x95, -0x11,0x65,0x69,0xa3,0x73,0x11,0x6c,0x74,0xa2,0xa4,0x12,0x61,0x6e,0x69,0xa3,0xa4, -0x61,0x36,0x65,0xa2,0x67,0x69,0xa2,0xbd,0x6c,0x11,0x79,0x6d,0x55,6,0x6e,0x38, -0x6e,0x32,0x72,0x5c,0x73,0x6c,0x79,0x10,0x61,0xa3,0x55,1,0x64,0x38,0x69,0xa2, -0x79,0x15,0x63,0x68,0x61,0x65,0x61,0x6e,0xa3,0x79,0xa2,0x54,0x12,0x61,0x69,0x63, -0xa3,0x54,0x10,0x63,0xa2,0xa9,0x12,0x68,0x65,0x6e,0xa3,0xa9,0x18,0x61,0x72,0x61, -0x6d,0x67,0x6f,0x6e,0x64,0x69,0xa3,0xaf,0x68,0x36,0x6b,0x4c,0x6c,0x15,0x61,0x79, -0x61,0x6c,0x61,0x6d,0x55,1,0x61,0x26,0x6a,0xa3,0xa0,0x13,0x6a,0x61,0x6e,0x69, -0xa3,0xa0,0x10,0x61,0xa2,0xb4,0x12,0x73,0x61,0x72,0xa3,0xb4,3,0x64,0x78,0x65, -0x94,0x6e,0xa2,0x42,0x72,1,0x63,0xa3,0x8d,0x6f,0xa2,0x56,0x13,0x69,0x74,0x69, -0x63,1,0x63,0x3c,0x68,0x19,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73, -0xa3,0x56,0x15,0x75,0x72,0x73,0x69,0x76,0x65,0xa3,0x8d,1,0x65,0x26,0x66,0xa3, -0xb5,0x16,0x66,0x61,0x69,0x64,0x72,0x69,0x6e,0xa3,0xb5,0x17,0x74,0x65,0x69,0x6d, -0x61,0x79,0x65,0x6b,0xa3,0x73,0x10,0x64,0xa2,0x8c,0x17,0x65,0x6b,0x69,0x6b,0x61, -0x6b,0x75,0x69,0xa3,0x8c,0x11,0x61,0x6f,0xa3,0x5c,6,0x6e,0x1a,0x6e,0x34,0x6f, -0x38,0x70,0x3e,0x74,0x11,0x68,0x69,0xa3,0x78,0x11,0x64,0x61,0x4b,0x11,0x72,0x65, -0xa3,0x77,0x11,0x65,0x6c,0xa3,0x8a,0x61,0x32,0x68,0xa2,0x44,0x69,0x11,0x74,0x73, -0xa3,0xbf,5,0x74,0x23,0x74,0x34,0x77,0x56,0x79,0x13,0x61,0x68,0x6c,0x69,0xa3, -0x4f,0x14,0x61,0x6b,0x61,0x6e,0x61,0x4c,0x19,0x6f,0x72,0x68,0x69,0x72,0x61,0x67, -0x61,0x6e,0x61,0x8d,0x10,0x69,0xa3,0xc6,0x69,0x38,0x6c,0x40,0x6e,1,0x61,0x4d, -0x6e,0x12,0x61,0x64,0x61,0x4b,0x12,0x74,0x68,0x69,0xa3,0x78,0x10,0x69,0xa3,0x4f, -4,0x61,0x40,0x69,0x52,0x6d,0x70,0x6f,0x7c,0x75,0x15,0x64,0x61,0x77,0x61,0x64, -0x69,0xa3,0x91,0x10,0x72,0x92,0x15,0x6f,0x73,0x68,0x74,0x68,0x69,0x93,0x1d,0x74, -0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74,0xa3,0xbf,1, -0x65,0x24,0x72,0x4f,0x10,0x72,0x4f,0x10,0x6a,0xa2,0x9d,0x11,0x6b,0x69,0xa3,0x9d, -4,0x61,0x5c,0x65,0x90,0x69,0xa0,0x6f,0xa2,0x5d,0x79,1,0x63,0x34,0x64,0x10, -0x69,0xa2,0x6c,0x11,0x61,0x6e,0xa3,0x6c,0x10,0x69,0xa2,0x6b,0x11,0x61,0x6e,0xa3, -0x6b,2,0x6e,0x42,0x6f,0x46,0x74,3,0x66,0xa3,0x50,0x67,0xa3,0x51,0x69,0x24, -0x6e,0x53,0x10,0x6e,0x53,0x10,0x61,0xa3,0x6a,0x50,0x10,0x6f,0x51,0x11,0x70,0x63, -0xa2,0x52,0x11,0x68,0x61,0xa3,0x52,2,0x6d,0x2e,0x6e,0x36,0x73,0x10,0x75,0xa3, -0x83,0x10,0x62,0x80,0x10,0x75,0x81,2,0x61,0xa3,0x53,0x62,0x83,0x65,0x11,0x61, -0x72,1,0x61,0xa3,0x53,0x62,0x83,0x11,0x6d,0x61,0xa3,0x8b,0x68,0x6e,0x69,0xa2, -0x95,0x6a,2,0x61,0x30,0x70,0x52,0x75,0x11,0x72,0x63,0xa3,0x94,1,0x6d,0x38, -0x76,0x10,0x61,0xa2,0x4e,0x13,0x6e,0x65,0x73,0x65,0xa3,0x4e,0x10,0x6f,0xa3,0xad, -0x11,0x61,0x6e,0xa3,0x69,6,0x6c,0x1e,0x6c,0x34,0x6d,0x3a,0x72,0x48,0x75,0x11, -0x6e,0x67,0xa3,0x4c,0x11,0x75,0x77,0xa3,0x9c,0x10,0x6e,1,0x67,0xa3,0x4b,0x70, -0xa3,0xba,0x11,0x6b,0x74,0x8d,0x61,0x3c,0x65,0xa2,0x43,0x69,0x11,0x72,0x61,0x48, -0x13,0x67,0x61,0x6e,0x61,0x49,1,0x6e,0x34,0x74,0x10,0x72,0xa2,0xa2,0x11,0x61, -0x6e,0xa3,0xa2,0x42,6,0x6f,0xe,0x6f,0x77,0x73,0xa3,0x49,0x74,0xa3,0x4a,0x75, -0x12,0x6e,0x6f,0x6f,0x77,0x62,0xa3,0xac,0x67,0x3e,0x69,0x42,0x19,0x66,0x69,0x72, -0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,0xa3,0xb6,0x44,0x11,0x75,0x6c,0x45,0x11,0x62, -0x72,0x46,0x11,0x65,0x77,0x47,2,0x6d,0x2e,0x6e,0x4a,0x74,0x11,0x61,0x6c,0x5d, -0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61,0x6d,0x61,0x69,0x63,0xa3,0x74, -2,0x64,0x66,0x68,0x6a,0x73,0x1b,0x63,0x72,0x69,0x70,0x74,0x69,0x6f,0x6e,0x61, -0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61,0x6e,0xa3,0x7d,0x13, -0x6c,0x61,0x76,0x69,0xa3,0x7a,0x10,0x73,0xa3,0x4d,0x15,0x65,0x72,0x69,0x74,0x65, -0x64,0x23,0x64,0xc1,0xd,0x64,0xa2,0x7a,0x65,0xa2,0xc1,0x67,4,0x65,0x82,0x6c, -0x9a,0x6f,0xa2,0x46,0x72,0xa2,0x55,0x75,2,0x6a,0x3c,0x6e,0x4e,0x72,1,0x6d, -0x24,0x75,0x41,0x13,0x75,0x6b,0x68,0x69,0x41,1,0x61,0x24,0x72,0x3f,0x13,0x72, -0x61,0x74,0x69,0x3f,0x18,0x6a,0x61,0x6c,0x61,0x67,0x6f,0x6e,0x64,0x69,0xa3,0xb3, -0x10,0x6f,1,0x6b,0xa3,0x48,0x72,0x38,0x13,0x67,0x69,0x61,0x6e,0x39,0x11,0x61, -0x67,0x90,0x15,0x6f,0x6c,0x69,0x74,0x69,0x63,0x91,1,0x6e,0x30,0x74,0x10,0x68, -0x3a,0x11,0x69,0x63,0x3b,1,0x67,0xa3,0xb3,0x6d,0xa3,0xaf,1,0x61,0x32,0x65, -1,0x65,0x24,0x6b,0x3d,0x10,0x6b,0x3d,0x10,0x6e,0xa2,0x89,0x12,0x74,0x68,0x61, -0xa3,0x89,4,0x65,0x46,0x69,0x6c,0x6f,0x8c,0x73,0x9a,0x75,0x11,0x70,0x6c,0xa2, -0x87,0x13,0x6f,0x79,0x61,0x6e,0xa3,0x87,1,0x73,0x38,0x76,0x10,0x61,0x34,0x15, -0x6e,0x61,0x67,0x61,0x72,0x69,0x35,0x13,0x65,0x72,0x65,0x74,0x33,1,0x61,0x36, -0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa3,0xbe,0x10,0x6b,0xa3,0xbe,0x11, -0x67,0x72,0xa2,0xb2,0x10,0x61,0xa3,0xb2,0x11,0x72,0x74,0x33,2,0x67,0x3a,0x6c, -0x72,0x74,0x11,0x68,0x69,0x36,0x13,0x6f,0x70,0x69,0x63,0x37,0x10,0x79,2,0x64, -0xa3,0x45,0x68,0xa3,0x46,0x70,0xa2,0x47,0x1e,0x74,0x69,0x61,0x6e,0x68,0x69,0x65, -0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x47,1,0x62,0x36,0x79,0x10,0x6d, -0xa2,0xb9,0x12,0x61,0x69,0x63,0xa3,0xb9,0x10,0x61,0xa2,0x88,0x12,0x73,0x61,0x6e, -0xa3,0x88,0x61,0xa2,0xc9,0x62,0xa4,0x2e,0x63,6,0x6f,0x52,0x6f,0x76,0x70,0x92, -0x75,0xa2,0x41,0x79,1,0x70,0x3e,0x72,2,0x69,0x2a,0x6c,0x31,0x73,0xa3,0x44, -0x13,0x6c,0x6c,0x69,0x63,0x31,0x10,0x72,1,0x69,0x34,0x6f,0x15,0x6d,0x69,0x6e, -0x6f,0x61,0x6e,0xa3,0xc1,0x11,0x6f,0x74,0x7f,1,0x6d,0x30,0x70,0x10,0x74,0x2e, -0x11,0x69,0x63,0x2f,0x12,0x6d,0x6f,0x6e,0x21,1,0x6d,0x28,0x72,0x10,0x74,0x7f, -0x10,0x6e,0xa3,0xc1,0x16,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa3,0x65,0x61,0x32, -0x68,0xa2,0x41,0x69,0x11,0x72,0x74,0xa3,0x43,3,0x6b,0x4c,0x6e,0x50,0x72,0x76, -0x75,0x1d,0x63,0x61,0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,0x61,0x6e,0x69,0x61,0x6e, -0xa3,0x9f,0x10,0x6d,0xa3,0x76,1,0x61,0x24,0x73,0x71,0x1d,0x64,0x69,0x61,0x6e, -0x61,0x62,0x6f,0x72,0x69,0x67,0x69,0x6e,0x61,0x6c,0x71,0x10,0x69,0xa2,0x68,0x11, -0x61,0x6e,0xa3,0x68,3,0x61,0x32,0x65,0x44,0x6f,0x52,0x72,0x10,0x73,0xa3,0xbd, -1,0x6b,0x26,0x6d,0xa3,0x42,0x11,0x6d,0x61,0xa3,0x76,0x10,0x72,0x2c,0x13,0x6f, -0x6b,0x65,0x65,0x2d,0x16,0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa3,0xbd,6,0x68, -0x4a,0x68,0x48,0x6e,0x4e,0x72,0x76,0x76,1,0x65,0x2a,0x73,0x10,0x74,0xa3,0x75, -0x13,0x73,0x74,0x61,0x6e,0xa3,0x75,0x11,0x6f,0x6d,0xa3,0xa1,0x11,0x61,0x74,0x1f, -0x6f,0x6c,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73, -0xa3,0x9c,1,0x61,0x3e,0x6d,2,0x65,0x2a,0x69,0xa3,0x74,0x6e,0x27,0x13,0x6e, -0x69,0x61,0x6e,0x27,0x10,0x62,0x24,0x11,0x69,0x63,0x25,0x64,0x30,0x66,0x44,0x67, -0x11,0x68,0x62,0xa3,0x9f,0x10,0x6c,1,0x61,0x26,0x6d,0xa3,0xa7,0x10,0x6d,0xa3, -0xa7,0x11,0x61,0x6b,0xa3,0x93,6,0x6c,0x3c,0x6c,0x52,0x6f,0x56,0x72,0x66,0x75, -1,0x67,0x30,0x68,1,0x64,0x79,0x69,0x10,0x64,0x79,0x10,0x69,0x8e,0x13,0x6e, -0x65,0x73,0x65,0x8f,0x11,0x69,0x73,0xa1,0x11,0x70,0x6f,0x2a,0x13,0x6d,0x6f,0x66, -0x6f,0x2b,0x10,0x61,1,0x68,0x2e,0x69,0x7c,0x12,0x6c,0x6c,0x65,0x7d,0xa2,0x41, -0x11,0x6d,0x69,0xa3,0x41,0x61,0x48,0x65,0x9c,0x68,1,0x61,0x2a,0x6b,0x10,0x73, -0xa3,0xa8,0x15,0x69,0x6b,0x73,0x75,0x6b,0x69,0xa3,0xa8,3,0x6c,0x3a,0x6d,0x48, -0x73,0x54,0x74,1,0x61,0x24,0x6b,0x9f,0x10,0x6b,0x9f,0x10,0x69,0x9c,0x13,0x6e, -0x65,0x73,0x65,0x9d,0x10,0x75,0xa2,0x82,0x10,0x6d,0xa3,0x82,0x10,0x73,0xa2,0x86, -0x13,0x61,0x76,0x61,0x68,0xa3,0x86,0x11,0x6e,0x67,0x28,0x12,0x61,0x6c,0x69,0x29, -3,0x6c,0x42,0x6e,0x90,0x74,0xa2,0x46,0x76,0x24,0x17,0x6f,0x77,0x65,0x6c,0x6a, -0x61,0x6d,0x6f,0x25,0x22,1,0x65,0x54,0x76,0x28,1,0x73,0x38,0x74,0x2a,0x17, -0x73,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x2b,0x16,0x79,0x6c,0x6c,0x61,0x62,0x6c, -0x65,0x29,0x18,0x61,0x64,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x23,1,0x61,0x21, -0x6f,0x1a,0x74,0x61,0x70,0x70,0x6c,0x69,0x63,0x61,0x62,0x6c,0x65,0x21,0x26,0x1a, -0x72,0x61,0x69,0x6c,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x27,1,0x6e,0x2c,0x79, -0x22,0x11,0x65,0x73,0x23,0x20,0x10,0x6f,0x21,1,0x6e,0x2c,0x79,0x22,0x11,0x65, -0x73,0x23,0x20,0x10,0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11,0x65,0x73, -0x23,0x24,0x13,0x61,0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,2,0x6d,0x30,0x6e, -0x3a,0x79,0x22,0x11,0x65,0x73,0x23,0x24,0x13,0x61,0x79,0x62,0x65,0x25,0x20,0x10, -0x6f,0x21,0xb,0x72,0x39,0x76,0xc,0x76,0x33,0x78,0x2a,0x7a,0x11,0x77,0x6a,0x43, -0x10,0x78,0x21,0x72,0x28,0x73,0x50,0x74,0x31,1,0x65,0x24,0x69,0x39,0x1e,0x67, -0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x39,1, -0x6d,0x35,0x70,0x18,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x35,0x6c,0x1f, -0x6c,0x3c,0x6f,0x4a,0x70,1,0x70,0x37,0x72,0x14,0x65,0x70,0x65,0x6e,0x64,0x37, -0x28,1,0x66,0x2b,0x76,0x2c,0x10,0x74,0x2f,0x13,0x74,0x68,0x65,0x72,0x21,0x63, -0x4c,0x65,0x64,0x67,1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66,0x74,0x65,0x72, -0x7a,0x77,0x6a,0x41,0x10,0x7a,0x41,2,0x6e,0x23,0x6f,0x24,0x72,0x25,0x14,0x6e, -0x74,0x72,0x6f,0x6c,0x23,2,0x62,0x34,0x6d,0x4e,0x78,0x26,0x13,0x74,0x65,0x6e, -0x64,0x27,0x3a,1,0x61,0x24,0x67,0x3d,0x11,0x73,0x65,0x3a,0x12,0x67,0x61,0x7a, -0x3d,0x3e,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x3f,9,0x6e,0x4a,0x6e,0x34, -0x6f,0x44,0x73,0x60,0x75,0x94,0x78,0x10,0x78,0x21,0x10,0x75,0x2a,0x14,0x6d,0x65, -0x72,0x69,0x63,0x2b,1,0x6c,0x2c,0x74,0x12,0x68,0x65,0x72,0x21,0x14,0x65,0x74, -0x74,0x65,0x72,0x2d,3,0x63,0x36,0x65,0x46,0x70,0x31,0x74,0x32,0x12,0x65,0x72, -0x6d,0x33,0x3c,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3d,0x2e,0x10,0x70,0x2f, -0x10,0x70,0x34,0x12,0x70,0x65,0x72,0x35,0x61,0x46,0x63,0x52,0x65,0x64,0x66,0x72, -0x6c,2,0x65,0x2d,0x66,0x3b,0x6f,0x28,0x12,0x77,0x65,0x72,0x29,0x10,0x74,0x22, -0x12,0x65,0x72,0x6d,0x23,1,0x6c,0x24,0x72,0x37,0x24,0x12,0x6f,0x73,0x65,0x25, -0x10,0x78,0x38,0x13,0x74,0x65,0x6e,0x64,0x39,0x10,0x6f,0x26,0x13,0x72,0x6d,0x61, -0x74,0x27,0,0x10,0x6c,0x88,0x72,0x40,0x72,0x36,0x73,0x5e,0x77,0x7a,0x78,0x8a, -0x7a,0x11,0x77,0x6a,0x4b,1,0x65,0x24,0x69,0x3b,0x1e,0x67,0x69,0x6f,0x6e,0x61, -0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x3b,1,0x69,0x24,0x71,0x3f, -0x18,0x6e,0x67,0x6c,0x65,0x71,0x75,0x6f,0x74,0x65,0x3f,0x17,0x73,0x65,0x67,0x73, -0x70,0x61,0x63,0x65,0x4d,0x10,0x78,0x21,0x6c,0x36,0x6d,0x3c,0x6e,0x76,0x6f,0x13, -0x74,0x68,0x65,0x72,0x21,1,0x65,0x23,0x66,0x35,3,0x62,0x37,0x69,0x28,0x6c, -0x29,0x6e,0x2b,0x10,0x64,1,0x6c,0x34,0x6e,0x11,0x75,0x6d,0x2a,0x12,0x6c,0x65, -0x74,0x37,0x14,0x65,0x74,0x74,0x65,0x72,0x29,2,0x65,0x36,0x6c,0x39,0x75,0x2c, -0x14,0x6d,0x65,0x72,0x69,0x63,0x2d,0x14,0x77,0x6c,0x69,0x6e,0x65,0x39,0x66,0x3f, -0x66,0x40,0x67,0x4e,0x68,0x70,0x6b,0x10,0x61,0x26,0x15,0x74,0x61,0x6b,0x61,0x6e, -0x61,0x27,0x10,0x6f,0x24,0x13,0x72,0x6d,0x61,0x74,0x25,1,0x61,0x3a,0x6c,0x19, -0x75,0x65,0x61,0x66,0x74,0x65,0x72,0x7a,0x77,0x6a,0x49,0x10,0x7a,0x49,1,0x65, -0x24,0x6c,0x3d,0x19,0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,0x65,0x72,0x3d,0x61, -0x86,0x63,0x92,0x64,0x94,0x65,2,0x62,0x44,0x6d,0x5e,0x78,0x2e,0x13,0x74,0x65, -0x6e,0x64,0x32,0x15,0x6e,0x75,0x6d,0x6c,0x65,0x74,0x2f,0x42,1,0x61,0x24,0x67, -0x45,0x11,0x73,0x65,0x42,0x12,0x67,0x61,0x7a,0x45,0x46,0x16,0x6f,0x64,0x69,0x66, -0x69,0x65,0x72,0x47,0x15,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,0x10,0x72,0x31,1, -0x6f,0x24,0x71,0x41,0x18,0x75,0x62,0x6c,0x65,0x71,0x75,0x6f,0x74,0x65,0x41,2, -0x63,0x32,0x6e,0x3c,0x6f,0x22,0x12,0x70,0x65,0x6e,0x23,0x24,0x13,0x6c,0x6f,0x73, -0x65,0x25,0x20,0x12,0x6f,0x6e,0x65,0x21,6,0x6f,0x65,0x6f,0x4a,0x72,0x5c,0x74, -0x64,0x76,0x1d,0x69,0x73,0x75,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x6c,0x65,0x66, -0x74,0x3d,0x18,0x76,0x65,0x72,0x73,0x74,0x72,0x75,0x63,0x6b,0x2d,0x13,0x69,0x67, -0x68,0x74,0x2f,0x11,0x6f,0x70,0x30,0x12,0x61,0x6e,0x64,2,0x62,0x32,0x6c,0x62, -0x72,0x13,0x69,0x67,0x68,0x74,0x3b,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x32,0x12,0x61, -0x6e,0x64,1,0x6c,0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x35,0x12,0x65,0x66,0x74, -0x3f,0x12,0x65,0x66,0x74,0x36,0x17,0x61,0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x39, -0x62,0x2c,0x6c,0x5c,0x6e,0x10,0x61,0x21,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x22,0x12, -0x61,0x6e,0x64,1,0x6c,0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x27,0x12,0x65,0x66, -0x74,0x25,0x12,0x65,0x66,0x74,0x28,0x17,0x61,0x6e,0x64,0x72,0x69,0x67,0x68,0x74, -0x2b,0xd,0x6e,0xaa,0x72,0x70,0x72,0x92,0x73,0xa2,0x46,0x74,0xa2,0x54,0x76,1, -0x69,0x60,0x6f,0x12,0x77,0x65,0x6c,0x62,1,0x64,0x3a,0x69,0x19,0x6e,0x64,0x65, -0x70,0x65,0x6e,0x64,0x65,0x6e,0x74,0x67,0x17,0x65,0x70,0x65,0x6e,0x64,0x65,0x6e, -0x74,0x65,1,0x72,0x2e,0x73,0x13,0x61,0x72,0x67,0x61,0x61,0x12,0x61,0x6d,0x61, -0x5f,0x1d,0x65,0x67,0x69,0x73,0x74,0x65,0x72,0x73,0x68,0x69,0x66,0x74,0x65,0x72, -0x57,0x1e,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65, -0x72,0x59,0x12,0x6f,0x6e,0x65,1,0x6c,0x2c,0x6d,0x12,0x61,0x72,0x6b,0x5d,0x14, -0x65,0x74,0x74,0x65,0x72,0x5b,0x6e,0x3c,0x6f,0x7c,0x70,0x18,0x75,0x72,0x65,0x6b, -0x69,0x6c,0x6c,0x65,0x72,0x55,1,0x6f,0x4c,0x75,1,0x6b,0x3c,0x6d,0x12,0x62, -0x65,0x72,0x50,0x15,0x6a,0x6f,0x69,0x6e,0x65,0x72,0x53,0x11,0x74,0x61,0x4f,0x16, -0x6e,0x6a,0x6f,0x69,0x6e,0x65,0x72,0x4d,0x13,0x74,0x68,0x65,0x72,0x21,0x67,0x3e, -0x67,0x4a,0x69,0x64,0x6a,0x82,0x6d,0x1d,0x6f,0x64,0x69,0x66,0x79,0x69,0x6e,0x67, -0x6c,0x65,0x74,0x74,0x65,0x72,0x4b,0x1c,0x65,0x6d,0x69,0x6e,0x61,0x74,0x69,0x6f, -0x6e,0x6d,0x61,0x72,0x6b,0x45,0x1e,0x6e,0x76,0x69,0x73,0x69,0x62,0x6c,0x65,0x73, -0x74,0x61,0x63,0x6b,0x65,0x72,0x47,0x14,0x6f,0x69,0x6e,0x65,0x72,0x49,0x61,0xa2, -0xba,0x62,0xa2,0xc0,0x63,1,0x61,0xa2,0xa2,0x6f,0x16,0x6e,0x73,0x6f,0x6e,0x61, -0x6e,0x74,0x2a,8,0x6b,0x67,0x6b,0x48,0x6d,0x52,0x70,0x5c,0x73,0xa2,0x42,0x77, -0x19,0x69,0x74,0x68,0x73,0x74,0x61,0x63,0x6b,0x65,0x72,0x43,0x14,0x69,0x6c,0x6c, -0x65,0x72,0x35,0x14,0x65,0x64,0x69,0x61,0x6c,0x37,1,0x6c,0x52,0x72,0x10,0x65, -1,0x63,0x2e,0x66,0x13,0x69,0x78,0x65,0x64,0x3d,0x19,0x65,0x64,0x69,0x6e,0x67, -0x72,0x65,0x70,0x68,0x61,0x3b,0x18,0x61,0x63,0x65,0x68,0x6f,0x6c,0x64,0x65,0x72, -0x39,0x10,0x75,1,0x62,0x3e,0x63,0x1b,0x63,0x65,0x65,0x64,0x69,0x6e,0x67,0x72, -0x65,0x70,0x68,0x61,0x41,0x15,0x6a,0x6f,0x69,0x6e,0x65,0x64,0x3f,0x64,0x4c,0x66, -0x52,0x68,0x5a,0x69,0x1e,0x6e,0x69,0x74,0x69,0x61,0x6c,0x70,0x6f,0x73,0x74,0x66, -0x69,0x78,0x65,0x64,0x33,0x12,0x65,0x61,0x64,0x2d,0x13,0x69,0x6e,0x61,0x6c,0x2f, -0x18,0x65,0x61,0x64,0x6c,0x65,0x74,0x74,0x65,0x72,0x31,0x1d,0x6e,0x74,0x69,0x6c, -0x6c,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x29,0x16,0x76,0x61,0x67,0x72, -0x61,0x68,0x61,0x23,1,0x69,0x4a,0x72,0x10,0x61,0x1f,0x68,0x6d,0x69,0x6a,0x6f, -0x69,0x6e,0x69,0x6e,0x67,0x6e,0x75,0x6d,0x62,0x65,0x72,0x27,0x12,0x6e,0x64,0x75, -0x25,2,0x72,0x38,0x74,0x46,0x75,0x26,0x15,0x70,0x72,0x69,0x67,0x68,0x74,0x27, -0x20,0x15,0x6f,0x74,0x61,0x74,0x65,0x64,0x21,1,0x72,0x24,0x75,0x25,0x22,0x18, -0x61,0x6e,0x73,0x66,0x6f,0x72,0x6d,0x65,0x64,1,0x72,0x32,0x75,0x15,0x70,0x72, -0x69,0x67,0x68,0x74,0x25,0x15,0x6f,0x74,0x61,0x74,0x65,0x64,0x23,0xd,0x6e,0xc1, -0x86,0x73,0xa8,0x73,0x4c,0x74,0xa2,0x76,0x75,0xa2,0x83,0x7a,0xd8,0x70,0,2, -0x6c,0xd9,0x20,0,0x70,0xd9,0x40,0,0x73,0xc3,0,0xfe,0xf,0,0,0, -7,0x6f,0x3c,0x6f,0xff,8,0,0,0,0x70,0x3a,0x75,0x6e,0x79,0x13,0x6d, -0x62,0x6f,0x6c,0xff,0xf,0,0,0,0x11,0x61,0x63,1,0x65,0x34,0x69,0x15, -0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa5,0,0x18,0x73,0x65,0x70,0x61,0x72,0x61,0x74, -0x6f,0x72,0xc3,0,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0xe1,0,0,0x63, -0xff,2,0,0,0,0x65,0x38,0x6b,0xff,4,0,0,0,0x6d,0xff,1, -0,0,0,0x16,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x70,0,0x1d,0x69, -0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x31,1,0x6e, -0x40,0x70,0x1c,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72, -0x25,0x17,0x61,0x73,0x73,0x69,0x67,0x6e,0x65,0x64,0x23,0x6e,0xa2,0x69,0x6f,0xa2, -0x89,0x70,0xfe,0x30,0xf8,0,0,9,0x69,0x33,0x69,0xff,0x10,0,0,0, -0x6f,0xfd,0x80,0,0,0x72,0x54,0x73,0xf9,0,0,0x75,0x12,0x6e,0x63,0x74, -0xfe,0x30,0xf8,0,0,0x15,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x30,0xf8,0, -0,0x17,0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0xdd,0,0,0x61,0x48,0x63, -0xfd,0x40,0,0,0x64,0xe9,0,0,0x65,0xfd,0x20,0,0,0x66,0xff,0x20, -0,0,0,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72, -0x61,0x74,0x6f,0x72,0xd9,0x40,0,0xbe,0,3,0x64,0xa7,0,0x6c,0xab,0, -0x6f,0x30,0x75,0x13,0x6d,0x62,0x65,0x72,0xbf,0,0xb2,0,0x1b,0x6e,0x73,0x70, -0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa1,1,0x70,0x92,0x74,0x12,0x68, -0x65,0x72,0xe6,0x80,1,3,0x6c,0x40,0x6e,0x4a,0x70,0x56,0x73,0x14,0x79,0x6d, -0x62,0x6f,0x6c,0xff,8,0,0,0,0x14,0x65,0x74,0x74,0x65,0x72,0x61,0x14, -0x75,0x6d,0x62,0x65,0x72,0xb3,0,0x19,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69, -0x6f,0x6e,0xfd,0x80,0,0,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61, -0x74,0x69,0x6f,0x6e,0xf9,0,0,0x66,0xc0,0xc4,0x66,0xa2,0x47,0x69,0xa2,0x64, -0x6c,0xa2,0x79,0x6d,0xa4,0xc0,4,0x61,0x6c,0x63,0xa5,0,0x65,0xa3,0x80,0x6e, -0xa1,0x6f,0x15,0x64,0x69,0x66,0x69,0x65,0x72,1,0x6c,0x38,0x73,0x14,0x79,0x6d, -0x62,0x6f,0x6c,0xff,4,0,0,0,0x14,0x65,0x74,0x74,0x65,0x72,0x41,1, -0x72,0x3c,0x74,0x16,0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,1,0,0,0, -0x10,0x6b,0xa5,0xc0,1,0x69,0x32,0x6f,0x13,0x72,0x6d,0x61,0x74,0xdb,0,0, -0x1d,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff, -0x20,0,0,0,0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c,0x70,0x75,0x6e,0x63, -0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x10,0,0,0,0x9c,7,0x6d,0x18, -0x6d,0x41,0x6f,0x28,0x74,0x31,0x75,0x25,0x60,0x1c,0x77,0x65,0x72,0x63,0x61,0x73, -0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x29,0x63,0x3d,0x65,0x28,0x69,0x42,0x6c,0x29, -0x13,0x74,0x74,0x65,0x72,0x9c,0x15,0x6e,0x75,0x6d,0x62,0x65,0x72,0xab,0,0x1a, -0x6e,0x65,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x20,0,0x63,0x46, -0x64,0xa2,0x96,0x65,0x1b,0x6e,0x63,0x6c,0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72, -0x6b,0xa3,0x80,0xe6,0x80,1,7,0x6e,0x57,0x6e,0x52,0x6f,0x5e,0x73,0xe1,0, -0,0x75,0x1b,0x72,0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff, -2,0,0,0,0x22,0x12,0x74,0x72,0x6c,0xd9,0x80,0,0xdc,0,0,1, -0x6d,0x62,0x6e,1,0x6e,0x30,0x74,0x12,0x72,0x6f,0x6c,0xd9,0x80,0,0x1f,0x65, -0x63,0x74,0x6f,0x72,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd, -0x40,0,0,0x19,0x62,0x69,0x6e,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa5,0xc0, -0x61,0x58,0x63,0xd9,0x80,0,0x66,0xdb,0,0,0x6c,0x1d,0x6f,0x73,0x65,0x70, -0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x20,0,0,0x18,0x73, -0x65,0x64,0x6c,0x65,0x74,0x74,0x65,0x72,0x3d,2,0x61,0x32,0x65,0x50,0x69,0x12, -0x67,0x69,0x74,0xa7,0,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74, -0x69,0x6f,0x6e,0xe9,0,0,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62, -0x65,0x72,0xa7,0 +0x61,0x6b,0xc3,8,2,0x64,0x4a,0x6e,0xa2,0x91,0x73,1,0x63,0xd9,0x40,3, +0x6f,0x16,0x63,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0xd9,0x40,3,2,0x63,0xa2,0x44, +0x65,0xa2,0x6c,0x73,0x40,2,0x62,0x48,0x74,0x64,0x75,0xa2,0x48,0x1b,0x6e,0x61, +0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0xa3,0x48,0x44,0x1c,0x69,0x6e, +0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x45,0x46,1,0x61,0x40, +0x72,0x1c,0x69,0x6e,0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x47, +0x11,0x72,0x74,0x41,0x3e,0x10,0x6f,1,0x6d,0x30,0x6e,0x14,0x74,0x69,0x6e,0x75, +0x65,0x3f,0x16,0x70,0x61,0x74,0x6d,0x61,0x74,0x68,1,0x63,0x30,0x73,0x13,0x74, +0x61,0x72,0x74,0xa3,0x49,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0xa3,0x4a,0x10, +0x6f,0x42,0x16,0x67,0x72,0x61,0x70,0x68,0x69,0x63,0x43,2,0x64,0x2e,0x70,0x86, +0x73,0x10,0x63,0xc3,0x17,0x11,0x69,0x63,1,0x70,0x46,0x73,0x1e,0x79,0x6c,0x6c, +0x61,0x62,0x69,0x63,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x17,0x10,0x6f, +0x1f,0x73,0x69,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72, +0x79,0xc3,0x16,0x10,0x63,0xc3,0x16,2,0x67,0xc3,6,0x6f,0x26,0x74,0xc3,7, +0x11,0x69,0x6e,1,0x63,0x4a,0x69,0x11,0x6e,0x67,1,0x67,0x2e,0x74,0x12,0x79, +0x70,0x65,0xc3,7,0x13,0x72,0x6f,0x75,0x70,0xc3,6,0x48,0x15,0x6f,0x6e,0x74, +0x72,0x6f,0x6c,0x49,0x66,0x86,0x67,0xa2,0x4a,0x68,3,0x61,0x36,0x65,0x58,0x73, +0x68,0x79,0x13,0x70,0x68,0x65,0x6e,0x3d,0x1f,0x6e,0x67,0x75,0x6c,0x73,0x79,0x6c, +0x6c,0x61,0x62,0x6c,0x65,0x74,0x79,0x70,0x65,0xc3,0xb,0x10,0x78,0x3a,0x14,0x64, +0x69,0x67,0x69,0x74,0x3b,0x10,0x74,0xc3,0xb,0x16,0x75,0x6c,0x6c,0x63,0x6f,0x6d, +0x70,0x1f,0x6f,0x73,0x69,0x74,0x69,0x6f,0x6e,0x65,0x78,0x63,0x6c,0x75,0x73,0x69, +0x6f,0x6e,0x33,2,0x63,0xa2,0x44,0x65,0xa2,0x4b,0x72,3,0x61,0x34,0x62,0x84, +0x65,0x8a,0x6c,0x12,0x69,0x6e,0x6b,0x39,0x11,0x70,0x68,0x7c,0x12,0x65,0x6d,0x65, +3,0x62,0x5e,0x63,0x30,0x65,0x48,0x6c,0x12,0x69,0x6e,0x6b,0x39,0x1a,0x6c,0x75, +0x73,0x74,0x65,0x72,0x62,0x72,0x65,0x61,0x6b,0xc3,0x12,0x14,0x78,0x74,0x65,0x6e, +0x64,0x37,0x12,0x61,0x73,0x65,0x35,0x11,0x78,0x74,0x37,0xc2,5,1,0x62,0xc3, +0x12,0x6d,0xd9,0x20,0,0x1c,0x6e,0x65,0x72,0x61,0x6c,0x63,0x61,0x74,0x65,0x67, +0x6f,0x72,0x79,0xc2,5,0x13,0x6d,0x61,0x73,0x6b,0xd9,0x20,0,0x61,0xa2,0xa2, +0x62,0xa2,0xd0,0x63,0xa4,0x4f,0x64,0xa6,0x1c,0x65,5,0x6d,0x75,0x6d,0x6e,0x70, +0xa2,0x6b,0x78,0x10,0x74,0x30,1,0x65,0x2c,0x70,0x12,0x69,0x63,0x74,0xa1,0x12, +0x6e,0x64,0x65,1,0x64,0x24,0x72,0x31,0x1b,0x70,0x69,0x63,0x74,0x6f,0x67,0x72, +0x61,0x70,0x68,0x69,0x63,0xa1,0x10,0x6f,1,0x64,0x97,0x6a,0x10,0x69,0x92,3, +0x63,0x44,0x6b,0x54,0x6d,0x70,0x70,0x1a,0x72,0x65,0x73,0x65,0x6e,0x74,0x61,0x74, +0x69,0x6f,0x6e,0x95,0x17,0x6f,0x6d,0x70,0x6f,0x6e,0x65,0x6e,0x74,0x9b,0x1c,0x65, +0x79,0x63,0x61,0x70,0x73,0x65,0x71,0x75,0x65,0x6e,0x63,0x65,0xa3,0x42,0x16,0x6f, +0x64,0x69,0x66,0x69,0x65,0x72,0x96,0x13,0x62,0x61,0x73,0x65,0x99,0x12,0x72,0x65, +0x73,0x95,0x61,0x30,0x62,0x4e,0x63,0x12,0x6f,0x6d,0x70,0x9b,0xc2,4,0x1b,0x73, +0x74,0x61,0x73,0x69,0x61,0x6e,0x77,0x69,0x64,0x74,0x68,0xc3,4,0x12,0x61,0x73, +0x65,0x99,3,0x67,0x44,0x68,0x4a,0x6c,0x4e,0x73,0x1a,0x63,0x69,0x69,0x68,0x65, +0x78,0x64,0x69,0x67,0x69,0x74,0x23,0x10,0x65,0xd9,0x40,0,0x11,0x65,0x78,0x23, +1,0x6e,0x38,0x70,0x11,0x68,0x61,0x20,0x14,0x62,0x65,0x74,0x69,0x63,0x21,0x11, +0x75,0x6d,0x79,5,0x6c,0x22,0x6c,0x36,0x6d,0x52,0x70,1,0x62,0xd9,0x40,0xd, +0x74,0xc3,0x15,2,0x61,0x32,0x6b,0xc3,1,0x6f,0x11,0x63,0x6b,0xc3,1,0x11, +0x6e,0x6b,0x7b,0x10,0x67,0xd9,0x40,1,0x61,0xa2,0x4f,0x63,0xc3,0,0x69,0x11, +0x64,0x69,2,0x63,0x54,0x6d,0x74,0x70,0x1b,0x61,0x69,0x72,0x65,0x64,0x62,0x72, +0x61,0x63,0x6b,0x65,0x74,0xd8,0x40,0xd,0x13,0x74,0x79,0x70,0x65,0xc3,0x15,0x24, +1,0x6c,0x30,0x6f,0x14,0x6e,0x74,0x72,0x6f,0x6c,0x25,0x12,0x61,0x73,0x73,0xc3, +0,0x26,0x14,0x69,0x72,0x72,0x6f,0x72,1,0x65,0x38,0x69,0x16,0x6e,0x67,0x67, +0x6c,0x79,0x70,0x68,0xd9,0x40,1,0x10,0x64,0x27,0x17,0x73,0x69,0x63,0x65,0x6d, +0x6f,0x6a,0x69,0xa3,0x41,6,0x68,0x7c,0x68,0x54,0x69,0x85,0x6f,0xa2,0x6f,0x77, +4,0x63,0x30,0x6b,0x36,0x6c,0x87,0x74,0x8b,0x75,0x89,1,0x66,0x8d,0x6d,0x8f, +0x11,0x63,0x66,0x91,0x18,0x61,0x6e,0x67,0x65,0x73,0x77,0x68,0x65,0x6e,4,0x63, +0x44,0x6c,0x6c,0x6e,0x7e,0x74,0x98,0x75,0x18,0x70,0x70,0x65,0x72,0x63,0x61,0x73, +0x65,0x64,0x89,0x12,0x61,0x73,0x65,1,0x66,0x30,0x6d,0x14,0x61,0x70,0x70,0x65, +0x64,0x8f,0x14,0x6f,0x6c,0x64,0x65,0x64,0x8d,0x18,0x6f,0x77,0x65,0x72,0x63,0x61, +0x73,0x65,0x64,0x87,0x1c,0x66,0x6b,0x63,0x63,0x61,0x73,0x65,0x66,0x6f,0x6c,0x64, +0x65,0x64,0x91,0x18,0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x64,0x8b,0x13,0x6d, +0x70,0x65,0x78,0x33,0x61,0x2e,0x63,0xa2,0x48,0x66,0xd9,0x40,2,1,0x6e,0x72, +0x73,0x10,0x65,3,0x64,0x83,0x66,0x3a,0x69,0x4a,0x73,0x17,0x65,0x6e,0x73,0x69, +0x74,0x69,0x76,0x65,0x65,0x15,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,2,0x17, +0x67,0x6e,0x6f,0x72,0x61,0x62,0x6c,0x65,0x85,0x13,0x6f,0x6e,0x69,0x63,0x1f,0x61, +0x6c,0x63,0x6f,0x6d,0x62,0x69,0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3, +2,0x10,0x63,0xc3,2,3,0x61,0x30,0x65,0x34,0x69,0xa2,0x41,0x74,0xc3,3, +0x11,0x73,0x68,0x29,2,0x63,0x3a,0x66,0x58,0x70,0x2c,0x16,0x72,0x65,0x63,0x61, +0x74,0x65,0x64,0x2d,0x1d,0x6f,0x6d,0x70,0x6f,0x73,0x69,0x74,0x69,0x6f,0x6e,0x74, +0x79,0x70,0x65,0xc3,3,0x15,0x61,0x75,0x6c,0x74,0x69,0x67,0x1f,0x6e,0x6f,0x72, +0x61,0x62,0x6c,0x65,0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x2b,0x2a,0x10, +0x61,0x2e,0x15,0x63,0x72,0x69,0x74,0x69,0x63,0x2f,3,0x66,0x34,0x6e,0x3e,0x74, +0x42,0x79,0x22,0x11,0x65,0x73,0x23,0x20,0x13,0x61,0x6c,0x73,0x65,0x21,0x20,0x10, +0x6f,0x21,0x22,0x12,0x72,0x75,0x65,0x23,0xb,0x6b,0x5b,0x6f,0x23,0x6f,0x3c,0x72, +0x4c,0x76,1,0x69,0x24,0x72,0x33,0x13,0x72,0x61,0x6d,0x61,0x33,0x10,0x76,0x22, +0x14,0x65,0x72,0x6c,0x61,0x79,0x23,0xa2,0xe2,0x13,0x69,0x67,0x68,0x74,0xa3,0xe2, +0x6b,0x58,0x6c,0x74,0x6e,3,0x6b,0x2f,0x6f,0x30,0x72,0x21,0x75,0x12,0x6b,0x74, +0x61,0x2f,0x19,0x74,0x72,0x65,0x6f,0x72,0x64,0x65,0x72,0x65,0x64,0x21,1,0x61, +0x24,0x76,0x31,0x18,0x6e,0x61,0x76,0x6f,0x69,0x63,0x69,0x6e,0x67,0x31,0xa2,0xe0, +0x12,0x65,0x66,0x74,0xa3,0xe0,0x64,0x45,0x64,0x4e,0x68,0x88,0x69,1,0x6f,0x26, +0x73,0xa3,0xf0,0x1a,0x74,0x61,0x73,0x75,0x62,0x73,0x63,0x72,0x69,0x70,0x74,0xa3, +0xf0,2,0x61,0xa3,0xea,0x62,0xa3,0xe9,0x6f,0x13,0x75,0x62,0x6c,0x65,1,0x61, +0x30,0x62,0x13,0x65,0x6c,0x6f,0x77,0xa3,0xe9,0x13,0x62,0x6f,0x76,0x65,0xa3,0xea, +0x12,0x61,0x6e,0x72,0x2c,0x15,0x65,0x61,0x64,0x69,0x6e,0x67,0x2d,0x61,0xa2,0x7b, +0x62,0xa2,0xd4,0x63,0x11,0x63,0x63,4,0x31,0x3c,0x32,0xa2,0x42,0x33,0xa2,0x56, +0x38,0xa2,0x64,0x39,0x10,0x31,0xa3,0x5b,9,0x35,0xa,0x35,0x3f,0x36,0x41,0x37, +0x43,0x38,0x45,0x39,0x47,0x30,0x30,0x31,0x3c,0x32,0x42,0x33,0x4e,0x34,0x3d,0x34, +1,0x33,0xa3,0x67,0x37,0xa3,0x6b,0x36,0x10,0x38,0xa3,0x76,0x38,1,0x32,0xa3, +0x7a,0x39,0xa3,0x81,0x3a,2,0x30,0xa3,0x82,0x32,0xa3,0x84,0x33,0xa3,0x85,9, +0x35,0xa,0x35,0x53,0x36,0x55,0x37,0x57,0x38,0x59,0x39,0x5b,0x30,0x49,0x31,0x4b, +0x32,0x4d,0x33,0x4f,0x34,0x51,6,0x33,8,0x33,0x63,0x34,0x65,0x35,0x67,0x36, +0x69,0x30,0x5d,0x31,0x5f,0x32,0x61,0x10,0x34,0xa3,0x54,0xa2,0xe6,3,0x62,0xa0, +0x6c,0xa3,0xe4,0x72,0xa3,0xe8,0x74,2,0x61,0x74,0x62,0x7c,0x74,0x14,0x61,0x63, +0x68,0x65,0x64,1,0x61,0x3e,0x62,0x13,0x65,0x6c,0x6f,0x77,0xa2,0xca,0x13,0x6c, +0x65,0x66,0x74,0xa3,0xc8,0x13,0x62,0x6f,0x76,0x65,0xa2,0xd6,0x14,0x72,0x69,0x67, +0x68,0x74,0xa3,0xd8,0xa2,0xd6,0x10,0x72,0xa3,0xd8,0xa2,0xca,0x10,0x6c,0xa3,0xc8, +0x12,0x6f,0x76,0x65,0xa2,0xe6,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3, +0xe8,0x12,0x65,0x66,0x74,0xa3,0xe4,0xa2,0xdc,2,0x65,0x2c,0x6c,0xa3,0xda,0x72, +0xa3,0xde,0x12,0x6c,0x6f,0x77,0xa2,0xdc,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68, +0x74,0xa3,0xde,0x12,0x65,0x66,0x74,0xa3,0xda,0xb,0x6e,0xc0,0xca,0x72,0x5f,0x72, +0x46,0x73,0xa2,0x48,0x77,1,0x68,0x24,0x73,0x33,0x17,0x69,0x74,0x65,0x73,0x70, +0x61,0x63,0x65,0x33,0x22,1,0x69,0x30,0x6c,2,0x65,0x3d,0x69,0x4b,0x6f,0x3f, +0x18,0x67,0x68,0x74,0x74,0x6f,0x6c,0x65,0x66,0x74,0x22,2,0x65,0x38,0x69,0x48, +0x6f,0x16,0x76,0x65,0x72,0x72,0x69,0x64,0x65,0x3f,0x17,0x6d,0x62,0x65,0x64,0x64, +0x69,0x6e,0x67,0x3d,0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4b,0x30,0x1e,0x65,0x67, +0x6d,0x65,0x6e,0x74,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x31,0x6e,0xa2, +0x41,0x6f,0xa2,0x53,0x70,2,0x61,0x66,0x64,0x86,0x6f,0x1b,0x70,0x64,0x69,0x72, +0x65,0x63,0x74,0x69,0x6f,0x6e,0x61,0x6c,1,0x66,0x32,0x69,0x15,0x73,0x6f,0x6c, +0x61,0x74,0x65,0x4d,0x14,0x6f,0x72,0x6d,0x61,0x74,0x41,0x1f,0x72,0x61,0x67,0x72, +0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x2f,1,0x66,0x41, +0x69,0x4d,1,0x6f,0x28,0x73,0x10,0x6d,0x43,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69, +0x6e,0x67,0x6d,0x61,0x72,0x6b,0x43,1,0x6e,0x35,0x74,0x19,0x68,0x65,0x72,0x6e, +0x65,0x75,0x74,0x72,0x61,0x6c,0x35,0x65,0x88,0x65,0x98,0x66,0xa2,0x6a,0x6c,0x20, +1,0x65,0x30,0x72,2,0x65,0x37,0x69,0x49,0x6f,0x39,0x18,0x66,0x74,0x74,0x6f, +0x72,0x69,0x67,0x68,0x74,0x20,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,0x72, +0x72,0x69,0x64,0x65,0x39,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x37,0x15, +0x73,0x6f,0x6c,0x61,0x74,0x65,0x49,3,0x6e,0x25,0x73,0x27,0x74,0x29,0x75,0x15, +0x72,0x6f,0x70,0x65,0x61,0x6e,2,0x6e,0x3c,0x73,0x46,0x74,0x18,0x65,0x72,0x6d, +0x69,0x6e,0x61,0x74,0x6f,0x72,0x29,0x14,0x75,0x6d,0x62,0x65,0x72,0x25,0x17,0x65, +0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x27,1,0x69,0x28,0x73,0x10,0x69,0x47,0x1f, +0x72,0x73,0x74,0x73,0x74,0x72,0x6f,0x6e,0x67,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65, +0x47,0x61,0x4e,0x62,0x84,0x63,1,0x6f,0x24,0x73,0x2d,0x1c,0x6d,0x6d,0x6f,0x6e, +0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x2d,2,0x6c,0x3b,0x6e,0x2b,0x72, +0x13,0x61,0x62,0x69,0x63,1,0x6c,0x30,0x6e,0x14,0x75,0x6d,0x62,0x65,0x72,0x2b, +0x14,0x65,0x74,0x74,0x65,0x72,0x3b,0x2e,1,0x6e,0x45,0x6f,0x1c,0x75,0x6e,0x64, +0x61,0x72,0x79,0x6e,0x65,0x75,0x74,0x72,0x61,0x6c,0x45,0,0x16,0x6d,0xc9,0x20, +0x74,0xc2,0x30,0x77,0x89,0x77,0x86,0x79,0xa2,0x46,0x7a,1,0x61,0x58,0x6e,0x1a, +0x61,0x6d,0x65,0x6e,0x6e,0x79,0x6d,0x75,0x73,0x69,0x63,0xa4,0x40,0x19,0x61,0x6c, +0x6e,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0xa5,0x40,0x1c,0x6e,0x61,0x62,0x61,0x7a, +0x61,0x72,0x73,0x71,0x75,0x61,0x72,0x65,0xa5,0x18,0x10,0x61,1,0x6e,0x36,0x72, +0x16,0x61,0x6e,0x67,0x63,0x69,0x74,0x69,0xa3,0xfc,0x12,0x63,0x68,0x6f,0xa5,0x2c, +1,0x65,0x88,0x69,2,0x6a,0x3c,0x72,0x68,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62, +0x6c,0x65,0x73,0xa3,0x48,0x12,0x69,0x6e,0x67,0xa2,0x74,0x1e,0x68,0x65,0x78,0x61, +0x67,0x72,0x61,0x6d,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x74,0x16,0x61,0x64, +0x69,0x63,0x61,0x6c,0x73,0xa3,0x49,0x13,0x7a,0x69,0x64,0x69,0xa5,0x34,0x74,0xa2, +0x65,0x75,0xa4,0x4f,0x76,3,0x61,0x3c,0x65,0x80,0x69,0xa2,0x50,0x73,0xa2,0x6c, +0x12,0x73,0x75,0x70,0xa3,0x7d,1,0x69,0xa3,0x9f,0x72,0x1e,0x69,0x61,0x74,0x69, +0x6f,0x6e,0x73,0x65,0x6c,0x65,0x63,0x74,0x6f,0x72,0x73,0xa2,0x6c,0x19,0x73,0x75, +0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x7d,1,0x64,0x3c,0x72,0x19,0x74, +0x69,0x63,0x61,0x6c,0x66,0x6f,0x72,0x6d,0x73,0xa3,0x91,0x14,0x69,0x63,0x65,0x78, +0x74,0xa2,0xaf,0x16,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0xaf,0x15,0x74,0x68, +0x6b,0x75,0x71,0x69,0xa5,0x3f,5,0x69,0x3f,0x69,0x5a,0x6f,0x8c,0x72,0x1c,0x61, +0x6e,0x73,0x70,0x6f,0x72,0x74,0x61,0x6e,0x64,0x6d,0x61,0x70,0xa2,0xcf,0x16,0x73, +0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xcf,2,0x62,0x34,0x66,0x3c,0x72,0x13,0x68, +0x75,0x74,0x61,0xa3,0xfb,0x13,0x65,0x74,0x61,0x6e,0x57,0x14,0x69,0x6e,0x61,0x67, +0x68,0xa3,0x90,0x11,0x74,0x6f,0xa5,0x3d,0x61,0x3e,0x65,0xa2,0xa0,0x68,0x10,0x61, +1,0x61,0x24,0x69,0x53,0x11,0x6e,0x61,0x3d,4,0x67,0x8e,0x69,0xa2,0x49,0x6b, +0xa2,0x72,0x6d,0xa2,0x74,0x6e,0x10,0x67,1,0x73,0x68,0x75,0x10,0x74,0xa4,0x10, +1,0x63,0x40,0x73,0x11,0x75,0x70,0xa4,0x33,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e, +0x74,0xa5,0x33,0x18,0x6f,0x6d,0x70,0x6f,0x6e,0x65,0x6e,0x74,0x73,0xa5,0x11,0x10, +0x61,0xa5,0x3c,2,0x61,0x2a,0x62,0x32,0x73,0xa3,0x60,0x12,0x6c,0x6f,0x67,0xa3, +0x62,0x13,0x61,0x6e,0x77,0x61,0xa3,0x65,3,0x6c,0x52,0x74,0x56,0x76,0x5e,0x78, +0x16,0x75,0x61,0x6e,0x6a,0x69,0x6e,0x67,0xa2,0x7c,0x16,0x73,0x79,0x6d,0x62,0x6f, +0x6c,0x73,0xa3,0x7c,0x10,0x65,0xa3,0x70,0x12,0x68,0x61,0x6d,0xa3,0xae,0x12,0x69, +0x65,0x74,0xa3,0xb7,0x11,0x72,0x69,0xa3,0xdc,0x11,0x69,0x6c,0x48,0x12,0x73,0x75, +0x70,0xa4,0x2b,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x2b,0x13,0x6c,0x75, +0x67,0x75,0x4b,2,0x63,0x8c,0x67,0xa2,0x41,0x6e,0x1f,0x69,0x66,0x69,0x65,0x64, +0x63,0x61,0x6e,0x61,0x64,0x69,0x61,0x6e,0x61,0x62,0x6f,0x1f,0x72,0x69,0x67,0x69, +0x6e,0x61,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x73,0x62,0x17,0x65,0x78, +0x74,0x65,0x6e,0x64,0x65,0x64,0xa2,0xad,0x10,0x61,0xa5,0x3e,0x11,0x61,0x73,0x62, +0x12,0x65,0x78,0x74,0xa2,0xad,0x10,0x61,0xa5,0x3e,0x15,0x61,0x72,0x69,0x74,0x69, +0x63,0xa3,0x78,0x70,0xc3,0x4b,0x70,0xa6,0x61,0x72,0xa8,0x1d,0x73,7,0x6f,0xc1, +0xbe,0x6f,0xa2,0x69,0x70,0xa2,0x85,0x75,0xa2,0xa4,0x79,2,0x6c,0x50,0x6d,0x62, +0x72,0x12,0x69,0x61,0x63,0x3a,0x12,0x73,0x75,0x70,0xa4,0x17,0x16,0x70,0x6c,0x65, +0x6d,0x65,0x6e,0x74,0xa5,0x17,0x17,0x6f,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,0xa3, +0x8f,0x13,0x62,0x6f,0x6c,0x73,1,0x61,0x4c,0x66,0x10,0x6f,0x1f,0x72,0x6c,0x65, +0x67,0x61,0x63,0x79,0x63,0x6f,0x6d,0x70,0x75,0x74,0x69,0x6e,0x67,0xa5,0x32,0x1f, +0x6e,0x64,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x78,0x74, +1,0x61,0xa5,0x2a,0x65,0x14,0x6e,0x64,0x65,0x64,0x61,0xa5,0x2a,2,0x67,0x34, +0x72,0x3e,0x79,0x13,0x6f,0x6d,0x62,0x6f,0xa5,0x16,0x13,0x64,0x69,0x61,0x6e,0xa5, +0x23,0x17,0x61,0x73,0x6f,0x6d,0x70,0x65,0x6e,0x67,0xa3,0xda,1,0x61,0x32,0x65, +0x14,0x63,0x69,0x61,0x6c,0x73,0xa3,0x56,0x12,0x63,0x69,0x6e,0x1f,0x67,0x6d,0x6f, +0x64,0x69,0x66,0x69,0x65,0x72,0x6c,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,2,0x6e, +0x48,0x70,0x76,0x74,0x1d,0x74,0x6f,0x6e,0x73,0x69,0x67,0x6e,0x77,0x72,0x69,0x74, +0x69,0x6e,0x67,0xa5,6,0x15,0x64,0x61,0x6e,0x65,0x73,0x65,0xa2,0x9b,0x12,0x73, +0x75,0x70,0xa2,0xdb,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xdb,4,0x61, +0xa2,0xa8,0x65,0x5c,0x6d,0x9e,0x70,0xa2,0x4b,0x73,0x13,0x79,0x6d,0x62,0x6f,0x1f, +0x6c,0x73,0x61,0x6e,0x64,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73, +0xa5,5,0x10,0x72,1,0x61,0x4e,0x73,0x12,0x63,0x72,0x69,0x1f,0x70,0x74,0x73, +0x61,0x6e,0x64,0x73,0x75,0x62,0x73,0x63,0x72,0x69,0x70,0x74,0x73,0x73,0x14,0x6e, +0x64,0x73,0x75,0x62,0x73,0x1b,0x61,0x74,0x68,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f, +0x72,0x73,0xa3,0x6a,1,0x6c,0x40,0x75,1,0x61,0x6e,0x6e,0x17,0x63,0x74,0x75, +0x61,0x74,0x69,0x6f,0x6e,0xa3,0x8e,0x15,0x65,0x6d,0x65,0x6e,0x74,0x61,1,0x6c, +0x50,0x72,0x1e,0x79,0x70,0x72,0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0x61,0x72, +0x65,0x61,1,0x61,0xa3,0x6d,0x62,0xa3,0x6e,3,0x61,0x5c,0x6d,0x78,0x70,0xa2, +0x41,0x73,0x13,0x79,0x6d,0x62,0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70,0x69,0x63, +0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa5,5,0x14,0x72,0x72,0x6f,0x77,0x73, +2,0x61,0xa3,0x67,0x62,0xa3,0x68,0x63,0xa3,0xfa,0x13,0x61,0x74,0x68,0x65,0x1f, +0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73, +0xa3,0x6a,0x19,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x8e,0x61, +0x88,0x68,0xa2,0x48,0x69,0xa2,0x71,0x6d,0x12,0x61,0x6c,0x6c,1,0x66,0x46,0x6b, +0x15,0x61,0x6e,0x61,0x65,0x78,0x74,0xa4,0x29,0x15,0x65,0x6e,0x73,0x69,0x6f,0x6e, +0xa5,0x29,0x12,0x6f,0x72,0x6d,1,0x73,0xa3,0x54,0x76,0x16,0x61,0x72,0x69,0x61, +0x6e,0x74,0x73,0xa3,0x54,1,0x6d,0x36,0x75,0x16,0x72,0x61,0x73,0x68,0x74,0x72, +0x61,0xa3,0xa1,0x15,0x61,0x72,0x69,0x74,0x61,0x6e,0xa3,0xac,1,0x61,0x52,0x6f, +0x13,0x72,0x74,0x68,0x61,0x1f,0x6e,0x64,0x66,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f, +0x6e,0x74,0x72,0x6f,0x6c,0x73,0xa3,0xf7,1,0x72,0x2e,0x76,0x12,0x69,0x61,0x6e, +0xa3,0x79,0x12,0x61,0x64,0x61,0xa3,0xd9,1,0x64,0x50,0x6e,0x13,0x68,0x61,0x6c, +0x61,0x50,0x1d,0x61,0x72,0x63,0x68,0x61,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72, +0x73,0xa3,0xf9,0x13,0x64,0x68,0x61,0x6d,0xa3,0xf8,5,0x72,0x35,0x72,0x44,0x73, +0x64,0x75,1,0x61,0xa3,0x4e,0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e, +0x71,0x17,0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0xa2,0x4e,0x13,0x61,0x72,0x65, +0x61,0xa3,0x4e,0x1b,0x61,0x6c,0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69, +0xa3,0xf6,0x61,0x40,0x68,0x82,0x6c,0x19,0x61,0x79,0x69,0x6e,0x67,0x63,0x61,0x72, +0x64,0x73,0xa3,0xcc,2,0x68,0x38,0x6c,0x4a,0x75,0x15,0x63,0x69,0x6e,0x68,0x61, +0x75,0xa3,0xf5,0x17,0x61,0x77,0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0xf3,0x15,0x6d, +0x79,0x72,0x65,0x6e,0x65,0xa3,0xf4,1,0x61,0x8e,0x6f,1,0x65,0x74,0x6e,0x16, +0x65,0x74,0x69,0x63,0x65,0x78,0x74,0xa2,0x72,1,0x65,0x2c,0x73,0x11,0x75,0x70, +0xa3,0x8d,0x15,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa2,0x72,0x19,0x73,0x75,0x70,0x70, +0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x8d,0x15,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3, +0x97,1,0x67,0x3e,0x69,0x13,0x73,0x74,0x6f,0x73,0xa2,0xa6,0x13,0x64,0x69,0x73, +0x63,0xa3,0xa6,0x12,0x73,0x70,0x61,0xa3,0x96,1,0x65,0x5c,0x75,1,0x6d,0x2a, +0x6e,0x11,0x69,0x63,0x67,0x10,0x69,0xa2,0xc0,0x1d,0x6e,0x75,0x6d,0x65,0x72,0x61, +0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xc0,0x13,0x6a,0x61,0x6e,0x67,0xa3, +0xa3,0x6d,0xa2,0xf0,0x6e,0xa8,0x23,0x6f,6,0x70,0x63,0x70,0x56,0x72,0x8a,0x73, +0xa2,0x4c,0x74,0x10,0x74,0x1f,0x6f,0x6d,0x61,0x6e,0x73,0x69,0x79,0x61,0x71,0x6e, +0x75,0x6d,0x62,0x65,0x72,0x73,0xa5,0x28,0x18,0x74,0x69,0x63,0x61,0x6c,0x63,0x68, +0x61,0x72,0x1f,0x61,0x63,0x74,0x65,0x72,0x72,0x65,0x63,0x6f,0x67,0x6e,0x69,0x74, +0x69,0x6f,0x6e,0x85,1,0x69,0x46,0x6e,0x1e,0x61,0x6d,0x65,0x6e,0x74,0x61,0x6c, +0x64,0x69,0x6e,0x67,0x62,0x61,0x74,0x73,0xa3,0xf2,0x11,0x79,0x61,0x47,1,0x61, +0x30,0x6d,0x13,0x61,0x6e,0x79,0x61,0xa3,0x7a,0x11,0x67,0x65,0xa5,0xf,0x63,0xa2, +0x7b,0x67,0xa2,0x7b,0x6c,1,0x63,0xa2,0x6c,0x64,6,0x70,0x42,0x70,0x3a,0x73, +0x5a,0x74,0x88,0x75,0x14,0x79,0x67,0x68,0x75,0x72,0xa5,0x3b,0x11,0x65,0x72,1, +0x6d,0x2e,0x73,0x12,0x69,0x61,0x6e,0xa3,0x8c,0x11,0x69,0x63,0xa3,0xf1,0x10,0x6f, +1,0x67,0x3a,0x75,0x18,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0xbb, +0x13,0x64,0x69,0x61,0x6e,0xa5,0x22,0x14,0x75,0x72,0x6b,0x69,0x63,0xa3,0xbf,0x68, +0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e, +0xa3,0xf0,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa5,4,0x14,0x74,0x61, +0x6c,0x69,0x63,0xa3,0x58,0x13,0x68,0x69,0x6b,0x69,0xa3,0x9d,0x10,0x72,0x85,0x12, +0x68,0x61,0x6d,0x65,6,0x6f,0x86,0x6f,0x6c,0x72,0xa2,0x61,0x75,0xa2,0x62,0x79, +0x14,0x61,0x6e,0x6d,0x61,0x72,0x58,0x12,0x65,0x78,0x74,2,0x61,0xa3,0xb6,0x62, +0xa3,0xee,0x65,0x13,0x6e,0x64,0x65,0x64,1,0x61,0xa3,0xb6,0x62,0xa3,0xee,1, +0x64,0x52,0x6e,0x15,0x67,0x6f,0x6c,0x69,0x61,0x6e,0x6a,0x12,0x73,0x75,0x70,0xa4, +0xd,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xd,0x10,0x69,0xa2,0xec,0x13, +0x66,0x69,0x65,0x72,1,0x6c,0x3c,0x74,0x19,0x6f,0x6e,0x65,0x6c,0x65,0x74,0x74, +0x65,0x72,0x73,0xa3,0x8a,0x15,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,0x10,0x6f,0xa3, +0xed,1,0x6c,0x44,0x73,0x11,0x69,0x63,0xa2,0x5c,0x18,0x61,0x6c,0x73,0x79,0x6d, +0x62,0x6f,0x6c,0x73,0xa3,0x5c,0x13,0x74,0x61,0x6e,0x69,0xa5,3,0x61,0xa2,0x9b, +0x65,0xa4,0x4c,0x69,1,0x61,0xa2,0x8f,0x73,0x10,0x63,5,0x70,0x18,0x70,0xa2, +0x71,0x73,0x36,0x74,0x17,0x65,0x63,0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x15,0x79, +0x6d,0x62,0x6f,0x6c,0x73,0x8f,0x61,0xa2,0x66,0x65,0x46,0x6d,0x19,0x61,0x74,0x68, +0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,0xa3,0x69,0x17,0x6c, +0x6c,0x61,0x6e,0x65,0x6f,0x75,0x73,2,0x6d,0x3a,0x73,0x6c,0x74,0x17,0x65,0x63, +0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x11,0x61,0x74,0x1f,0x68,0x65,0x6d,0x61,0x74, +0x69,0x63,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62, +0xa3,0x69,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x8e,0x12,0x61,0x6e,0x64,1,0x61, +0x3c,0x70,0x19,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa3,0xcd,0x14, +0x72,0x72,0x6f,0x77,0x73,0xa3,0x73,0x10,0x6f,0xa3,0xd8,7,0x72,0x6f,0x72,0x44, +0x73,0x4e,0x74,0x62,0x79,0x19,0x61,0x6e,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73, +0xa5,0x20,0x13,0x63,0x68,0x65,0x6e,0xa5,0xc,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f, +0x6e,0x64,0x69,0xa5,0x14,0x10,0x68,2,0x61,0x3a,0x65,0x4a,0x6f,0x17,0x70,0x65, +0x72,0x61,0x74,0x6f,0x72,0x73,0x7f,0x16,0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0xa3, +0x5d,0x16,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,1,0x61,0x36,0x6f,0x17,0x70,0x65, +0x72,0x61,0x74,0x6f,0x72,0x73,0x7f,0x11,0x6c,0x70,0x1f,0x68,0x61,0x6e,0x75,0x6d, +0x65,0x72,0x69,0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5d,0x68,0x50,0x6b, +0x7e,0x6c,0x88,0x6e,1,0x64,0x34,0x69,0x15,0x63,0x68,0x61,0x65,0x61,0x6e,0xa3, +0xea,0x12,0x61,0x69,0x63,0xa3,0xc6,1,0x61,0x3e,0x6a,0x12,0x6f,0x6e,0x67,0xa2, +0xaa,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xaa,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xe9, +0x13,0x61,0x73,0x61,0x72,0xa5,0x1f,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x4f,3, +0x64,0x6c,0x65,0x7e,0x6e,0xa2,0x47,0x72,0x14,0x6f,0x69,0x74,0x69,0x63,1,0x63, +0x3c,0x68,0x19,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xd7,0x15, +0x75,0x72,0x73,0x69,0x76,0x65,0xa3,0xd6,0x17,0x65,0x66,0x61,0x69,0x64,0x72,0x69, +0x6e,0xa5,0x21,0x17,0x74,0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa2,0xb8,0x12,0x65, +0x78,0x74,0xa2,0xd5,0x16,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0xd5,0x18,0x64, +0x65,0x6b,0x69,0x6b,0x61,0x6b,0x75,0x69,0xa3,0xeb,6,0x6b,0x3b,0x6b,0x56,0x6f, +0x5a,0x75,0x64,0x79,0x11,0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61,0x63, +0x68,0x75,0x65,0x68,0x6d,0x6f,0x6e,0x67,0xa5,0x27,0x10,0x6f,0xa3,0x92,0x14,0x62, +0x6c,0x6f,0x63,0x6b,0x21,1,0x6d,0x2c,0x73,0x11,0x68,0x75,0xa5,0x15,0x17,0x62, +0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0x7b,0x61,0x44,0x62,0x21,0x65,0x10,0x77,1, +0x61,0xa5,0xe,0x74,0x14,0x61,0x69,0x6c,0x75,0x65,0xa3,0x8b,2,0x62,0x3c,0x67, +0x4a,0x6e,0x17,0x64,0x69,0x6e,0x61,0x67,0x61,0x72,0x69,0xa5,0x26,0x15,0x61,0x74, +0x61,0x65,0x61,0x6e,0xa3,0xef,0x16,0x6d,0x75,0x6e,0x64,0x61,0x72,0x69,0xa5,0x47, +0x67,0xc4,0x5d,0x6a,0xc1,0xe4,0x6a,0xa2,0xdf,0x6b,0xa2,0xf8,0x6c,4,0x61,0x54, +0x65,0xa2,0x6b,0x69,0xa2,0x82,0x6f,0xa2,0xc1,0x79,1,0x63,0x2e,0x64,0x12,0x69, +0x61,0x6e,0xa3,0xa9,0x12,0x69,0x61,0x6e,0xa3,0xa7,1,0x6f,0x55,0x74,0x11,0x69, +0x6e,1,0x31,0x96,0x65,0x11,0x78,0x74,6,0x64,0x21,0x64,0xa3,0x95,0x65,0x2c, +0x66,0xa5,0x39,0x67,0xa5,0x3a,0xa2,0xe7,0x13,0x6e,0x64,0x65,0x64,6,0x64,0xc, +0x64,0xa3,0x95,0x65,0xa3,0xe7,0x66,0xa5,0x39,0x67,0xa5,0x3a,0x61,0x2a,0x62,0x29, +0x63,0xa3,0x94,0x26,0x18,0x64,0x64,0x69,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x6d,0x24, +0x12,0x73,0x75,0x70,0x24,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x25,1,0x70, +0x42,0x74,0x1d,0x74,0x65,0x72,0x6c,0x69,0x6b,0x65,0x73,0x79,0x6d,0x62,0x6f,0x6c, +0x73,0x79,0x12,0x63,0x68,0x61,0xa3,0x9c,2,0x6d,0x4e,0x6e,0x54,0x73,0x10,0x75, +0xa2,0xb0,0x12,0x73,0x75,0x70,0xa4,0x31,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74, +0xa5,0x31,0x11,0x62,0x75,0xa3,0x6f,0x12,0x65,0x61,0x72,1,0x61,0xa3,0xe8,0x62, +1,0x69,0x38,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x75,0x17, +0x64,0x65,0x6f,0x67,0x72,0x61,0x6d,0x73,0xa3,0x76,0x1a,0x77,0x73,0x75,0x72,0x72, +0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4d,0x10,0x61,1,0x6d,0x32,0x76,0x14,0x61, +0x6e,0x65,0x73,0x65,0xa3,0xb5,0x10,0x6f,0x5c,0x12,0x65,0x78,0x74,1,0x61,0xa3, +0xb4,0x62,0xa3,0xb9,1,0x61,0xa2,0x43,0x68,4,0x61,0x40,0x69,0x50,0x6d,0x6e, +0x6f,0x86,0x75,0x15,0x64,0x61,0x77,0x61,0x64,0x69,0xa3,0xe6,0x16,0x72,0x6f,0x73, +0x68,0x74,0x68,0x69,0xa3,0x89,0x1d,0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73, +0x63,0x72,0x69,0x70,0x74,0xa5,0x30,0x11,0x65,0x72,0x68,0x16,0x73,0x79,0x6d,0x62, +0x6f,0x6c,0x73,0xa3,0x71,0x12,0x6a,0x6b,0x69,0xa3,0xe5,5,0x74,0x35,0x74,0x34, +0x77,0x7a,0x79,0x13,0x61,0x68,0x6c,0x69,0xa3,0xa2,0x14,0x61,0x6b,0x61,0x6e,0x61, +0x9e,1,0x65,0x4c,0x70,0x10,0x68,0x1f,0x6f,0x6e,0x65,0x74,0x69,0x63,0x65,0x78, +0x74,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0x6b,0x11,0x78,0x74,0xa3,0x6b,0x10, +0x69,0xa5,0x46,0x69,0xa2,0x4e,0x6b,0xa2,0x51,0x6e,3,0x61,0x34,0x62,0x84,0x67, +0x8a,0x6e,0x12,0x61,0x64,0x61,0x4d,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,0xcb, +0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xcb,0x11,0x78,0x74,2,0x61,0xa5, +0x13,0x62,0xa5,0x38,0x65,0x13,0x6e,0x64,0x65,0x64,1,0x61,0xa5,0x13,0x62,0xa5, +0x38,0x11,0x75,0x6e,0xa3,0x42,0x11,0x78,0x69,0x96,0x17,0x72,0x61,0x64,0x69,0x63, +0x61,0x6c,0x73,0x97,0x12,0x74,0x68,0x69,0xa3,0xc1,0x1c,0x74,0x6f,0x76,0x69,0x6b, +0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0xa5,0x45,0x67,0xa2,0xb5,0x68,0xa4,0x84, +0x69,3,0x64,0x4c,0x6d,0xa2,0x55,0x6e,0xa2,0x62,0x70,0x13,0x61,0x65,0x78,0x74, +0x2a,0x16,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0x2b,1,0x63,0x99,0x65,0x17,0x6f, +0x67,0x72,0x61,0x70,0x68,0x69,0x63,1,0x64,0x56,0x73,0x15,0x79,0x6d,0x62,0x6f, +0x6c,0x73,0xa4,0xb,0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74, +0x69,0x6f,0x6e,0xa5,0xb,0x13,0x65,0x73,0x63,0x72,0x1f,0x69,0x70,0x74,0x69,0x6f, +0x6e,0x63,0x68,0x61,0x72,0x61,0x63,0x74,0x65,0x72,0x73,0x99,0x1c,0x70,0x65,0x72, +0x69,0x61,0x6c,0x61,0x72,0x61,0x6d,0x61,0x69,0x63,0xa3,0xba,1,0x64,0x62,0x73, +0x1b,0x63,0x72,0x69,0x70,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x70,0x61,1,0x68,0x32, +0x72,0x14,0x74,0x68,0x69,0x61,0x6e,0xa3,0xbd,0x13,0x6c,0x61,0x76,0x69,0xa3,0xbe, +0x11,0x69,0x63,1,0x6e,0x3e,0x73,0x1a,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62, +0x65,0x72,0x73,0xa5,0x1e,0x19,0x75,0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73, +0xa3,0xb2,4,0x65,0x74,0x6c,0xa2,0x82,0x6f,0xa2,0x9a,0x72,0xa2,0x9e,0x75,2, +0x6a,0x34,0x6e,0x3e,0x72,0x14,0x6d,0x75,0x6b,0x68,0x69,0x43,0x14,0x61,0x72,0x61, +0x74,0x69,0x45,0x18,0x6a,0x61,0x6c,0x61,0x67,0x6f,0x6e,0x64,0x69,0xa5,0x1c,1, +0x6e,0xa2,0x46,0x6f,1,0x6d,0x6e,0x72,0x13,0x67,0x69,0x61,0x6e,0x5a,1,0x65, +0x40,0x73,0x11,0x75,0x70,0xa2,0x87,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3, +0x87,0x11,0x78,0x74,0xa4,0x1b,0x14,0x65,0x6e,0x64,0x65,0x64,0xa5,0x1b,0x1a,0x65, +0x74,0x72,0x69,0x63,0x73,0x68,0x61,0x70,0x65,0x73,0x8c,0x12,0x65,0x78,0x74,0xa2, +0xe3,0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0xe3,0x1e,0x65,0x72,0x61,0x6c,0x70,0x75, +0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x71,0x17,0x61,0x67,0x6f,0x6c,0x69, +0x74,0x69,0x63,0xa2,0x88,0x12,0x73,0x75,0x70,0xa4,0xa,0x16,0x70,0x6c,0x65,0x6d, +0x65,0x6e,0x74,0xa5,0xa,0x13,0x74,0x68,0x69,0x63,0xa3,0x59,1,0x61,0x5c,0x65, +0x11,0x65,0x6b,0x30,1,0x61,0x38,0x65,0x11,0x78,0x74,0x6e,0x14,0x65,0x6e,0x64, +0x65,0x64,0x6f,0x17,0x6e,0x64,0x63,0x6f,0x70,0x74,0x69,0x63,0x31,0x13,0x6e,0x74, +0x68,0x61,0xa3,0xe4,2,0x61,0xa2,0x48,0x65,0xa2,0xdf,0x69,1,0x67,0x30,0x72, +0x14,0x61,0x67,0x61,0x6e,0x61,0x9d,0x10,0x68,1,0x70,0x3a,0x73,0x18,0x75,0x72, +0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4b,1,0x72,0x3c,0x75,0x19,0x73,0x75, +0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4c,0x11,0x69,0x76,0x1f,0x61,0x74, +0x65,0x75,0x73,0x65,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4c, +2,0x6c,0x32,0x6e,0x9a,0x74,0x12,0x72,0x61,0x6e,0xa5,2,0x10,0x66,2,0x61, +0x58,0x6d,0x70,0x77,0x14,0x69,0x64,0x74,0x68,0x61,0x1f,0x6e,0x64,0x66,0x75,0x6c, +0x6c,0x77,0x69,0x64,0x74,0x68,0x66,0x6f,0x72,0x6d,0x73,0xa3,0x57,0x1a,0x6e,0x64, +0x66,0x75,0x6c,0x6c,0x66,0x6f,0x72,0x6d,0x73,0xa3,0x57,0x13,0x61,0x72,0x6b,0x73, +0xa3,0x52,2,0x67,0x34,0x69,0xa2,0x45,0x75,0x12,0x6e,0x6f,0x6f,0xa3,0x63,0x11, +0x75,0x6c,0xa2,0x4a,2,0x63,0x3c,0x6a,0x5e,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62, +0x6c,0x65,0x73,0xa3,0x4a,0x1f,0x6f,0x6d,0x70,0x61,0x74,0x69,0x62,0x69,0x6c,0x69, +0x74,0x79,0x6a,0x61,0x6d,0x6f,0xa3,0x41,0x12,0x61,0x6d,0x6f,0x5c,0x17,0x65,0x78, +0x74,0x65,0x6e,0x64,0x65,0x64,1,0x61,0xa3,0xb4,0x62,0xa3,0xb9,0x19,0x66,0x69, +0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,0xa5,0x1d,0x13,0x62,0x72,0x65,0x77,0x37, +0x61,0xa4,0xc,0x62,0xa6,0x59,0x63,0xa8,0x2e,0x64,0xac,0xe9,0x65,5,0x6d,0xa9, +0x6d,0x94,0x6e,0xa2,0x41,0x74,0x15,0x68,0x69,0x6f,0x70,0x69,0x63,0x5e,1,0x65, +0x40,0x73,0x11,0x75,0x70,0xa2,0x86,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3, +0x86,0x11,0x78,0x74,0xa2,0x85,2,0x61,0xa3,0xc8,0x62,0xa5,0x37,0x65,0x13,0x6e, +0x64,0x65,0x64,0xa2,0x85,1,0x61,0xa3,0xc8,0x62,0xa5,0x37,0x16,0x6f,0x74,0x69, +0x63,0x6f,0x6e,0x73,0xa3,0xce,0x15,0x63,0x6c,0x6f,0x73,0x65,0x64,2,0x61,0x5a, +0x63,0x9e,0x69,0x1c,0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,0x73,0x75, +0x70,0xa2,0xc4,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xc4,0x16,0x6c,0x70, +0x68,0x61,0x6e,0x75,0x6d,0x86,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3,0xc3,0x13, +0x72,0x69,0x63,0x73,0x86,0x18,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3, +0xc3,0x11,0x6a,0x6b,0xa2,0x44,0x1f,0x6c,0x65,0x74,0x74,0x65,0x72,0x73,0x61,0x6e, +0x64,0x6d,0x6f,0x6e,0x74,0x68,0x73,0xa3,0x44,0x61,0x4a,0x67,0x76,0x6c,1,0x62, +0x30,0x79,0x13,0x6d,0x61,0x69,0x63,0xa5,0x25,0x13,0x61,0x73,0x61,0x6e,0xa3,0xe2, +0x13,0x72,0x6c,0x79,0x64,0x1f,0x79,0x6e,0x61,0x73,0x74,0x69,0x63,0x63,0x75,0x6e, +0x65,0x69,0x66,0x6f,0x72,0x6d,0xa5,1,0x1f,0x79,0x70,0x74,0x69,0x61,0x6e,0x68, +0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,1,0x66,0x26,0x73,0xa3,0xc2,0x1c, +0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73,0xa5,0x24,7, +0x6e,0xc0,0xf2,0x6e,0x3e,0x72,0xa2,0x5d,0x73,0xa2,0xe5,0x76,0x14,0x65,0x73,0x74, +0x61,0x6e,0xa3,0xbc,1,0x61,0x92,0x63,0x13,0x69,0x65,0x6e,0x74,1,0x67,0x34, +0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xa5,0x13,0x72,0x65,0x65,0x6b,1, +0x6d,0x34,0x6e,0x15,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0x7f,0x13,0x75,0x73,0x69, +0x63,0xa2,0x7e,0x19,0x61,0x6c,0x6e,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x7e, +0x10,0x74,0x1f,0x6f,0x6c,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79, +0x70,0x68,0x73,0xa3,0xfe,2,0x61,0x32,0x6d,0xa2,0x7e,0x72,0x12,0x6f,0x77,0x73, +0x7d,0x12,0x62,0x69,0x63,0x38,3,0x65,0x4a,0x6d,0x80,0x70,0xa2,0x50,0x73,0x11, +0x75,0x70,0xa2,0x80,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x80,0x11,0x78, +0x74,3,0x61,0xa3,0xd2,0x62,0xa5,0x35,0x63,0xa5,0x41,0x65,0x13,0x6e,0x64,0x65, +0x64,2,0x61,0xa3,0xd2,0x62,0xa5,0x35,0x63,0xa5,0x41,0x12,0x61,0x74,0x68,0xa2, +0xd3,0x18,0x65,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,0x61,0x1f,0x6c,0x70,0x68,0x61, +0x62,0x65,0x74,0x69,0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xd3,1,0x66, +0x42,0x72,0x1e,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x66,0x6f,0x72, +0x6d,0x73,1,0x61,0xa3,0x51,0x62,0xa3,0x55,0x14,0x65,0x6e,0x69,0x61,0x6e,0x35, +0x12,0x63,0x69,0x69,0x23,0x64,0x9e,0x65,0xa2,0x42,0x68,0xa2,0x4d,0x6c,1,0x63, +0x62,0x70,0x17,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x70,1,0x66,0xa3,0x50,0x72, +0x1e,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x66,0x6f,0x72,0x6d,0x73, +0xa3,0x50,0x16,0x68,0x65,0x6d,0x69,0x63,0x61,0x6c,0xa2,0xd0,0x16,0x73,0x79,0x6d, +0x62,0x6f,0x6c,0x73,0xa3,0xd0,0x12,0x6c,0x61,0x6d,0xa5,7,0x1a,0x67,0x65,0x61, +0x6e,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0x77,0x11,0x6f,0x6d,0xa3,0xfd,7, +0x6f,0x71,0x6f,0x64,0x72,0xa2,0x41,0x75,0xa2,0x58,0x79,0x1b,0x7a,0x61,0x6e,0x74, +0x69,0x6e,0x65,0x6d,0x75,0x73,0x69,0x63,0xa2,0x5b,0x18,0x61,0x6c,0x73,0x79,0x6d, +0x62,0x6f,0x6c,0x73,0xa3,0x5b,1,0x70,0x34,0x78,0x16,0x64,0x72,0x61,0x77,0x69, +0x6e,0x67,0x89,0x14,0x6f,0x6d,0x6f,0x66,0x6f,0xa0,0x12,0x65,0x78,0x74,0xa2,0x43, +0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0x43,0x10,0x61,1,0x68,0x40,0x69,0x12,0x6c, +0x6c,0x65,0x92,0x17,0x70,0x61,0x74,0x74,0x65,0x72,0x6e,0x73,0x93,0x11,0x6d,0x69, +0xa3,0xc9,1,0x67,0x2c,0x68,0x11,0x69,0x64,0xa3,0x64,0x14,0x69,0x6e,0x65,0x73, +0x65,0xa3,0x81,0x61,0x48,0x65,0xa2,0x4e,0x68,0xa2,0x52,0x6c,0x1a,0x6f,0x63,0x6b, +0x65,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x73,0x8b,3,0x6c,0x34,0x6d,0x40,0x73,0x66, +0x74,0x11,0x61,0x6b,0xa3,0xc7,0x14,0x69,0x6e,0x65,0x73,0x65,0xa3,0x93,0x11,0x75, +0x6d,0xa2,0xb1,0x12,0x73,0x75,0x70,0xa2,0xca,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e, +0x74,0xa3,0xca,1,0x69,0x30,0x73,0x13,0x61,0x76,0x61,0x68,0xa3,0xdd,0x15,0x63, +0x6c,0x61,0x74,0x69,0x6e,0x23,0x14,0x6e,0x67,0x61,0x6c,0x69,0x41,0x16,0x61,0x69, +0x6b,0x73,0x75,0x6b,0x69,0xa5,8,5,0x6f,0xc1,0x60,0x6f,0xa2,0x69,0x75,0xa4, +0x24,0x79,1,0x70,0xa2,0x44,0x72,0x14,0x69,0x6c,0x6c,0x69,0x63,0x32,1,0x65, +0x4c,0x73,0x11,0x75,0x70,0xa2,0x61,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa2, +0x61,0x12,0x61,0x72,0x79,0xa3,0x61,0x11,0x78,0x74,4,0x61,0xa3,0x9e,0x62,0xa3, +0xa0,0x63,0xa5,9,0x64,0xa5,0x43,0x65,0x13,0x6e,0x64,0x65,0x64,3,0x61,0xa3, +0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x64,0xa5,0x43,0x10,0x72,1,0x69,0x34,0x6f, +0x15,0x6d,0x69,0x6e,0x6f,0x61,0x6e,0xa5,0x36,0x1a,0x6f,0x74,0x73,0x79,0x6c,0x6c, +0x61,0x62,0x61,0x72,0x79,0xa3,0x7b,3,0x6d,0x5a,0x6e,0xa2,0x95,0x70,0xa2,0xa0, +0x75,0x17,0x6e,0x74,0x69,0x6e,0x67,0x72,0x6f,0x64,0xa2,0x9a,0x17,0x6e,0x75,0x6d, +0x65,0x72,0x61,0x6c,0x73,0xa3,0x9a,2,0x62,0x3a,0x6d,0xa2,0x5f,0x70,0x15,0x61, +0x74,0x6a,0x61,0x6d,0x6f,0xa3,0x41,0x14,0x69,0x6e,0x69,0x6e,0x67,2,0x64,0x46, +0x68,0x9e,0x6d,0x1d,0x61,0x72,0x6b,0x73,0x66,0x6f,0x72,0x73,0x79,0x6d,0x62,0x6f, +0x6c,0x73,0x77,0x1e,0x69,0x61,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x6d,0x61, +0x72,0x6b,0x73,0x2e,2,0x65,0x40,0x66,0xa6,0x52,0x73,0x18,0x75,0x70,0x70,0x6c, +0x65,0x6d,0x65,0x6e,0x74,0xa3,0x83,0x16,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,0xa3, +0xe0,0x17,0x61,0x6c,0x66,0x6d,0x61,0x72,0x6b,0x73,0xa3,0x52,0x11,0x6f,0x6e,0x1f, +0x69,0x6e,0x64,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73, +0xa3,0xb2,0x1b,0x74,0x72,0x6f,0x6c,0x70,0x69,0x63,0x74,0x75,0x72,0x65,0x73,0x83, +0x12,0x74,0x69,0x63,0xa2,0x84,0x1b,0x65,0x70,0x61,0x63,0x74,0x6e,0x75,0x6d,0x62, +0x65,0x72,0x73,0xa3,0xdf,1,0x6e,0x3e,0x72,0x1b,0x72,0x65,0x6e,0x63,0x79,0x73, +0x79,0x6d,0x62,0x6f,0x6c,0x73,0x75,0x15,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa2,0x98, +0x16,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa2,0x99,0x1d,0x61,0x6e,0x64,0x70,0x75, +0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x99,0x61,0xa2,0xea,0x68,0xa4, +0x14,0x6a,0x10,0x6b,0xa2,0x47,4,0x63,0x92,0x65,0xa2,0x83,0x72,0xa2,0xa1,0x73, +0xa2,0xb3,0x75,0x1f,0x6e,0x69,0x66,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67,0x72, +0x61,0x70,0x68,0x73,0xa2,0x47,0x18,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,0x6e, +8,0x65,0x71,0x65,0xa5,0,0x66,0xa5,0x12,0x67,0xa5,0x2e,0x68,0xa5,0x42,0x69, +0xa5,0x48,0x14,0x6f,0x6d,0x70,0x61,0x74,0xa2,0x45,1,0x66,0x96,0x69,1,0x62, +0x44,0x64,0x17,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x12,0x73,0x75, +0x70,0xa3,0x5f,0x14,0x69,0x6c,0x69,0x74,0x79,0xa2,0x45,1,0x66,0x54,0x69,0x18, +0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x19,0x73,0x75,0x70,0x70, +0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x5f,0x13,0x6f,0x72,0x6d,0x73,0xa3,0x53,0x11, +0x78,0x74,8,0x65,0xf,0x65,0xa5,0,0x66,0xa5,0x12,0x67,0xa5,0x2e,0x68,0xa5, +0x42,0x69,0xa5,0x48,0x61,0xa3,0x46,0x62,0xa3,0x5e,0x63,0xa3,0xc5,0x64,0xa3,0xd1, +0x19,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,0x73,0x75,0x70,0x94,0x16,0x70,0x6c,0x65, +0x6d,0x65,0x6e,0x74,0x95,1,0x74,0x50,0x79,0x14,0x6d,0x62,0x6f,0x6c,0x73,0x9a, +0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x9b, +0x14,0x72,0x6f,0x6b,0x65,0x73,0xa3,0x82,2,0x6e,0x48,0x72,0x64,0x75,0x1d,0x63, +0x61,0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,0x61,0x6e,0x69,0x61,0x6e,0xa3,0xde,0x1d, +0x61,0x64,0x69,0x61,0x6e,0x73,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x73,0x63,0x12, +0x69,0x61,0x6e,0xa3,0xa8,2,0x61,0x3a,0x65,0x4c,0x6f,0x16,0x72,0x61,0x73,0x6d, +0x69,0x61,0x6e,0xa5,0x2d,1,0x6b,0x26,0x6d,0xa3,0xa4,0x11,0x6d,0x61,0xa3,0xd4, +1,0x72,0x38,0x73,0x17,0x73,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa5,0x19,0x13, +0x6f,0x6b,0x65,0x65,0x60,0x12,0x73,0x75,0x70,0xa2,0xff,0x16,0x70,0x6c,0x65,0x6d, +0x65,0x6e,0x74,0xa3,0xff,3,0x65,0x3e,0x69,0x8e,0x6f,0xa2,0x71,0x75,0x15,0x70, +0x6c,0x6f,0x79,0x61,0x6e,0xa3,0xe1,1,0x73,0x60,0x76,0x16,0x61,0x6e,0x61,0x67, +0x61,0x72,0x69,0x3e,0x12,0x65,0x78,0x74,0xa2,0xb3,1,0x61,0xa5,0x44,0x65,0x13, +0x6e,0x64,0x65,0x64,0xa2,0xb3,0x10,0x61,0xa5,0x44,0x13,0x65,0x72,0x65,0x74,0xa3, +0x5a,2,0x61,0x3a,0x6e,0x82,0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa5, +0x2f,0x18,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x73,0x2e,2,0x65,0x30,0x66, +0x36,0x73,0x11,0x75,0x70,0xa3,0x83,0x11,0x78,0x74,0xa3,0xe0,0x18,0x6f,0x72,0x73, +0x79,0x6d,0x62,0x6f,0x6c,0x73,0x77,0x14,0x67,0x62,0x61,0x74,0x73,0x91,1,0x67, +0x3e,0x6d,0x12,0x69,0x6e,0x6f,0xa2,0xab,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xab, +0x11,0x72,0x61,0xa5,0x1a,8,0x6d,0x5f,0x6d,0x3a,0x6e,0x48,0x73,0x7a,0x76,0xa2, +0x4b,0x77,0x12,0x69,0x64,0x65,0x43,0x11,0x65,0x64,0x32,0x12,0x69,0x61,0x6c,0x33, +2,0x61,0x40,0x62,0x37,0x6f,1,0x62,0x28,0x6e,0x10,0x65,0x21,0x13,0x72,0x65, +0x61,0x6b,0x37,0x10,0x72,0x34,0x12,0x72,0x6f,0x77,0x35,2,0x6d,0x38,0x71,0x46, +0x75,1,0x62,0x3d,0x70,0x3e,0x11,0x65,0x72,0x3f,1,0x61,0x24,0x6c,0x39,0x11, +0x6c,0x6c,0x39,1,0x72,0x3b,0x75,0x12,0x61,0x72,0x65,0x3b,0x12,0x65,0x72,0x74, +0x40,0x13,0x69,0x63,0x61,0x6c,0x41,0x63,0x58,0x65,0x92,0x66,0x96,0x69,1,0x6e, +0x36,0x73,0x10,0x6f,0x30,0x14,0x6c,0x61,0x74,0x65,0x64,0x31,0x11,0x69,0x74,0x2e, +0x12,0x69,0x61,0x6c,0x2f,2,0x61,0x36,0x69,0x48,0x6f,0x10,0x6d,0x24,0x12,0x70, +0x61,0x74,0x25,0x10,0x6e,0x22,0x15,0x6f,0x6e,0x69,0x63,0x61,0x6c,0x23,0x13,0x72, +0x63,0x6c,0x65,0x27,0x11,0x6e,0x63,0x27,2,0x69,0x3a,0x6f,0x44,0x72,0x10,0x61, +0x2c,0x14,0x63,0x74,0x69,0x6f,0x6e,0x2d,0x10,0x6e,0x28,0x11,0x61,0x6c,0x29,0x11, +0x6e,0x74,0x2b,4,0x61,0x3a,0x66,0x4c,0x68,0x5e,0x6e,0x70,0x77,0x2a,0x12,0x69, +0x64,0x65,0x2b,0x22,0x17,0x6d,0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x26,0x17, +0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68,0x27,0x24,0x17,0x61,0x6c,0x66,0x77,0x69, +0x64,0x74,0x68,0x25,0x20,1,0x61,0x30,0x65,0x14,0x75,0x74,0x72,0x61,0x6c,0x21, +0x28,0x13,0x72,0x72,0x6f,0x77,0x29,0xd,0x6e,0xc0,0xfb,0x73,0x6d,0x73,0x3a,0x74, +0x98,0x75,0xa2,0x49,0x7a,2,0x6c,0x3b,0x70,0x3d,0x73,0x39,5,0x6f,0x28,0x6f, +0x57,0x70,0x34,0x75,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x45,0x11,0x61,0x63, +1,0x65,0x32,0x69,0x15,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x31,0x18,0x73,0x65,0x70, +0x61,0x72,0x61,0x74,0x6f,0x72,0x39,0x63,0x53,0x6b,0x55,0x6d,0x51,0x1d,0x69,0x74, +0x6c,0x65,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x27,1,0x6e,0x40, +0x70,0x1c,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x23, +0x17,0x61,0x73,0x73,0x69,0x67,0x6e,0x65,0x64,0x21,0x6e,0x8a,0x6f,0xa2,0x47,0x70, +8,0x66,0x14,0x66,0x5b,0x69,0x59,0x6f,0x4f,0x72,0x24,0x73,0x49,0x17,0x69,0x76, +0x61,0x74,0x65,0x75,0x73,0x65,0x43,0x61,0x2c,0x63,0x4d,0x64,0x47,0x65,0x4b,0x1f, +0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72, +0x3d,2,0x64,0x33,0x6c,0x35,0x6f,0x36,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e, +0x67,0x6d,0x61,0x72,0x6b,0x2d,1,0x70,0x7c,0x74,0x12,0x68,0x65,0x72,3,0x6c, +0x38,0x6e,0x42,0x70,0x4c,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0x57,0x14,0x65,0x74, +0x74,0x65,0x72,0x2b,0x14,0x75,0x6d,0x62,0x65,0x72,0x37,0x19,0x75,0x6e,0x63,0x74, +0x75,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75, +0x61,0x74,0x69,0x6f,0x6e,0x49,0x66,0x9e,0x66,0x88,0x69,0xa2,0x4b,0x6c,0xa2,0x5c, +0x6d,4,0x61,0x60,0x63,0x31,0x65,0x2f,0x6e,0x2d,0x6f,0x15,0x64,0x69,0x66,0x69, +0x65,0x72,1,0x6c,0x30,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0x55,0x14,0x65,0x74, +0x74,0x65,0x72,0x29,0x17,0x74,0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x51,1,0x69, +0x2e,0x6f,0x13,0x72,0x6d,0x61,0x74,0x41,0x1d,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63, +0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x5b,0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c, +0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59,6,0x6d,0x18,0x6d, +0x29,0x6f,0x28,0x74,0x27,0x75,0x23,0x2a,0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65, +0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x65,0x28,0x69,0x3c,0x6c,0x25,0x19,0x74,0x74, +0x65,0x72,0x6e,0x75,0x6d,0x62,0x65,0x72,0x35,0x1a,0x6e,0x65,0x73,0x65,0x70,0x61, +0x72,0x61,0x74,0x6f,0x72,0x3b,0x63,0x44,0x64,0xa2,0x60,0x65,0x1b,0x6e,0x63,0x6c, +0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x2f,6,0x6e,0x39,0x6e,0x46,0x6f, +0x4e,0x73,0x45,0x75,0x1b,0x72,0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f, +0x6c,0x53,0x20,0x12,0x74,0x72,0x6c,0x3f,0x42,0x10,0x6e,1,0x6e,0x2c,0x74,0x12, +0x72,0x6f,0x6c,0x3f,0x1f,0x65,0x63,0x74,0x6f,0x72,0x70,0x75,0x6e,0x63,0x74,0x75, +0x61,0x74,0x69,0x6f,0x6e,0x4d,0x63,0x3f,0x66,0x41,0x6c,0x1d,0x6f,0x73,0x65,0x70, +0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4b,2,0x61,0x30,0x65,0x4a, +0x69,0x12,0x67,0x69,0x74,0x33,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61, +0x74,0x69,0x6f,0x6e,0x47,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65, +0x72,0x33,0,0x13,0x6e,0xc1,0xf,0x74,0x76,0x74,0x4c,0x76,0x9a,0x77,0xa2,0x48, +0x79,0xa2,0x49,0x7a,1,0x61,0x2c,0x68,0x12,0x61,0x69,0x6e,0x8b,0x11,0x69,0x6e, +0x85,2,0x61,0x36,0x65,0x3c,0x68,0x14,0x69,0x6e,0x79,0x65,0x68,0xa3,0x66,1, +0x68,0x71,0x77,0x73,1,0x68,0x28,0x74,0x10,0x68,0x77,0x16,0x6d,0x61,0x72,0x62, +0x75,0x74,0x61,0x74,0x13,0x67,0x6f,0x61,0x6c,0x3d,0x1a,0x65,0x72,0x74,0x69,0x63, +0x61,0x6c,0x74,0x61,0x69,0x6c,0xa3,0x67,0x11,0x61,0x77,0x79,1,0x65,0x32,0x75, +0x11,0x64,0x68,0x80,0x11,0x68,0x65,0x83,0x10,0x68,0x7a,1,0x62,0x34,0x77,0x16, +0x69,0x74,0x68,0x74,0x61,0x69,0x6c,0x7f,0x14,0x61,0x72,0x72,0x65,0x65,0x7d,0x6e, +0xa2,0x4c,0x70,0xa2,0x69,0x71,0xa2,0x69,0x72,0xa2,0x6f,0x73,5,0x74,0x22,0x74, +0x38,0x77,0x4c,0x79,0x16,0x72,0x69,0x61,0x63,0x77,0x61,0x77,0x6f,0x18,0x72,0x61, +0x69,0x67,0x68,0x74,0x77,0x61,0x77,0xa3,0x55,0x15,0x61,0x73,0x68,0x6b,0x61,0x66, +0x6d,0x61,0x2e,0x65,0x38,0x68,0x11,0x69,0x6e,0x6b,0x10,0x64,0x62,0x11,0x68,0x65, +0x65,1,0x65,0x2e,0x6d,0x13,0x6b,0x61,0x74,0x68,0x69,0x10,0x6e,0x67,2,0x6f, +0x2c,0x75,0x50,0x79,0x10,0x61,0x91,1,0x6a,0x28,0x6f,0x10,0x6e,0x55,0x1a,0x6f, +0x69,0x6e,0x69,0x6e,0x67,0x67,0x72,0x6f,0x75,0x70,0x21,0x10,0x6e,0x57,0x10,0x65, +0x59,0x10,0x61,1,0x66,0x5b,0x70,0x10,0x68,0x5d,1,0x65,0x38,0x6f,0x18,0x68, +0x69,0x6e,0x67,0x79,0x61,0x79,0x65,0x68,0x93,1,0x68,0x5f,0x76,0x16,0x65,0x72, +0x73,0x65,0x64,0x70,0x65,0x61,0x67,0xc1,0xc7,0x67,0xa4,0x52,0x68,0xa4,0x59,0x6b, +0xa4,0x99,0x6c,0xa4,0xb2,0x6d,2,0x61,0x2e,0x65,0xa4,0x3e,0x69,0x10,0x6d,0x53, +1,0x6c,0xa2,0xe7,0x6e,0x16,0x69,0x63,0x68,0x61,0x65,0x61,0x6e,0,0x12,0x6e, +0x76,0x73,0x51,0x73,0x3e,0x74,0x5c,0x77,0xa0,0x79,0xa2,0x42,0x7a,0x13,0x61,0x79, +0x69,0x6e,0xa3,0x54,0x10,0x61,1,0x64,0x2e,0x6d,0x12,0x65,0x6b,0x68,0xa3,0x4c, +0x11,0x68,0x65,0xa3,0x4b,3,0x61,0x38,0x65,0x3c,0x68,0x4a,0x77,0x13,0x65,0x6e, +0x74,0x79,0xa3,0x51,0x10,0x77,0xa3,0x4d,1,0x6e,0xa3,0x4e,0x74,0x10,0x68,0xa3, +0x4f,0x14,0x61,0x6d,0x65,0x64,0x68,0xa3,0x50,0x11,0x61,0x77,0xa3,0x52,0x12,0x6f, +0x64,0x68,0xa3,0x53,0x6e,0x3a,0x6f,0x40,0x70,0x46,0x71,0x4a,0x72,0x12,0x65,0x73, +0x68,0xa3,0x4a,0x11,0x75,0x6e,0xa3,0x46,0x11,0x6e,0x65,0xa3,0x47,0x10,0x65,0xa3, +0x48,0x12,0x6f,0x70,0x68,0xa3,0x49,0x67,0x33,0x67,0x38,0x68,0x40,0x6b,0x5e,0x6c, +0x66,0x6d,0x11,0x65,0x6d,0xa3,0x45,0x13,0x69,0x6d,0x65,0x6c,0xa1,1,0x65,0x32, +0x75,0x14,0x6e,0x64,0x72,0x65,0x64,0xa3,0x42,0x11,0x74,0x68,0xa3,0x41,0x12,0x61, +0x70,0x68,0xa3,0x43,0x14,0x61,0x6d,0x65,0x64,0x68,0xa3,0x44,0x61,0x34,0x62,0x4a, +0x64,0x50,0x66,0x12,0x69,0x76,0x65,0x9f,1,0x6c,0x2a,0x79,0x11,0x69,0x6e,0x97, +0x12,0x65,0x70,0x68,0x95,0x12,0x65,0x74,0x68,0x99,1,0x61,0x30,0x68,0x14,0x61, +0x6d,0x65,0x64,0x68,0x9d,0x13,0x6c,0x65,0x74,0x68,0x9b,0x15,0x61,0x79,0x61,0x6c, +0x61,0x6d,6,0x6e,0x2c,0x6e,0x34,0x72,0x5e,0x73,0x62,0x74,0x11,0x74,0x61,0xa3, +0x63,2,0x67,0x2e,0x6e,0x32,0x79,0x10,0x61,0xa3,0x60,0x10,0x61,0xa3,0x5d,1, +0x61,0xa3,0x5e,0x6e,0x10,0x61,0xa3,0x5f,0x10,0x61,0xa3,0x61,0x11,0x73,0x61,0xa3, +0x62,0x62,0x3c,0x6a,0x42,0x6c,0x10,0x6c,1,0x61,0xa3,0x5b,0x6c,0x10,0x61,0xa3, +0x5c,0x11,0x68,0x61,0xa3,0x59,0x10,0x61,0xa3,0x5a,0x11,0x65,0x6d,0x51,0x10,0x61, +1,0x66,0x37,0x6d,0x11,0x61,0x6c,0x39,1,0x61,0x40,0x65,0x3e,1,0x68,0x28, +0x74,0x10,0x68,0x45,0x40,0x13,0x67,0x6f,0x61,0x6c,0x43,2,0x68,0x3b,0x6d,0x5c, +0x6e,0x1a,0x69,0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,1,0x6b,0x2a, +0x70,0x10,0x61,0xa3,0x65,0x15,0x69,0x6e,0x6e,0x61,0x79,0x61,0xa3,0x64,0x1a,0x7a, +0x61,0x6f,0x6e,0x68,0x65,0x68,0x67,0x6f,0x61,0x6c,0x3d,2,0x61,0x3a,0x68,0x44, +0x6e,0x17,0x6f,0x74,0x74,0x65,0x64,0x68,0x65,0x68,0x4b,1,0x66,0x47,0x70,0x10, +0x68,0x49,0x12,0x61,0x70,0x68,0x89,0x11,0x61,0x6d,0x4c,0x12,0x61,0x64,0x68,0x4f, +0x61,0x6e,0x62,0xa2,0x54,0x64,0xa2,0x70,0x65,0x31,0x66,2,0x61,0x3e,0x65,0x4a, +0x69,0x19,0x6e,0x61,0x6c,0x73,0x65,0x6d,0x6b,0x61,0x74,0x68,0x35,0x15,0x72,0x73, +0x69,0x79,0x65,0x68,0x8f,0x86,0x10,0x68,0x33,2,0x66,0x3c,0x69,0x70,0x6c,1, +0x61,0x28,0x65,0x10,0x66,0x27,0x11,0x70,0x68,0x25,0x14,0x72,0x69,0x63,0x61,0x6e, +2,0x66,0x30,0x6e,0x36,0x71,0x11,0x61,0x66,0xa3,0x58,0x11,0x65,0x68,0xa3,0x56, +0x12,0x6f,0x6f,0x6e,0xa3,0x57,0x10,0x6e,0x23,1,0x65,0x4a,0x75,0x10,0x72,0x1f, +0x75,0x73,0x68,0x61,0x73,0x6b,0x69,0x79,0x65,0x68,0x62,0x61,0x72,0x72,0x65,0x65, +0x8d,1,0x68,0x29,0x74,0x10,0x68,0x2b,0x11,0x61,0x6c,0x2c,0x16,0x61,0x74,0x68, +0x72,0x69,0x73,0x68,0x2f,7,0x6e,0x2e,0x6e,0x2c,0x72,0x3e,0x74,0x56,0x75,0x21, +0x18,0x6f,0x6e,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x21,0x28,0x1a,0x69,0x67,0x68, +0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x29,0x2a,0x19,0x72,0x61,0x6e,0x73,0x70, +0x61,0x72,0x65,0x6e,0x74,0x2b,0x63,0x23,0x64,0x40,0x6a,0x56,0x6c,0x26,0x19,0x65, +0x66,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x27,0x24,0x19,0x75,0x61,0x6c,0x6a, +0x6f,0x69,0x6e,0x69,0x6e,0x67,0x25,0x19,0x6f,0x69,0x6e,0x63,0x61,0x75,0x73,0x69, +0x6e,0x67,0x23,0,0x14,0x6e,0xc0,0xe5,0x73,0x5e,0x77,0x23,0x77,0x40,0x78,0x58, +0x7a,0x10,0x77,0x58,1,0x6a,0x75,0x73,0x13,0x70,0x61,0x63,0x65,0x59,1,0x6a, +0x5d,0x6f,0x17,0x72,0x64,0x6a,0x6f,0x69,0x6e,0x65,0x72,0x5d,0x10,0x78,0x21,0x73, +0x4a,0x75,0x7a,0x76,1,0x66,0x7d,0x69,0x7e,0x13,0x72,0x61,0x6d,0x61,0x7e,0x14, +0x66,0x69,0x6e,0x61,0x6c,0x7d,4,0x61,0x51,0x67,0x53,0x70,0x28,0x75,0x30,0x79, +0x57,0x54,0x12,0x61,0x63,0x65,0x55,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x53, +0x15,0x6e,0x6b,0x6e,0x6f,0x77,0x6e,0x21,0x6e,0x60,0x6f,0xa2,0x41,0x70,0xa2,0x50, +0x71,0xa2,0x6e,0x72,1,0x65,0x24,0x69,0x6f,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c, +0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x6f,4,0x65,0x3e,0x6c,0x5b,0x6f, +0x46,0x73,0x45,0x75,0x46,0x14,0x6d,0x65,0x72,0x69,0x63,0x47,0x15,0x78,0x74,0x6c, +0x69,0x6e,0x65,0x5b,0x17,0x6e,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x45,0x10,0x70, +0x48,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x49, +1,0x6f,0x3e,0x72,0x4c,0x1a,0x65,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69, +0x63,0x4d,0x4a,0x1b,0x73,0x74,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63, +0x4b,0x10,0x75,0x4e,0x16,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x68,0x7b,0x68, +0x50,0x69,0x86,0x6a,0xa2,0x61,0x6c,0xa2,0x65,0x6d,0x1c,0x61,0x6e,0x64,0x61,0x74, +0x6f,0x72,0x79,0x62,0x72,0x65,0x61,0x6b,0x2d,4,0x32,0x5f,0x33,0x61,0x65,0x34, +0x6c,0x6d,0x79,0x3a,0x13,0x70,0x68,0x65,0x6e,0x3b,0x19,0x62,0x72,0x65,0x77,0x6c, +0x65,0x74,0x74,0x65,0x72,0x6d,2,0x64,0x28,0x6e,0x3c,0x73,0x41,0x3c,0x18,0x65, +0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,0x3d,0x3e,1,0x66,0x3e,0x73,0x11,0x65, +0x70,1,0x61,0x22,0x65,0x14,0x72,0x61,0x62,0x6c,0x65,0x3f,0x18,0x69,0x78,0x6e, +0x75,0x6d,0x65,0x72,0x69,0x63,0x41,2,0x6c,0x63,0x74,0x65,0x76,0x67,1,0x66, +0x43,0x69,0x15,0x6e,0x65,0x66,0x65,0x65,0x64,0x43,0x61,0x42,0x62,0xa2,0x49,0x63, +0xa2,0x76,0x65,0xa2,0xfc,0x67,0x10,0x6c,0x38,0x11,0x75,0x65,0x39,5,0x6d,0xf, +0x6d,0x28,0x70,0x79,0x73,0x7b,0x16,0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x69, +0x23,0x6b,0x38,0x6c,0x24,0x17,0x70,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x25,0x76, +0x13,0x73,0x61,0x72,0x61,0x76,1,0x70,0x2e,0x73,0x13,0x74,0x61,0x72,0x74,0x7b, +0x15,0x72,0x65,0x62,0x61,0x73,0x65,0x79,4,0x32,0x27,0x61,0x29,0x62,0x2b,0x6b, +0x2d,0x72,0x12,0x65,0x61,0x6b,2,0x61,0x36,0x62,0x3e,0x73,0x15,0x79,0x6d,0x62, +0x6f,0x6c,0x73,0x57,0x13,0x66,0x74,0x65,0x72,0x29,1,0x65,0x2a,0x6f,0x11,0x74, +0x68,0x27,0x13,0x66,0x6f,0x72,0x65,0x2b,7,0x6d,0x51,0x6d,0x33,0x6f,0x28,0x70, +0x69,0x72,0x35,1,0x6d,0x76,0x6e,1,0x64,0x3c,0x74,0x1a,0x69,0x6e,0x67,0x65, +0x6e,0x74,0x62,0x72,0x65,0x61,0x6b,0x2f,0x15,0x69,0x74,0x69,0x6f,0x6e,0x61,0x1f, +0x6c,0x6a,0x61,0x70,0x61,0x6e,0x65,0x73,0x65,0x73,0x74,0x61,0x72,0x74,0x65,0x72, +0x6b,1,0x62,0x3a,0x70,0x19,0x6c,0x65,0x78,0x63,0x6f,0x6e,0x74,0x65,0x78,0x74, +0x51,0x18,0x69,0x6e,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x33,0x61,0x6a,0x62,0x2f, +0x6a,0x6b,0x6c,0x30,0x13,0x6f,0x73,0x65,0x70,1,0x61,0x38,0x75,0x18,0x6e,0x63, +0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x31,0x18,0x72,0x65,0x6e,0x74,0x68,0x65,0x73, +0x69,0x73,0x69,0x1b,0x72,0x72,0x69,0x61,0x67,0x65,0x72,0x65,0x74,0x75,0x72,0x6e, +0x35,2,0x62,0x3e,0x6d,0x46,0x78,0x36,0x18,0x63,0x6c,0x61,0x6d,0x61,0x74,0x69, +0x6f,0x6e,0x37,0x70,0x12,0x61,0x73,0x65,0x71,0x72,0x16,0x6f,0x64,0x69,0x66,0x69, +0x65,0x72,0x73,1,0x64,0x42,0x6e,1,0x6f,0x32,0x75,0x26,0x14,0x6d,0x65,0x72, +0x69,0x63,0x27,0x11,0x6e,0x65,0x21,1,0x65,0x2e,0x69,0x24,0x12,0x67,0x69,0x74, +0x25,0x22,0x14,0x63,0x69,0x6d,0x61,0x6c,0x23,0,0x18,0x6e,0xc4,0x6f,0x74,0xc1, +0x91,0x77,0x96,0x77,0xa2,0x4c,0x78,0xa2,0x70,0x79,0xa2,0x7a,0x7a,6,0x73,0x1e, +0x73,0x34,0x78,0x42,0x79,0x48,0x7a,0x11,0x7a,0x7a,0xa3,0x67,0x10,0x79,1,0x65, +0xa3,0xae,0x6d,0xa3,0x81,0x11,0x78,0x78,0xa3,0x66,0x11,0x79,0x79,0x21,0x61,0x30, +0x69,0x58,0x6d,0x11,0x74,0x68,0xa3,0x80,0x10,0x6e,1,0x61,0x26,0x62,0xa3,0xb1, +0x1a,0x62,0x61,0x7a,0x61,0x72,0x73,0x71,0x75,0x61,0x72,0x65,0xa3,0xb1,0x11,0x6e, +0x68,0x23,2,0x61,0x30,0x63,0x5a,0x6f,0x11,0x6c,0x65,0xa3,0x9b,1,0x6e,0x3c, +0x72,0x10,0x61,0xa2,0x92,0x15,0x6e,0x67,0x63,0x69,0x74,0x69,0xa3,0x92,0x12,0x63, +0x68,0x6f,0xa3,0xbc,0x11,0x68,0x6f,0xa3,0xbc,1,0x70,0x2c,0x73,0x11,0x75,0x78, +0xa3,0x65,0x11,0x65,0x6f,0x9b,1,0x65,0x2c,0x69,0x72,0x11,0x69,0x69,0x73,0x11, +0x7a,0x69,0xa2,0xc0,0x11,0x64,0x69,0xa3,0xc0,0x74,0x66,0x75,0xa2,0xde,0x76,1, +0x61,0x48,0x69,1,0x73,0x38,0x74,0x10,0x68,0xa2,0xc5,0x13,0x6b,0x75,0x71,0x69, +0xa3,0xc5,0x10,0x70,0xa3,0x64,0x10,0x69,0xa2,0x63,0x10,0x69,0xa3,0x63,7,0x68, +0x3e,0x68,0x34,0x69,0x48,0x6e,0x86,0x6f,0x11,0x74,0x6f,0xa3,0xc4,0x10,0x61,1, +0x61,0x24,0x69,0x6d,0x6a,0x11,0x6e,0x61,0x6b,2,0x62,0x3a,0x66,0x4a,0x72,0x10, +0x68,0xa2,0x9e,0x12,0x75,0x74,0x61,0xa3,0x9e,1,0x65,0x24,0x74,0x6f,0x12,0x74, +0x61,0x6e,0x6f,0x14,0x69,0x6e,0x61,0x67,0x68,0x99,0x11,0x73,0x61,0xa3,0xc3,0x61, +0x36,0x65,0xa2,0x65,0x66,0xa2,0x71,0x67,0x11,0x6c,0x67,0x75,6,0x6c,0x28,0x6c, +0x32,0x6d,0x38,0x6e,0x44,0x76,0x10,0x74,0xa3,0x7f,1,0x65,0x89,0x75,0x97,1, +0x69,0x24,0x6c,0x67,0x10,0x6c,0x67,0x10,0x67,0xa2,0x9a,1,0x73,0x2a,0x75,0x10, +0x74,0xa3,0x9a,0x10,0x61,0xa3,0xc3,0x67,0x36,0x69,0x52,0x6b,0x10,0x72,0xa2,0x99, +0x10,0x69,0xa3,0x99,1,0x61,0x30,0x62,0x7a,0x13,0x61,0x6e,0x77,0x61,0x7b,0x12, +0x6c,0x6f,0x67,0x75,2,0x6c,0x32,0x74,0x34,0x76,0x12,0x69,0x65,0x74,0xa3,0x7f, +0x10,0x65,0x89,0x12,0x68,0x61,0x6d,0xa3,0x6a,1,0x6c,0x2a,0x6e,0x10,0x67,0xa3, +0x62,0x10,0x75,0x68,0x11,0x67,0x75,0x69,0x11,0x6e,0x67,0x99,1,0x67,0x32,0x6e, +0x14,0x6b,0x6e,0x6f,0x77,0x6e,0xa3,0x67,0x11,0x61,0x72,0x8a,0x13,0x69,0x74,0x69, +0x63,0x8b,0x71,0xc1,0x13,0x71,0xa2,0xde,0x72,0xa2,0xe3,0x73,6,0x69,0x8a,0x69, +0x72,0x6f,0xa2,0x4c,0x75,0xa2,0x75,0x79,1,0x6c,0x46,0x72,4,0x63,0x65,0x65, +0xa3,0x5f,0x69,0x2c,0x6a,0xa3,0x60,0x6e,0xa3,0x61,0x11,0x61,0x63,0x65,0x10,0x6f, +0x94,0x16,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,0x95,2,0x64,0x3c,0x67,0x4c,0x6e, +1,0x64,0xa3,0x91,0x68,0x62,0x12,0x61,0x6c,0x61,0x63,0x10,0x64,0xa2,0xa6,0x12, +0x68,0x61,0x6d,0xa3,0xa6,0x17,0x6e,0x77,0x72,0x69,0x74,0x69,0x6e,0x67,0xa3,0x70, +2,0x67,0x3a,0x72,0x52,0x79,0x10,0x6f,0xa2,0xb0,0x12,0x6d,0x62,0x6f,0xa3,0xb0, +1,0x64,0x26,0x6f,0xa3,0xb8,0xa2,0xb7,0x12,0x69,0x61,0x6e,0xa3,0xb7,0x10,0x61, +0xa2,0x98,0x16,0x73,0x6f,0x6d,0x70,0x65,0x6e,0x67,0xa3,0x98,0x11,0x6e,0x64,0xa2, +0x71,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0x71,0x61,0x5c,0x67,0xa2,0x43,0x68,1, +0x61,0x2a,0x72,0x10,0x64,0xa3,0x97,2,0x72,0x28,0x76,0x30,0x77,0x87,0x12,0x61, +0x64,0x61,0xa3,0x97,0x12,0x69,0x61,0x6e,0x87,2,0x6d,0x40,0x72,0x58,0x75,0x10, +0x72,0xa2,0x6f,0x15,0x61,0x73,0x68,0x74,0x72,0x61,0xa3,0x6f,1,0x61,0x26,0x72, +0xa3,0x7e,0x14,0x72,0x69,0x74,0x61,0x6e,0xa3,0x7e,1,0x61,0xa3,0x5e,0x62,0xa3, +0x85,0x11,0x6e,0x77,0xa3,0x70,0x11,0x61,0x61,1,0x63,0x2f,0x69,0x23,3,0x65, +0x3e,0x6a,0x48,0x6f,0x4e,0x75,0x10,0x6e,1,0x69,0x24,0x72,0x61,0x10,0x63,0x61, +0x13,0x6a,0x61,0x6e,0x67,0xa3,0x6e,0x11,0x6e,0x67,0xa3,0x6e,1,0x68,0x2a,0x72, +0x10,0x6f,0xa3,0x5d,0x10,0x67,0xa3,0xb6,0x6e,0xa2,0x83,0x6f,0xa4,1,0x70,5, +0x6c,0x1e,0x6c,0x44,0x72,0x4a,0x73,0x1b,0x61,0x6c,0x74,0x65,0x72,0x70,0x61,0x68, +0x6c,0x61,0x76,0x69,0xa3,0x7b,0x11,0x72,0x64,0xa3,0x5c,0x11,0x74,0x69,0xa3,0x7d, +0x61,0x7c,0x65,0xa2,0x54,0x68,3,0x61,0x3e,0x6c,0x4e,0x6e,0x5e,0x6f,0x16,0x65, +0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x5b,0x10,0x67,0xa2,0x5a,0x12,0x73,0x70,0x61, +0xa3,0x5a,2,0x69,0xa3,0x7a,0x70,0xa3,0x7b,0x76,0xa3,0x7c,0x10,0x78,0xa3,0x5b, +2,0x68,0x3e,0x6c,0x50,0x75,0x10,0x63,0xa2,0xa5,0x14,0x69,0x6e,0x68,0x61,0x75, +0xa3,0xa5,0x17,0x61,0x77,0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0x4b,0x10,0x6d,0xa2, +0x90,0x14,0x79,0x72,0x65,0x6e,0x65,0xa3,0x90,0x11,0x72,0x6d,0xa3,0x59,6,0x6b, +0x36,0x6b,0x56,0x73,0x6e,0x75,0x74,0x79,0x11,0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67, +0x70,0x75,0x61,0x63,0x68,0x75,0x65,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0xba,1,0x67, +0x2e,0x6f,0xa2,0x57,0x10,0x6f,0xa3,0x57,0x10,0x62,0xa3,0x84,0x11,0x68,0x75,0xa3, +0x96,0x12,0x73,0x68,0x75,0xa3,0x96,0x61,0x42,0x62,0x9e,0x65,0x10,0x77,1,0x61, +0xa3,0xaa,0x74,0x14,0x61,0x69,0x6c,0x75,0x65,0x97,3,0x62,0x32,0x67,0x40,0x6e, +0x56,0x72,0x10,0x62,0xa3,0x8e,0x15,0x61,0x74,0x61,0x65,0x61,0x6e,0xa3,0x8f,0x10, +0x6d,0xa2,0xc7,0x15,0x75,0x6e,0x64,0x61,0x72,0x69,0xa3,0xc7,0x10,0x64,0xa2,0xbb, +0x16,0x69,0x6e,0x61,0x67,0x61,0x72,0x69,0xa3,0xbb,0x11,0x61,0x74,0xa3,0x8f,4, +0x67,0x3c,0x6c,0x4e,0x72,0xa2,0x8e,0x73,0xa2,0x9c,0x75,0x11,0x67,0x72,0xa3,0xc2, +1,0x61,0x2a,0x68,0x11,0x61,0x6d,0x5b,0x10,0x6d,0x5b,1,0x63,0xa2,0x6a,0x64, +6,0x70,0x41,0x70,0x3a,0x73,0x58,0x74,0x86,0x75,0x14,0x79,0x67,0x68,0x75,0x72, +0xa3,0xc2,0x11,0x65,0x72,1,0x6d,0x2c,0x73,0x12,0x69,0x61,0x6e,0x9b,0x11,0x69, +0x63,0xa3,0x59,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74,0x68,0x61,0x72,0x61,0x62, +0x69,0x61,0x6e,0xa3,0x85,0x13,0x64,0x69,0x61,0x6e,0xa3,0xb8,0x14,0x75,0x72,0x6b, +0x69,0x63,0xa3,0x58,0x68,0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72, +0x61,0x62,0x69,0x61,0x6e,0xa3,0x8e,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e, +0xa3,0x4c,0x14,0x74,0x61,0x6c,0x69,0x63,0x5d,1,0x68,0x26,0x6b,0xa3,0x6d,0x12, +0x69,0x6b,0x69,0xa3,0x6d,2,0x69,0x2c,0x6b,0x30,0x79,0x10,0x61,0x5f,0x11,0x79, +0x61,0x5f,0x10,0x68,0xa3,0x58,2,0x61,0x36,0x67,0x3c,0x6d,0x10,0x61,0x84,0x12, +0x6e,0x79,0x61,0x85,0x11,0x67,0x65,0xa3,0xab,0x10,0x65,0xa3,0xab,0x68,0xc3,0x15, +0x6b,0xc2,0x2c,0x6b,0xa4,0x17,0x6c,0xa4,0xba,0x6d,8,0x6f,0x46,0x6f,0x48,0x72, +0x74,0x74,0x80,0x75,0x86,0x79,1,0x61,0x28,0x6d,0x10,0x72,0x59,0x13,0x6e,0x6d, +0x61,0x72,0x59,2,0x64,0x2e,0x6e,0x32,0x6f,0x10,0x6e,0xa3,0x72,0x10,0x69,0xa3, +0xa3,0x10,0x67,0x56,0x14,0x6f,0x6c,0x69,0x61,0x6e,0x57,0x10,0x6f,0xa2,0x95,0x10, +0x6f,0xa3,0x95,0x11,0x65,0x69,0xa3,0x73,0x11,0x6c,0x74,0xa2,0xa4,0x12,0x61,0x6e, +0x69,0xa3,0xa4,0x61,0x36,0x65,0xa2,0x67,0x69,0xa2,0xbd,0x6c,0x11,0x79,0x6d,0x55, +6,0x6e,0x38,0x6e,0x32,0x72,0x5c,0x73,0x6c,0x79,0x10,0x61,0xa3,0x55,1,0x64, +0x38,0x69,0xa2,0x79,0x15,0x63,0x68,0x61,0x65,0x61,0x6e,0xa3,0x79,0xa2,0x54,0x12, +0x61,0x69,0x63,0xa3,0x54,0x10,0x63,0xa2,0xa9,0x12,0x68,0x65,0x6e,0xa3,0xa9,0x18, +0x61,0x72,0x61,0x6d,0x67,0x6f,0x6e,0x64,0x69,0xa3,0xaf,0x68,0x36,0x6b,0x4c,0x6c, +0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x55,1,0x61,0x26,0x6a,0xa3,0xa0,0x13,0x6a, +0x61,0x6e,0x69,0xa3,0xa0,0x10,0x61,0xa2,0xb4,0x12,0x73,0x61,0x72,0xa3,0xb4,3, +0x64,0x78,0x65,0x94,0x6e,0xa2,0x42,0x72,1,0x63,0xa3,0x8d,0x6f,0xa2,0x56,0x13, +0x69,0x74,0x69,0x63,1,0x63,0x3c,0x68,0x19,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79, +0x70,0x68,0x73,0xa3,0x56,0x15,0x75,0x72,0x73,0x69,0x76,0x65,0xa3,0x8d,1,0x65, +0x26,0x66,0xa3,0xb5,0x16,0x66,0x61,0x69,0x64,0x72,0x69,0x6e,0xa3,0xb5,0x17,0x74, +0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa3,0x73,0x10,0x64,0xa2,0x8c,0x17,0x65,0x6b, +0x69,0x6b,0x61,0x6b,0x75,0x69,0xa3,0x8c,0x11,0x61,0x6f,0xa3,0x5c,6,0x6e,0x1a, +0x6e,0x34,0x6f,0x38,0x70,0x3e,0x74,0x11,0x68,0x69,0xa3,0x78,0x11,0x64,0x61,0x4b, +0x11,0x72,0x65,0xa3,0x77,0x11,0x65,0x6c,0xa3,0x8a,0x61,0x32,0x68,0xa2,0x44,0x69, +0x11,0x74,0x73,0xa3,0xbf,5,0x74,0x23,0x74,0x34,0x77,0x56,0x79,0x13,0x61,0x68, +0x6c,0x69,0xa3,0x4f,0x14,0x61,0x6b,0x61,0x6e,0x61,0x4c,0x19,0x6f,0x72,0x68,0x69, +0x72,0x61,0x67,0x61,0x6e,0x61,0x8d,0x10,0x69,0xa3,0xc6,0x69,0x38,0x6c,0x40,0x6e, +1,0x61,0x4d,0x6e,0x12,0x61,0x64,0x61,0x4b,0x12,0x74,0x68,0x69,0xa3,0x78,0x10, +0x69,0xa3,0x4f,4,0x61,0x40,0x69,0x52,0x6d,0x70,0x6f,0x7c,0x75,0x15,0x64,0x61, +0x77,0x61,0x64,0x69,0xa3,0x91,0x10,0x72,0x92,0x15,0x6f,0x73,0x68,0x74,0x68,0x69, +0x93,0x1d,0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74, +0xa3,0xbf,1,0x65,0x24,0x72,0x4f,0x10,0x72,0x4f,0x10,0x6a,0xa2,0x9d,0x11,0x6b, +0x69,0xa3,0x9d,4,0x61,0x5c,0x65,0x90,0x69,0xa0,0x6f,0xa2,0x5d,0x79,1,0x63, +0x34,0x64,0x10,0x69,0xa2,0x6c,0x11,0x61,0x6e,0xa3,0x6c,0x10,0x69,0xa2,0x6b,0x11, +0x61,0x6e,0xa3,0x6b,2,0x6e,0x42,0x6f,0x46,0x74,3,0x66,0xa3,0x50,0x67,0xa3, +0x51,0x69,0x24,0x6e,0x53,0x10,0x6e,0x53,0x10,0x61,0xa3,0x6a,0x50,0x10,0x6f,0x51, +0x11,0x70,0x63,0xa2,0x52,0x11,0x68,0x61,0xa3,0x52,2,0x6d,0x2e,0x6e,0x36,0x73, +0x10,0x75,0xa3,0x83,0x10,0x62,0x80,0x10,0x75,0x81,2,0x61,0xa3,0x53,0x62,0x83, +0x65,0x11,0x61,0x72,1,0x61,0xa3,0x53,0x62,0x83,0x11,0x6d,0x61,0xa3,0x8b,0x68, +0x6e,0x69,0xa2,0x95,0x6a,2,0x61,0x30,0x70,0x52,0x75,0x11,0x72,0x63,0xa3,0x94, +1,0x6d,0x38,0x76,0x10,0x61,0xa2,0x4e,0x13,0x6e,0x65,0x73,0x65,0xa3,0x4e,0x10, +0x6f,0xa3,0xad,0x11,0x61,0x6e,0xa3,0x69,6,0x6c,0x1e,0x6c,0x34,0x6d,0x3a,0x72, +0x48,0x75,0x11,0x6e,0x67,0xa3,0x4c,0x11,0x75,0x77,0xa3,0x9c,0x10,0x6e,1,0x67, +0xa3,0x4b,0x70,0xa3,0xba,0x11,0x6b,0x74,0x8d,0x61,0x3c,0x65,0xa2,0x43,0x69,0x11, +0x72,0x61,0x48,0x13,0x67,0x61,0x6e,0x61,0x49,1,0x6e,0x34,0x74,0x10,0x72,0xa2, +0xa2,0x11,0x61,0x6e,0xa3,0xa2,0x42,6,0x6f,0xe,0x6f,0x77,0x73,0xa3,0x49,0x74, +0xa3,0x4a,0x75,0x12,0x6e,0x6f,0x6f,0x77,0x62,0xa3,0xac,0x67,0x3e,0x69,0x42,0x19, +0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,0xa3,0xb6,0x44,0x11,0x75,0x6c, +0x45,0x11,0x62,0x72,0x46,0x11,0x65,0x77,0x47,2,0x6d,0x2e,0x6e,0x4a,0x74,0x11, +0x61,0x6c,0x5d,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61,0x6d,0x61,0x69, +0x63,0xa3,0x74,2,0x64,0x66,0x68,0x6a,0x73,0x1b,0x63,0x72,0x69,0x70,0x74,0x69, +0x6f,0x6e,0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61,0x6e, +0xa3,0x7d,0x13,0x6c,0x61,0x76,0x69,0xa3,0x7a,0x10,0x73,0xa3,0x4d,0x15,0x65,0x72, +0x69,0x74,0x65,0x64,0x23,0x64,0xc1,0xd,0x64,0xa2,0x7a,0x65,0xa2,0xc1,0x67,4, +0x65,0x82,0x6c,0x9a,0x6f,0xa2,0x46,0x72,0xa2,0x55,0x75,2,0x6a,0x3c,0x6e,0x4e, +0x72,1,0x6d,0x24,0x75,0x41,0x13,0x75,0x6b,0x68,0x69,0x41,1,0x61,0x24,0x72, +0x3f,0x13,0x72,0x61,0x74,0x69,0x3f,0x18,0x6a,0x61,0x6c,0x61,0x67,0x6f,0x6e,0x64, +0x69,0xa3,0xb3,0x10,0x6f,1,0x6b,0xa3,0x48,0x72,0x38,0x13,0x67,0x69,0x61,0x6e, +0x39,0x11,0x61,0x67,0x90,0x15,0x6f,0x6c,0x69,0x74,0x69,0x63,0x91,1,0x6e,0x30, +0x74,0x10,0x68,0x3a,0x11,0x69,0x63,0x3b,1,0x67,0xa3,0xb3,0x6d,0xa3,0xaf,1, +0x61,0x32,0x65,1,0x65,0x24,0x6b,0x3d,0x10,0x6b,0x3d,0x10,0x6e,0xa2,0x89,0x12, +0x74,0x68,0x61,0xa3,0x89,4,0x65,0x46,0x69,0x6c,0x6f,0x8c,0x73,0x9a,0x75,0x11, +0x70,0x6c,0xa2,0x87,0x13,0x6f,0x79,0x61,0x6e,0xa3,0x87,1,0x73,0x38,0x76,0x10, +0x61,0x34,0x15,0x6e,0x61,0x67,0x61,0x72,0x69,0x35,0x13,0x65,0x72,0x65,0x74,0x33, +1,0x61,0x36,0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa3,0xbe,0x10,0x6b, +0xa3,0xbe,0x11,0x67,0x72,0xa2,0xb2,0x10,0x61,0xa3,0xb2,0x11,0x72,0x74,0x33,2, +0x67,0x3a,0x6c,0x72,0x74,0x11,0x68,0x69,0x36,0x13,0x6f,0x70,0x69,0x63,0x37,0x10, +0x79,2,0x64,0xa3,0x45,0x68,0xa3,0x46,0x70,0xa2,0x47,0x1e,0x74,0x69,0x61,0x6e, +0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x47,1,0x62,0x36, +0x79,0x10,0x6d,0xa2,0xb9,0x12,0x61,0x69,0x63,0xa3,0xb9,0x10,0x61,0xa2,0x88,0x12, +0x73,0x61,0x6e,0xa3,0x88,0x61,0xa2,0xc9,0x62,0xa4,0x2e,0x63,6,0x6f,0x52,0x6f, +0x76,0x70,0x92,0x75,0xa2,0x41,0x79,1,0x70,0x3e,0x72,2,0x69,0x2a,0x6c,0x31, +0x73,0xa3,0x44,0x13,0x6c,0x6c,0x69,0x63,0x31,0x10,0x72,1,0x69,0x34,0x6f,0x15, +0x6d,0x69,0x6e,0x6f,0x61,0x6e,0xa3,0xc1,0x11,0x6f,0x74,0x7f,1,0x6d,0x30,0x70, +0x10,0x74,0x2e,0x11,0x69,0x63,0x2f,0x12,0x6d,0x6f,0x6e,0x21,1,0x6d,0x28,0x72, +0x10,0x74,0x7f,0x10,0x6e,0xa3,0xc1,0x16,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa3, +0x65,0x61,0x32,0x68,0xa2,0x41,0x69,0x11,0x72,0x74,0xa3,0x43,3,0x6b,0x4c,0x6e, +0x50,0x72,0x76,0x75,0x1d,0x63,0x61,0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,0x61,0x6e, +0x69,0x61,0x6e,0xa3,0x9f,0x10,0x6d,0xa3,0x76,1,0x61,0x24,0x73,0x71,0x1d,0x64, +0x69,0x61,0x6e,0x61,0x62,0x6f,0x72,0x69,0x67,0x69,0x6e,0x61,0x6c,0x71,0x10,0x69, +0xa2,0x68,0x11,0x61,0x6e,0xa3,0x68,3,0x61,0x32,0x65,0x44,0x6f,0x52,0x72,0x10, +0x73,0xa3,0xbd,1,0x6b,0x26,0x6d,0xa3,0x42,0x11,0x6d,0x61,0xa3,0x76,0x10,0x72, +0x2c,0x13,0x6f,0x6b,0x65,0x65,0x2d,0x16,0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa3, +0xbd,6,0x68,0x4a,0x68,0x48,0x6e,0x4e,0x72,0x76,0x76,1,0x65,0x2a,0x73,0x10, +0x74,0xa3,0x75,0x13,0x73,0x74,0x61,0x6e,0xa3,0x75,0x11,0x6f,0x6d,0xa3,0xa1,0x11, +0x61,0x74,0x1f,0x6f,0x6c,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79, +0x70,0x68,0x73,0xa3,0x9c,1,0x61,0x3e,0x6d,2,0x65,0x2a,0x69,0xa3,0x74,0x6e, +0x27,0x13,0x6e,0x69,0x61,0x6e,0x27,0x10,0x62,0x24,0x11,0x69,0x63,0x25,0x64,0x30, +0x66,0x44,0x67,0x11,0x68,0x62,0xa3,0x9f,0x10,0x6c,1,0x61,0x26,0x6d,0xa3,0xa7, +0x10,0x6d,0xa3,0xa7,0x11,0x61,0x6b,0xa3,0x93,6,0x6c,0x3c,0x6c,0x52,0x6f,0x56, +0x72,0x66,0x75,1,0x67,0x30,0x68,1,0x64,0x79,0x69,0x10,0x64,0x79,0x10,0x69, +0x8e,0x13,0x6e,0x65,0x73,0x65,0x8f,0x11,0x69,0x73,0xa1,0x11,0x70,0x6f,0x2a,0x13, +0x6d,0x6f,0x66,0x6f,0x2b,0x10,0x61,1,0x68,0x2e,0x69,0x7c,0x12,0x6c,0x6c,0x65, +0x7d,0xa2,0x41,0x11,0x6d,0x69,0xa3,0x41,0x61,0x48,0x65,0x9c,0x68,1,0x61,0x2a, +0x6b,0x10,0x73,0xa3,0xa8,0x15,0x69,0x6b,0x73,0x75,0x6b,0x69,0xa3,0xa8,3,0x6c, +0x3a,0x6d,0x48,0x73,0x54,0x74,1,0x61,0x24,0x6b,0x9f,0x10,0x6b,0x9f,0x10,0x69, +0x9c,0x13,0x6e,0x65,0x73,0x65,0x9d,0x10,0x75,0xa2,0x82,0x10,0x6d,0xa3,0x82,0x10, +0x73,0xa2,0x86,0x13,0x61,0x76,0x61,0x68,0xa3,0x86,0x11,0x6e,0x67,0x28,0x12,0x61, +0x6c,0x69,0x29,3,0x6c,0x42,0x6e,0x90,0x74,0xa2,0x46,0x76,0x24,0x17,0x6f,0x77, +0x65,0x6c,0x6a,0x61,0x6d,0x6f,0x25,0x22,1,0x65,0x54,0x76,0x28,1,0x73,0x38, +0x74,0x2a,0x17,0x73,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x2b,0x16,0x79,0x6c,0x6c, +0x61,0x62,0x6c,0x65,0x29,0x18,0x61,0x64,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x23, +1,0x61,0x21,0x6f,0x1a,0x74,0x61,0x70,0x70,0x6c,0x69,0x63,0x61,0x62,0x6c,0x65, +0x21,0x26,0x1a,0x72,0x61,0x69,0x6c,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x27,1, +0x6e,0x2c,0x79,0x22,0x11,0x65,0x73,0x23,0x20,0x10,0x6f,0x21,1,0x6e,0x2c,0x79, +0x22,0x11,0x65,0x73,0x23,0x20,0x10,0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22, +0x11,0x65,0x73,0x23,0x24,0x13,0x61,0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,2, +0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11,0x65,0x73,0x23,0x24,0x13,0x61,0x79,0x62,0x65, +0x25,0x20,0x10,0x6f,0x21,0xb,0x72,0x39,0x76,0xc,0x76,0x33,0x78,0x2a,0x7a,0x11, +0x77,0x6a,0x43,0x10,0x78,0x21,0x72,0x28,0x73,0x50,0x74,0x31,1,0x65,0x24,0x69, +0x39,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f, +0x72,0x39,1,0x6d,0x35,0x70,0x18,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b, +0x35,0x6c,0x1f,0x6c,0x3c,0x6f,0x4a,0x70,1,0x70,0x37,0x72,0x14,0x65,0x70,0x65, +0x6e,0x64,0x37,0x28,1,0x66,0x2b,0x76,0x2c,0x10,0x74,0x2f,0x13,0x74,0x68,0x65, +0x72,0x21,0x63,0x4c,0x65,0x64,0x67,1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66, +0x74,0x65,0x72,0x7a,0x77,0x6a,0x41,0x10,0x7a,0x41,2,0x6e,0x23,0x6f,0x24,0x72, +0x25,0x14,0x6e,0x74,0x72,0x6f,0x6c,0x23,2,0x62,0x34,0x6d,0x4e,0x78,0x26,0x13, +0x74,0x65,0x6e,0x64,0x27,0x3a,1,0x61,0x24,0x67,0x3d,0x11,0x73,0x65,0x3a,0x12, +0x67,0x61,0x7a,0x3d,0x3e,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x3f,9,0x6e, +0x4a,0x6e,0x34,0x6f,0x44,0x73,0x60,0x75,0x94,0x78,0x10,0x78,0x21,0x10,0x75,0x2a, +0x14,0x6d,0x65,0x72,0x69,0x63,0x2b,1,0x6c,0x2c,0x74,0x12,0x68,0x65,0x72,0x21, +0x14,0x65,0x74,0x74,0x65,0x72,0x2d,3,0x63,0x36,0x65,0x46,0x70,0x31,0x74,0x32, +0x12,0x65,0x72,0x6d,0x33,0x3c,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3d,0x2e, +0x10,0x70,0x2f,0x10,0x70,0x34,0x12,0x70,0x65,0x72,0x35,0x61,0x46,0x63,0x52,0x65, +0x64,0x66,0x72,0x6c,2,0x65,0x2d,0x66,0x3b,0x6f,0x28,0x12,0x77,0x65,0x72,0x29, +0x10,0x74,0x22,0x12,0x65,0x72,0x6d,0x23,1,0x6c,0x24,0x72,0x37,0x24,0x12,0x6f, +0x73,0x65,0x25,0x10,0x78,0x38,0x13,0x74,0x65,0x6e,0x64,0x39,0x10,0x6f,0x26,0x13, +0x72,0x6d,0x61,0x74,0x27,0,0x10,0x6c,0x88,0x72,0x40,0x72,0x36,0x73,0x5e,0x77, +0x7a,0x78,0x8a,0x7a,0x11,0x77,0x6a,0x4b,1,0x65,0x24,0x69,0x3b,0x1e,0x67,0x69, +0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x3b,1,0x69, +0x24,0x71,0x3f,0x18,0x6e,0x67,0x6c,0x65,0x71,0x75,0x6f,0x74,0x65,0x3f,0x17,0x73, +0x65,0x67,0x73,0x70,0x61,0x63,0x65,0x4d,0x10,0x78,0x21,0x6c,0x36,0x6d,0x3c,0x6e, +0x76,0x6f,0x13,0x74,0x68,0x65,0x72,0x21,1,0x65,0x23,0x66,0x35,3,0x62,0x37, +0x69,0x28,0x6c,0x29,0x6e,0x2b,0x10,0x64,1,0x6c,0x34,0x6e,0x11,0x75,0x6d,0x2a, +0x12,0x6c,0x65,0x74,0x37,0x14,0x65,0x74,0x74,0x65,0x72,0x29,2,0x65,0x36,0x6c, +0x39,0x75,0x2c,0x14,0x6d,0x65,0x72,0x69,0x63,0x2d,0x14,0x77,0x6c,0x69,0x6e,0x65, +0x39,0x66,0x3f,0x66,0x40,0x67,0x4e,0x68,0x70,0x6b,0x10,0x61,0x26,0x15,0x74,0x61, +0x6b,0x61,0x6e,0x61,0x27,0x10,0x6f,0x24,0x13,0x72,0x6d,0x61,0x74,0x25,1,0x61, +0x3a,0x6c,0x19,0x75,0x65,0x61,0x66,0x74,0x65,0x72,0x7a,0x77,0x6a,0x49,0x10,0x7a, +0x49,1,0x65,0x24,0x6c,0x3d,0x19,0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,0x65, +0x72,0x3d,0x61,0x86,0x63,0x92,0x64,0x94,0x65,2,0x62,0x44,0x6d,0x5e,0x78,0x2e, +0x13,0x74,0x65,0x6e,0x64,0x32,0x15,0x6e,0x75,0x6d,0x6c,0x65,0x74,0x2f,0x42,1, +0x61,0x24,0x67,0x45,0x11,0x73,0x65,0x42,0x12,0x67,0x61,0x7a,0x45,0x46,0x16,0x6f, +0x64,0x69,0x66,0x69,0x65,0x72,0x47,0x15,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,0x10, +0x72,0x31,1,0x6f,0x24,0x71,0x41,0x18,0x75,0x62,0x6c,0x65,0x71,0x75,0x6f,0x74, +0x65,0x41,2,0x63,0x32,0x6e,0x3c,0x6f,0x22,0x12,0x70,0x65,0x6e,0x23,0x24,0x13, +0x6c,0x6f,0x73,0x65,0x25,0x20,0x12,0x6f,0x6e,0x65,0x21,6,0x6f,0x65,0x6f,0x4a, +0x72,0x5c,0x74,0x64,0x76,0x1d,0x69,0x73,0x75,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72, +0x6c,0x65,0x66,0x74,0x3d,0x18,0x76,0x65,0x72,0x73,0x74,0x72,0x75,0x63,0x6b,0x2d, +0x13,0x69,0x67,0x68,0x74,0x2f,0x11,0x6f,0x70,0x30,0x12,0x61,0x6e,0x64,2,0x62, +0x32,0x6c,0x62,0x72,0x13,0x69,0x67,0x68,0x74,0x3b,0x14,0x6f,0x74,0x74,0x6f,0x6d, +0x32,0x12,0x61,0x6e,0x64,1,0x6c,0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x35,0x12, +0x65,0x66,0x74,0x3f,0x12,0x65,0x66,0x74,0x36,0x17,0x61,0x6e,0x64,0x72,0x69,0x67, +0x68,0x74,0x39,0x62,0x2c,0x6c,0x5c,0x6e,0x10,0x61,0x21,0x14,0x6f,0x74,0x74,0x6f, +0x6d,0x22,0x12,0x61,0x6e,0x64,1,0x6c,0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x27, +0x12,0x65,0x66,0x74,0x25,0x12,0x65,0x66,0x74,0x28,0x17,0x61,0x6e,0x64,0x72,0x69, +0x67,0x68,0x74,0x2b,0xd,0x6e,0xaa,0x72,0x70,0x72,0x92,0x73,0xa2,0x46,0x74,0xa2, +0x54,0x76,1,0x69,0x60,0x6f,0x12,0x77,0x65,0x6c,0x62,1,0x64,0x3a,0x69,0x19, +0x6e,0x64,0x65,0x70,0x65,0x6e,0x64,0x65,0x6e,0x74,0x67,0x17,0x65,0x70,0x65,0x6e, +0x64,0x65,0x6e,0x74,0x65,1,0x72,0x2e,0x73,0x13,0x61,0x72,0x67,0x61,0x61,0x12, +0x61,0x6d,0x61,0x5f,0x1d,0x65,0x67,0x69,0x73,0x74,0x65,0x72,0x73,0x68,0x69,0x66, +0x74,0x65,0x72,0x57,0x1e,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x6d,0x6f,0x64,0x69, +0x66,0x69,0x65,0x72,0x59,0x12,0x6f,0x6e,0x65,1,0x6c,0x2c,0x6d,0x12,0x61,0x72, +0x6b,0x5d,0x14,0x65,0x74,0x74,0x65,0x72,0x5b,0x6e,0x3c,0x6f,0x7c,0x70,0x18,0x75, +0x72,0x65,0x6b,0x69,0x6c,0x6c,0x65,0x72,0x55,1,0x6f,0x4c,0x75,1,0x6b,0x3c, +0x6d,0x12,0x62,0x65,0x72,0x50,0x15,0x6a,0x6f,0x69,0x6e,0x65,0x72,0x53,0x11,0x74, +0x61,0x4f,0x16,0x6e,0x6a,0x6f,0x69,0x6e,0x65,0x72,0x4d,0x13,0x74,0x68,0x65,0x72, +0x21,0x67,0x3e,0x67,0x4a,0x69,0x64,0x6a,0x82,0x6d,0x1d,0x6f,0x64,0x69,0x66,0x79, +0x69,0x6e,0x67,0x6c,0x65,0x74,0x74,0x65,0x72,0x4b,0x1c,0x65,0x6d,0x69,0x6e,0x61, +0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x45,0x1e,0x6e,0x76,0x69,0x73,0x69,0x62, +0x6c,0x65,0x73,0x74,0x61,0x63,0x6b,0x65,0x72,0x47,0x14,0x6f,0x69,0x6e,0x65,0x72, +0x49,0x61,0xa2,0xba,0x62,0xa2,0xc0,0x63,1,0x61,0xa2,0xa2,0x6f,0x16,0x6e,0x73, +0x6f,0x6e,0x61,0x6e,0x74,0x2a,8,0x6b,0x67,0x6b,0x48,0x6d,0x52,0x70,0x5c,0x73, +0xa2,0x42,0x77,0x19,0x69,0x74,0x68,0x73,0x74,0x61,0x63,0x6b,0x65,0x72,0x43,0x14, +0x69,0x6c,0x6c,0x65,0x72,0x35,0x14,0x65,0x64,0x69,0x61,0x6c,0x37,1,0x6c,0x52, +0x72,0x10,0x65,1,0x63,0x2e,0x66,0x13,0x69,0x78,0x65,0x64,0x3d,0x19,0x65,0x64, +0x69,0x6e,0x67,0x72,0x65,0x70,0x68,0x61,0x3b,0x18,0x61,0x63,0x65,0x68,0x6f,0x6c, +0x64,0x65,0x72,0x39,0x10,0x75,1,0x62,0x3e,0x63,0x1b,0x63,0x65,0x65,0x64,0x69, +0x6e,0x67,0x72,0x65,0x70,0x68,0x61,0x41,0x15,0x6a,0x6f,0x69,0x6e,0x65,0x64,0x3f, +0x64,0x4c,0x66,0x52,0x68,0x5a,0x69,0x1e,0x6e,0x69,0x74,0x69,0x61,0x6c,0x70,0x6f, +0x73,0x74,0x66,0x69,0x78,0x65,0x64,0x33,0x12,0x65,0x61,0x64,0x2d,0x13,0x69,0x6e, +0x61,0x6c,0x2f,0x18,0x65,0x61,0x64,0x6c,0x65,0x74,0x74,0x65,0x72,0x31,0x1d,0x6e, +0x74,0x69,0x6c,0x6c,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x29,0x16,0x76, +0x61,0x67,0x72,0x61,0x68,0x61,0x23,1,0x69,0x4a,0x72,0x10,0x61,0x1f,0x68,0x6d, +0x69,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x6e,0x75,0x6d,0x62,0x65,0x72,0x27,0x12, +0x6e,0x64,0x75,0x25,2,0x72,0x38,0x74,0x46,0x75,0x26,0x15,0x70,0x72,0x69,0x67, +0x68,0x74,0x27,0x20,0x15,0x6f,0x74,0x61,0x74,0x65,0x64,0x21,1,0x72,0x24,0x75, +0x25,0x22,0x18,0x61,0x6e,0x73,0x66,0x6f,0x72,0x6d,0x65,0x64,1,0x72,0x32,0x75, +0x15,0x70,0x72,0x69,0x67,0x68,0x74,0x25,0x15,0x6f,0x74,0x61,0x74,0x65,0x64,0x23, +0xd,0x6e,0xc1,0x86,0x73,0xa8,0x73,0x4c,0x74,0xa2,0x76,0x75,0xa2,0x83,0x7a,0xd8, +0x70,0,2,0x6c,0xd9,0x20,0,0x70,0xd9,0x40,0,0x73,0xc3,0,0xfe,0xf, +0,0,0,7,0x6f,0x3c,0x6f,0xff,8,0,0,0,0x70,0x3a,0x75,0x6e, +0x79,0x13,0x6d,0x62,0x6f,0x6c,0xff,0xf,0,0,0,0x11,0x61,0x63,1,0x65, +0x34,0x69,0x15,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa5,0,0x18,0x73,0x65,0x70,0x61, +0x72,0x61,0x74,0x6f,0x72,0xc3,0,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0xe1, +0,0,0x63,0xff,2,0,0,0,0x65,0x38,0x6b,0xff,4,0,0,0, +0x6d,0xff,1,0,0,0,0x16,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x70, +0,0x1d,0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72, +0x31,1,0x6e,0x40,0x70,0x1c,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74, +0x74,0x65,0x72,0x25,0x17,0x61,0x73,0x73,0x69,0x67,0x6e,0x65,0x64,0x23,0x6e,0xa2, +0x69,0x6f,0xa2,0x89,0x70,0xfe,0x30,0xf8,0,0,9,0x69,0x33,0x69,0xff,0x10, +0,0,0,0x6f,0xfd,0x80,0,0,0x72,0x54,0x73,0xf9,0,0,0x75,0x12, +0x6e,0x63,0x74,0xfe,0x30,0xf8,0,0,0x15,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff, +0x30,0xf8,0,0,0x17,0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0xdd,0,0, +0x61,0x48,0x63,0xfd,0x40,0,0,0x64,0xe9,0,0,0x65,0xfd,0x20,0,0, +0x66,0xff,0x20,0,0,0,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65, +0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x40,0,0xbe,0,3,0x64,0xa7,0, +0x6c,0xab,0,0x6f,0x30,0x75,0x13,0x6d,0x62,0x65,0x72,0xbf,0,0xb2,0,0x1b, +0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa1,1,0x70,0x92, +0x74,0x12,0x68,0x65,0x72,0xe6,0x80,1,3,0x6c,0x40,0x6e,0x4a,0x70,0x56,0x73, +0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff,8,0,0,0,0x14,0x65,0x74,0x74,0x65, +0x72,0x61,0x14,0x75,0x6d,0x62,0x65,0x72,0xb3,0,0x19,0x75,0x6e,0x63,0x74,0x75, +0x61,0x74,0x69,0x6f,0x6e,0xfd,0x80,0,0,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63, +0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xf9,0,0,0x66,0xc0,0xc4,0x66,0xa2,0x47, +0x69,0xa2,0x64,0x6c,0xa2,0x79,0x6d,0xa4,0xc0,4,0x61,0x6c,0x63,0xa5,0,0x65, +0xa3,0x80,0x6e,0xa1,0x6f,0x15,0x64,0x69,0x66,0x69,0x65,0x72,1,0x6c,0x38,0x73, +0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff,4,0,0,0,0x14,0x65,0x74,0x74,0x65, +0x72,0x41,1,0x72,0x3c,0x74,0x16,0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,1, +0,0,0,0x10,0x6b,0xa5,0xc0,1,0x69,0x32,0x6f,0x13,0x72,0x6d,0x61,0x74, +0xdb,0,0,0x1d,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69, +0x6f,0x6e,0xff,0x20,0,0,0,0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c,0x70, +0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x10,0,0,0,0x9c, +7,0x6d,0x18,0x6d,0x41,0x6f,0x28,0x74,0x31,0x75,0x25,0x60,0x1c,0x77,0x65,0x72, +0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x29,0x63,0x3d,0x65,0x28,0x69, +0x42,0x6c,0x29,0x13,0x74,0x74,0x65,0x72,0x9c,0x15,0x6e,0x75,0x6d,0x62,0x65,0x72, +0xab,0,0x1a,0x6e,0x65,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x20, +0,0x63,0x46,0x64,0xa2,0x96,0x65,0x1b,0x6e,0x63,0x6c,0x6f,0x73,0x69,0x6e,0x67, +0x6d,0x61,0x72,0x6b,0xa3,0x80,0xe6,0x80,1,7,0x6e,0x57,0x6e,0x52,0x6f,0x5e, +0x73,0xe1,0,0,0x75,0x1b,0x72,0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62, +0x6f,0x6c,0xff,2,0,0,0,0x22,0x12,0x74,0x72,0x6c,0xd9,0x80,0,0xdc, +0,0,1,0x6d,0x62,0x6e,1,0x6e,0x30,0x74,0x12,0x72,0x6f,0x6c,0xd9,0x80, +0,0x1f,0x65,0x63,0x74,0x6f,0x72,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69, +0x6f,0x6e,0xfd,0x40,0,0,0x19,0x62,0x69,0x6e,0x69,0x6e,0x67,0x6d,0x61,0x72, +0x6b,0xa5,0xc0,0x61,0x58,0x63,0xd9,0x80,0,0x66,0xdb,0,0,0x6c,0x1d,0x6f, +0x73,0x65,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x20,0, +0,0x18,0x73,0x65,0x64,0x6c,0x65,0x74,0x74,0x65,0x72,0x3d,2,0x61,0x32,0x65, +0x50,0x69,0x12,0x67,0x69,0x74,0xa7,0,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74, +0x75,0x61,0x74,0x69,0x6f,0x6e,0xe9,0,0,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e, +0x75,0x6d,0x62,0x65,0x72,0xa7,0 }; -const char PropNameData::nameGroups[23100]={ +const char PropNameData::nameGroups[23338]={ 2,'A','l','p','h','a',0,'A','l','p','h','a','b','e','t','i','c',0, 4,'N',0,'N','o',0,'F',0,'F','a','l','s','e',0,4,'Y',0,'Y','e','s',0,'T',0,'T','r','u','e',0, 2,'N','R',0,'N','o','t','_','R','e','o','r','d','e','r','e','d',0, @@ -1176,6 +1184,10 @@ const char PropNameData::nameGroups[23100]={ 2,'R','G','I','_','E','m','o','j','i','_','Z','W','J','_','S','e','q','u','e','n','c','e',0, 'R','G','I','_','E','m','o','j','i','_','Z','W','J','_','S','e','q','u','e','n','c','e',0, 2,'R','G','I','_','E','m','o','j','i',0,'R','G','I','_','E','m','o','j','i',0, +2,'I','D','S','U',0,'I','D','S','_','U','n','a','r','y','_','O','p','e','r','a','t','o','r',0, +2,'I','D','_','C','o','m','p','a','t','_','M','a','t','h','_','S','t','a','r','t',0,'I','D','_','C','o','m','p','a','t','_', +'M','a','t','h','_','S','t','a','r','t',0,2,'I','D','_','C','o','m','p','a','t','_','M','a','t','h','_','C','o','n','t','i', +'n','u','e',0,'I','D','_','C','o','m','p','a','t','_','M','a','t','h','_','C','o','n','t','i','n','u','e',0, 2,'b','c',0,'B','i','d','i','_','C','l','a','s','s',0,2,'L',0,'L','e','f','t','_','T','o','_','R','i','g','h','t',0, 2,'R',0,'R','i','g','h','t','_','T','o','_','L','e','f','t',0, 2,'E','N',0,'E','u','r','o','p','e','a','n','_','N','u','m','b','e','r',0, @@ -1568,9 +1580,11 @@ const char PropNameData::nameGroups[23100]={ 'e','n','d','e','d','_','A',0,2,'K','a','k','t','o','v','i','k','_','N','u','m','e','r','a','l','s',0, 'K','a','k','t','o','v','i','k','_','N','u','m','e','r','a','l','s',0, 2,'K','a','w','i',0,'K','a','w','i',0,2,'N','a','g','_','M','u','n','d','a','r','i',0, -'N','a','g','_','M','u','n','d','a','r','i',0,2,'c','c','c',0,'C','a','n','o','n','i','c','a','l','_','C','o','m','b','i', -'n','i','n','g','_','C','l','a','s','s',0,2,'d','t',0,'D','e','c','o','m','p','o','s','i','t','i','o','n','_','T','y','p', -'e',0,3,'N','o','n','e',0,'N','o','n','e',0,'n','o','n','e',0, +'N','a','g','_','M','u','n','d','a','r','i',0,2,'C','J','K','_','E','x','t','_','I',0,'C','J','K','_','U','n','i','f','i', +'e','d','_','I','d','e','o','g','r','a','p','h','s','_','E','x','t','e','n','s','i','o','n','_','I',0, +2,'c','c','c',0,'C','a','n','o','n','i','c','a','l','_','C','o','m','b','i','n','i','n','g','_','C','l','a','s','s',0, +2,'d','t',0,'D','e','c','o','m','p','o','s','i','t','i','o','n','_','T','y','p','e',0, +3,'N','o','n','e',0,'N','o','n','e',0,'n','o','n','e',0, 3,'C','a','n',0,'C','a','n','o','n','i','c','a','l',0,'c','a','n',0, 3,'C','o','m',0,'C','o','m','p','a','t',0,'c','o','m',0, 3,'E','n','c',0,'C','i','r','c','l','e',0,'e','n','c',0, @@ -1739,7 +1753,11 @@ const char PropNameData::nameGroups[23100]={ 's','i','s',0,2,'C','J',0,'C','o','n','d','i','t','i','o','n','a','l','_','J','a','p','a','n','e','s','e','_','S','t','a', 'r','t','e','r',0,2,'H','L',0,'H','e','b','r','e','w','_','L','e','t','t','e','r',0, 2,'E','B',0,'E','_','B','a','s','e',0,2,'E','M',0,'E','_','M','o','d','i','f','i','e','r',0, -2,'Z','W','J',0,'Z','W','J',0,2,'n','t',0,'N','u','m','e','r','i','c','_','T','y','p','e',0, +2,'Z','W','J',0,'Z','W','J',0,2,'A','K',0,'A','k','s','a','r','a',0, +2,'A','P',0,'A','k','s','a','r','a','_','P','r','e','b','a','s','e',0, +2,'A','S',0,'A','k','s','a','r','a','_','S','t','a','r','t',0, +2,'V','F',0,'V','i','r','a','m','a','_','F','i','n','a','l',0, +2,'V','I',0,'V','i','r','a','m','a',0,2,'n','t',0,'N','u','m','e','r','i','c','_','T','y','p','e',0, 2,'N','o','n','e',0,'N','o','n','e',0,2,'D','e',0,'D','e','c','i','m','a','l',0, 2,'D','i',0,'D','i','g','i','t',0,2,'N','u',0,'N','u','m','e','r','i','c',0, 2,'s','c',0,'S','c','r','i','p','t',0,2,'Z','y','y','y',0,'C','o','m','m','o','n',0, diff --git a/yass/third_party/icu/source/common/putil.cpp b/yass/third_party/icu/source/common/putil.cpp index 83f5cf496d..d6dcd96c0c 100644 --- a/yass/third_party/icu/source/common/putil.cpp +++ b/yass/third_party/icu/source/common/putil.cpp @@ -722,7 +722,7 @@ extern U_IMPORT char *U_TZNAME[]; #include /* Needed to search through system timezone files */ #endif static char gTimeZoneBuffer[PATH_MAX]; -static char *gTimeZoneBufferPtr = nullptr; +static const char *gTimeZoneBufferPtr = nullptr; #endif #if !U_PLATFORM_USES_ONLY_WIN32_API @@ -1177,7 +1177,7 @@ uprv_tzname(int n) gTimeZoneBuffer[ret] = 0; char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL); - if (tzZoneInfoTailPtr != nullptr + if (tzZoneInfoTailPtr != NULL && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen)) { return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen); diff --git a/yass/third_party/icu/source/common/rbbi.cpp b/yass/third_party/icu/source/common/rbbi.cpp index 73716ab406..599279fb72 100644 --- a/yass/third_party/icu/source/common/rbbi.cpp +++ b/yass/third_party/icu/source/common/rbbi.cpp @@ -1125,6 +1125,7 @@ static icu::UStack *gLanguageBreakFactories = nullptr; static const icu::UnicodeString *gEmptyString = nullptr; static icu::UInitOnce gLanguageBreakFactoriesInitOnce {}; static icu::UInitOnce gRBBIInitOnce {}; +static icu::ICULanguageBreakFactory *gICULanguageBreakFactory = nullptr; /** * Release all static memory held by breakiterator. @@ -1153,37 +1154,41 @@ static void U_CALLCONV rbbiInit() { ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup); } -static void U_CALLCONV initLanguageFactories() { - UErrorCode status = U_ZERO_ERROR; +static void U_CALLCONV initLanguageFactories(UErrorCode& status) { U_ASSERT(gLanguageBreakFactories == nullptr); gLanguageBreakFactories = new UStack(_deleteFactory, nullptr, status); if (gLanguageBreakFactories != nullptr && U_SUCCESS(status)) { - ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status); - gLanguageBreakFactories->push(builtIn, status); + LocalPointer factory(new ICULanguageBreakFactory(status), status); + if (U_SUCCESS(status)) { + gICULanguageBreakFactory = factory.orphan(); + gLanguageBreakFactories->push(gICULanguageBreakFactory, status); #ifdef U_LOCAL_SERVICE_HOOK - LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status); - if (extra != nullptr) { - gLanguageBreakFactories->push(extra, status); - } + LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status); + if (extra != nullptr) { + gLanguageBreakFactories->push(extra, status); + } #endif + } } ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup); } +void ensureLanguageFactories(UErrorCode& status) { + umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories, status); +} static const LanguageBreakEngine* -getLanguageBreakEngineFromFactory(UChar32 c) +getLanguageBreakEngineFromFactory(UChar32 c, const char* locale) { - umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories); - if (gLanguageBreakFactories == nullptr) { - return nullptr; - } + UErrorCode status = U_ZERO_ERROR; + ensureLanguageFactories(status); + if (U_FAILURE(status)) return nullptr; int32_t i = gLanguageBreakFactories->size(); const LanguageBreakEngine *lbe = nullptr; while (--i >= 0) { LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i)); - lbe = factory->getEngineFor(c); + lbe = factory->getEngineFor(c, locale); if (lbe != nullptr) { break; } @@ -1199,7 +1204,7 @@ getLanguageBreakEngineFromFactory(UChar32 c) // //------------------------------------------------------------------------------- const LanguageBreakEngine * -RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { +RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c, const char* locale) { const LanguageBreakEngine *lbe = nullptr; UErrorCode status = U_ZERO_ERROR; @@ -1215,14 +1220,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { int32_t i = fLanguageBreakEngines->size(); while (--i >= 0) { lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i)); - if (lbe->handles(c)) { + if (lbe->handles(c, locale)) { return lbe; } } // No existing dictionary took the character. See if a factory wants to // give us a new LanguageBreakEngine for this character. - lbe = getLanguageBreakEngineFromFactory(c); + lbe = getLanguageBreakEngineFromFactory(c, locale); // If we got one, use it and push it on our stack. if (lbe != nullptr) { @@ -1259,6 +1264,18 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { return fUnhandledBreakEngine; } +#ifndef U_HIDE_DRAFT_API +void U_EXPORT2 RuleBasedBreakIterator::registerExternalBreakEngine( + ExternalBreakEngine* toAdopt, UErrorCode& status) { + LocalPointer engine(toAdopt, status); + if (U_FAILURE(status)) return; + ensureLanguageFactories(status); + if (U_FAILURE(status)) return; + gICULanguageBreakFactory->addExternalEngine(engine.orphan(), status); +} +#endif /* U_HIDE_DRAFT_API */ + + void RuleBasedBreakIterator::dumpCache() { fBreakCache->dumpCache(); } diff --git a/yass/third_party/icu/source/common/rbbi_cache.cpp b/yass/third_party/icu/source/common/rbbi_cache.cpp index 02ca555a89..f7a283f69e 100644 --- a/yass/third_party/icu/source/common/rbbi_cache.cpp +++ b/yass/third_party/icu/source/common/rbbi_cache.cpp @@ -158,12 +158,13 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo // We now have a dictionary character. Get the appropriate language object // to deal with it. - const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c); + const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine( + c, fBI->getLocaleID(ULOC_REQUESTED_LOCALE, status)); // Ask the language object if there are any breaks. It will add them to the cache and // leave the text pointer on the other side of its range, ready to search for the next one. if (lbe != nullptr) { - foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status); + foundBreakCount += lbe->findBreaks(text, current, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status); } // Reload the loop variables for the next go-round diff --git a/yass/third_party/icu/source/common/rbbirb.cpp b/yass/third_party/icu/source/common/rbbirb.cpp index 7177254ec4..92cccc1a33 100644 --- a/yass/third_party/icu/source/common/rbbirb.cpp +++ b/yass/third_party/icu/source/common/rbbirb.cpp @@ -66,7 +66,6 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, fForwardTable = nullptr; fRuleStatusVals = nullptr; fChainRules = false; - fLBCMNoChain = false; fLookAheadHardBreak = false; fUSetNodes = nullptr; fRuleStatusVals = nullptr; diff --git a/yass/third_party/icu/source/common/rbbirb.h b/yass/third_party/icu/source/common/rbbirb.h index d983a184b6..96d3aa643d 100644 --- a/yass/third_party/icu/source/common/rbbirb.h +++ b/yass/third_party/icu/source/common/rbbirb.h @@ -159,9 +159,6 @@ public: UBool fChainRules; // True for chained Unicode TR style rules. // False for traditional regexp rules. - UBool fLBCMNoChain; // True: suppress chaining of rules on - // chars with LineBreak property == CM. - UBool fLookAheadHardBreak; // True: Look ahead matches cause an // immediate break, no continuing for the // longest match. diff --git a/yass/third_party/icu/source/common/rbbiscan.cpp b/yass/third_party/icu/source/common/rbbiscan.cpp index 455ace78b8..844b063909 100644 --- a/yass/third_party/icu/source/common/rbbiscan.cpp +++ b/yass/third_party/icu/source/common/rbbiscan.cpp @@ -547,8 +547,6 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart); if (opt == UNICODE_STRING("chain", 5)) { fRB->fChainRules = true; - } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) { - fRB->fLBCMNoChain = true; } else if (opt == UNICODE_STRING("forward", 7)) { fRB->fDefaultTree = &fRB->fForwardTree; } else if (opt == UNICODE_STRING("reverse", 7)) { diff --git a/yass/third_party/icu/source/common/rbbitblb.cpp b/yass/third_party/icu/source/common/rbbitblb.cpp index 0c2bcff4e5..8b40136fc7 100644 --- a/yass/third_party/icu/source/common/rbbitblb.cpp +++ b/yass/third_party/icu/source/common/rbbitblb.cpp @@ -458,21 +458,6 @@ void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNod // We've got a node that can end a match. - // !!LBCMNoChain implementation: If this node's val correspond to - // the Line Break $CM char class, don't chain from it. - // TODO: Remove this. !!LBCMNoChain is deprecated, and is not used - // by any of the standard ICU rules. - if (fRB->fLBCMNoChain) { - UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); - if (c != -1) { - // c == -1 occurs with sets containing only the {eof} marker string. - ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK); - if (cLBProp == U_LB_COMBINING_MARK) { - continue; - } - } - } - // Now iterate over the nodes that can start a match, looking for ones // with the same char class as our ending node. RBBINode *startNode; diff --git a/yass/third_party/icu/source/common/sources.txt b/yass/third_party/icu/source/common/sources.txt index 90171fe9bd..c1d2080bcc 100644 --- a/yass/third_party/icu/source/common/sources.txt +++ b/yass/third_party/icu/source/common/sources.txt @@ -139,6 +139,8 @@ ulist.cpp uloc.cpp uloc_keytype.cpp uloc_tag.cpp +ulocale.cpp +ulocbuilder.cpp umapfile.cpp umath.cpp umutablecptrie.cpp diff --git a/yass/third_party/icu/source/common/ubidi_props_data.h b/yass/third_party/icu/source/common/ubidi_props_data.h index 5dcd1d7c78..f85dc09675 100644 --- a/yass/third_party/icu/source/common/ubidi_props_data.h +++ b/yass/third_party/icu/source/common/ubidi_props_data.h @@ -9,11 +9,11 @@ #ifdef INCLUDED_FROM_UBIDI_PROPS_C -static const UVersionInfo ubidi_props_dataVersion={0xf,0,0,0}; +static const UVersionInfo ubidi_props_dataVersion={0xf,1,0,0}; -static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6bc0,0x65d0,0x28,0x620,0x8cc,0x10ac0,0x10d24,0,0,0,0,0,0,0,0x6702b6}; +static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6ba0,0x65b0,0x28,0x620,0x8cc,0x10ac0,0x10d24,0,0,0,0,0,0,0,0x6702b6}; -static const uint16_t ubidi_props_trieIndex[13024]={ +static const uint16_t ubidi_props_trieIndex[13008]={ 0x387,0x38f,0x397,0x39f,0x3b7,0x3bf,0x3c7,0x3cf,0x3a7,0x3af,0x3a7,0x3af,0x3a7,0x3af,0x3a7,0x3af, 0x3a7,0x3af,0x3a7,0x3af,0x3d5,0x3dd,0x3e5,0x3ed,0x3f5,0x3fd,0x3f9,0x401,0x409,0x411,0x40c,0x414, 0x3a7,0x3af,0x3a7,0x3af,0x41c,0x424,0x3a7,0x3af,0x3a7,0x3af,0x3a7,0x3af,0x42a,0x432,0x43a,0x442, @@ -38,8 +38,8 @@ static const uint16_t ubidi_props_trieIndex[13024]={ 0x7e8,0x7f0,0x7f8,0x7ff,0x806,0x80e,0x812,0x7e0,0x67c,0x67c,0x67c,0x81a,0x820,0x67c,0x67c,0x826, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x82e,0x3a7,0x3a7,0x3a7,0x836,0x3a7,0x3a7,0x3a7,0x3f5, 0x83e,0x846,0x849,0x3a7,0x851,0x67c,0x67c,0x67f,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x858,0x85e, -0x86e,0x866,0x3a7,0x3a7,0x876,0x61f,0x3a7,0x3ce,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x67c,0x835, -0x3dc,0x3a7,0x87e,0x886,0x3a7,0x88e,0x896,0x3a7,0x3a7,0x3a7,0x3a7,0x89a,0x3a7,0x3a7,0x674,0x3cd, +0x86e,0x866,0x3a7,0x3a7,0x876,0x61f,0x3a7,0x3ce,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x67c,0x87e, +0x3dc,0x3a7,0x85e,0x882,0x3a7,0x88a,0x892,0x3a7,0x3a7,0x3a7,0x3a7,0x896,0x3a7,0x3a7,0x674,0x3cd, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, @@ -96,10 +96,10 @@ static const uint16_t ubidi_props_trieIndex[13024]={ 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x87e,0x67c,0x595,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x8a1,0x3a7,0x3a7,0x8a6,0x8ae,0x3a7,0x3a7,0x5cb,0x67c,0x673,0x3a7,0x3a7,0x8b6,0x3a7,0x3a7,0x3a7, -0x8be,0x8c5,0x645,0x8cd,0x3a7,0x3a7,0x5a1,0x8d5,0x3a7,0x8dd,0x8e4,0x3a7,0x501,0x8e9,0x3a7,0x51a, -0x3a7,0x8f1,0x8f9,0x51c,0x3a7,0x8fd,0x51b,0x905,0x3a7,0x3a7,0x3a7,0x90b,0x3a7,0x3a7,0x3a7,0x912, +0x3a7,0x3a7,0x3a7,0x3a7,0x85e,0x67c,0x595,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x89d,0x3a7,0x3a7,0x8a2,0x8aa,0x3a7,0x3a7,0x5cb,0x67c,0x673,0x3a7,0x3a7,0x8b2,0x3a7,0x3a7,0x3a7, +0x8ba,0x8c1,0x645,0x8c9,0x3a7,0x3a7,0x5a1,0x8d1,0x3a7,0x8d9,0x8e0,0x3a7,0x501,0x8e5,0x3a7,0x51a, +0x3a7,0x8ed,0x8f5,0x51c,0x3a7,0x8f9,0x51b,0x901,0x3a7,0x3a7,0x3a7,0x907,0x3a7,0x3a7,0x3a7,0x90e, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, @@ -139,9 +139,9 @@ static const uint16_t ubidi_props_trieIndex[13024]={ 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x926,0x91a,0x91e,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6, -0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x92e,0x936,0x4a6,0x4a6,0x4a6,0x93b,0x93f, -0x947,0x94f,0x953,0x95b,0x4a6,0x4a6,0x4a6,0x95f,0x967,0x397,0x96f,0x977,0x3a7,0x3a7,0x3a7,0x97f, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x922,0x916,0x91a,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6, +0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x92a,0x932,0x4a6,0x4a6,0x4a6,0x937,0x93b, +0x943,0x94b,0x94f,0x957,0x4a6,0x4a6,0x4a6,0x95b,0x963,0x397,0x96b,0x973,0x3a7,0x3a7,0x3a7,0x97b, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0xe9c,0xe9c,0xedc,0xf1c,0xe9c,0xe9c,0xe9c,0xe9c,0xe9c,0xe9c,0xf54,0xf94,0xfd4,0xfe4,0x1024,0x1030, @@ -178,68 +178,68 @@ static const uint16_t ubidi_props_trieIndex[13024]={ 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd89, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd89, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x987,0x3a7,0x67c,0x67c,0x98f,0x61f,0x3a7,0x514, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x997,0x3a7,0x3a7,0x3a7,0x99e,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x983,0x3a7,0x67c,0x67c,0x98b,0x61f,0x3a7,0x514, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x993,0x3a7,0x3a7,0x3a7,0x99a,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x9a6,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c, -0x9ae,0x9b2,0x43c,0x43c,0x43c,0x43c,0x9c2,0x9ba,0x43c,0x9ca,0x43c,0x43c,0x9d2,0x9d8,0x43c,0x43c, -0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x9e8,0x9e0,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c, -0x43c,0x43c,0x43c,0x9f0,0x43c,0x9f8,0x4a6,0xa00,0x43c,0xa08,0xa0f,0xa15,0xa1d,0xa21,0xa29,0x43c, -0x51b,0xa31,0xa38,0xa3f,0x41e,0xa47,0x569,0x3a7,0x501,0xa4e,0x3a7,0xa54,0x41e,0xa59,0xa61,0x3a7, -0x3a7,0xa66,0x51b,0x3a7,0x3a7,0x3a7,0x836,0xa6e,0x41e,0x5a3,0x57e,0xa75,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0xa31,0xa7d,0x3a7,0x3a7,0xa85,0xa8d,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xa91,0xa99,0x3a7, -0x3a7,0xaa1,0x57e,0xaa9,0x3a7,0xaaf,0x3a7,0x3a7,0x60f,0xab7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0xabc,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xac3,0xacb,0x3a7,0x3a7,0x3a7,0xace,0x57e,0xad6, -0xada,0xae2,0x3a7,0xae9,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0xaf0,0x3a7,0x3a7,0xafe,0xaf8,0x3a7,0x3a7,0x3a7,0xb06,0xb0e,0x3a7,0xb12,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x5a5,0x41e,0x99e,0xb1a,0x3a7,0x3a7,0x3a7,0xb27,0xb22,0x3a7, +0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x9a2,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c, +0x9aa,0x9ae,0x43c,0x43c,0x43c,0x43c,0x9be,0x9b6,0x43c,0x9c6,0x43c,0x43c,0x9ce,0x9d4,0x43c,0x43c, +0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x9e4,0x9dc,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c, +0x43c,0x43c,0x43c,0x9ec,0x43c,0x9f4,0x4a6,0x9fc,0x43c,0xa04,0xa0b,0xa11,0xa19,0xa1d,0xa25,0x43c, +0x51b,0xa2d,0xa34,0xa3b,0x41e,0xa43,0x569,0x3a7,0x501,0xa4a,0x3a7,0xa50,0x41e,0xa55,0xa5d,0x3a7, +0x3a7,0xa62,0x51b,0x3a7,0x3a7,0x3a7,0x836,0xa6a,0x41e,0x5a3,0x57e,0xa71,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0xa2d,0xa79,0x3a7,0x3a7,0xa81,0xa89,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xa8d,0xa95,0x3a7, +0x3a7,0xa9d,0x57e,0xaa5,0x3a7,0xaab,0x3a7,0x3a7,0x60f,0xab3,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0xab8,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xabf,0xac7,0x3a7,0x3a7,0x3a7,0xaca,0x57e,0xad2, +0xad6,0xade,0x3a7,0xae5,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0xaec,0x3a7,0x3a7,0xafa,0xaf4,0x3a7,0x3a7,0x3a7,0xb02,0xb0a,0x3a7,0xb0e,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x5a5,0x41e,0x99a,0xb16,0x3a7,0x3a7,0x3a7,0xb23,0xb1e,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0xb2f,0xb37,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xb3d, -0x3a7,0xb43,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0xb2b,0xb33,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xb39, +0x3a7,0xb3f,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0xa55,0x3a7,0xb49,0x3a7,0x3a7,0xb51,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0xa51,0x3a7,0xb45,0x3a7,0x3a7,0xb4d,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x535,0xb59,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x535,0xb55,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3f5,0xb61,0x500,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0xb69,0xb71,0xb77,0x3a7,0xb7d,0x67c,0x67c,0xb85,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x67c,0x67c,0xb8d,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xb93, -0x3a7,0xb9a,0x3a7,0xb96,0x3a7,0xb9d,0x3a7,0xba5,0xba9,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3f5,0xbb1,0x3f5,0xbb8,0xbbf,0xbc7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3f5,0xb5d,0x500,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0xb65,0xb6d,0xb73,0x3a7,0xb79,0x67c,0x67c,0xb81,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x67c,0x67c,0xb89,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xb8f, +0x3a7,0xb96,0x3a7,0xb92,0x3a7,0xb99,0x3a7,0xba1,0xba5,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3f5,0xbad,0x3f5,0xbb4,0xbbb,0xbc3,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xbcf,0xbd7,0x3a7,0x3a7,0xa55,0x3a7,0x3a7, -0x3a7,0x3a7,0xb43,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xa81,0x3a7, -0xbdc,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0xbe4,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0xbec, -0x43c,0xbf4,0xbf4,0xbfb,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c, -0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x91e,0x4a6,0x4a6,0x43c, -0x43c,0x4a6,0x4a6,0xc03,0x43c,0x43c,0x43c,0x43c,0x43c,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6, -0xc0b,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x67c,0xc13,0x67c,0x67c,0x67f,0xc18,0xc1c, -0x858,0xc24,0x3c9,0x3a7,0xc2a,0x3a7,0xc2f,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x783,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xbcb,0xbd3,0x3a7,0x3a7,0xa51,0x3a7,0x3a7, +0x3a7,0x3a7,0xb3f,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xa7d,0x3a7, +0xbd8,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0xbe0,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0xbe8, +0x43c,0xbf0,0xbf0,0xbf7,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c, +0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x91a,0x4a6,0x4a6,0x43c, +0x43c,0x4a6,0x4a6,0xbff,0x43c,0x43c,0x43c,0x43c,0x43c,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6, +0xc07,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x67c,0xc0f,0x67c,0x67c,0x67f,0xc14,0xc18, +0x858,0xc20,0x3c9,0x3a7,0xc26,0x3a7,0xc2b,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x783,0x3a7,0x3a7,0x3a7, 0x3a7,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c, -0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0xc37, -0x98f,0x67c,0x67c,0x67c,0xc3e,0x67c,0x67c,0xc45,0xc4d,0xc13,0x67c,0xc55,0x67c,0xc5d,0xc62,0x3a7, -0x3a7,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67f,0xc6a,0xc73,0xc77,0xc7f, -0xc6f,0x67c,0x67c,0x67c,0x67c,0xc87,0x67c,0x792,0xc8f,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0xc33, +0x98b,0x67c,0x67c,0x67c,0xc3a,0x67c,0x67c,0xc41,0xc49,0xc0f,0x67c,0xc51,0x67c,0xc59,0xc5e,0x3a7, +0x3a7,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67f,0xc66,0xc6f,0xc73,0xc7b, +0xc6b,0x67c,0x67c,0x67c,0x67c,0xc83,0x67c,0x792,0xc8b,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xc96,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xc92,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, 0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7, -0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xc96,0xca6,0xc9e,0xc9e,0xc9e,0xca7,0xca7,0xca7, -0xca7,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0xcaf,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7, -0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7, -0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7, -0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7, -0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0x386,0x386,0x386,0x12,0x12,0x12,0x12, +0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xc92,0xca2,0xc9a,0xc9a,0xc9a,0xca3,0xca3,0xca3, +0xca3,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0xcab,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3, +0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3, +0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3, +0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3, +0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0x386,0x386,0x386,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4, 4,4,0xa,0xa,0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2, @@ -551,15 +551,14 @@ static const uint16_t ubidi_props_trieIndex[13024]={ 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0, 0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa, 0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0, 0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a, 0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, +0,0xb1,0xb1,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0,0,0,0, +0,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa, @@ -935,13 +934,13 @@ static const UBiDiProps ubidi_props_singleton={ ubidi_props_trieIndex+3612, nullptr, 3612, - 9412, + 9396, 0x1a0, 0xe9c, 0x0, 0x0, 0x110000, - 0x32dc, + 0x32cc, nullptr, 0, false, false, 0, nullptr }, { 2,2,0,0 } diff --git a/yass/third_party/icu/source/common/ucase.cpp b/yass/third_party/icu/source/common/ucase.cpp index 392e1266ae..4fd23fc935 100644 --- a/yass/third_party/icu/source/common/ucase.cpp +++ b/yass/third_party/icu/source/common/ucase.cpp @@ -317,43 +317,6 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { } } -namespace { - -/** - * Add the simple case closure mapping, - * except if there is not actually an scf relationship between the two characters. - * TODO: Unicode should probably add the corresponding scf mappings. - * See https://crbug.com/v8/13377 and Unicode-internal PAG issue #23. - * If & when those scf mappings are added, we should be able to remove all of these exceptions. - */ -void addOneSimpleCaseClosure(UChar32 c, UChar32 t, const USetAdder *sa) { - switch (c) { - case 0x0390: - if (t == 0x1FD3) { return; } - break; - case 0x03B0: - if (t == 0x1FE3) { return; } - break; - case 0x1FD3: - if (t == 0x0390) { return; } - break; - case 0x1FE3: - if (t == 0x03B0) { return; } - break; - case 0xFB05: - if (t == 0xFB06) { return; } - break; - case 0xFB06: - if (t == 0xFB05) { return; } - break; - default: - break; - } - sa->add(sa->set, t); -} - -} // namespace - U_CFUNC void U_EXPORT2 ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); @@ -397,7 +360,7 @@ ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) { pe=pe0; UChar32 mapping; GET_SLOT_VALUE(excWord, idx, pe, mapping); - addOneSimpleCaseClosure(c, mapping, sa); + sa->add(sa->set, mapping); } } if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) { @@ -405,7 +368,7 @@ ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) { int32_t delta; GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); UChar32 mapping = (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; - addOneSimpleCaseClosure(c, mapping, sa); + sa->add(sa->set, mapping); } /* get the closure string pointer & length */ @@ -448,7 +411,7 @@ ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) { for(int32_t idx=0; idxadd(sa->set, mapping); } } } diff --git a/yass/third_party/icu/source/common/ucase_props_data.h b/yass/third_party/icu/source/common/ucase_props_data.h index 7e6de63fb7..92b59520cc 100644 --- a/yass/third_party/icu/source/common/ucase_props_data.h +++ b/yass/third_party/icu/source/common/ucase_props_data.h @@ -9,9 +9,9 @@ #ifdef INCLUDED_FROM_UCASE_CPP -static const UVersionInfo ucase_props_dataVersion={0xf,0,0,0}; +static const UVersionInfo ucase_props_dataVersion={0xf,1,0,0}; -static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x76f2,0x66c8,0x683,0x172,0,0,0,0,0,0,0,0,0,0,3}; +static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x76ec,0x66c8,0x680,0x172,0,0,0,0,0,0,0,0,0,0,3}; static const uint16_t ucase_props_trieIndex[13148]={ 0x355,0x35d,0x365,0x36d,0x37b,0x383,0x38b,0x393,0x39b,0x3a3,0x3aa,0x3b2,0x3ba,0x3c2,0x3ca,0x3d2, @@ -509,9 +509,9 @@ static const uint16_t ucase_props_trieIndex[13148]={ 0x39b9,0x3a29,0x3a99,0x3b09,0x3b7b,0x3beb,0x3c5b,0x3ccb,0x3d3b,0x3dab,0x3e1b,0x3e8b,0x411,0x411,0x3ef9,0x3f79, 0x3fe9,0,0x4069,0x40e9,0xfc12,0xfc12,0xdb12,0xdb12,0x419b,4,0x4209,4,4,4,0x4259,0x42d9, 0x4349,0,0x43c9,0x4449,0xd512,0xd512,0xd512,0xd512,0x44fb,4,4,4,0x411,0x411,0x4569,0x4619, -0,0,0x46e9,0x4769,0xfc12,0xfc12,0xce12,0xce12,0,4,4,4,0x411,0x411,0x4819,0x48c9, -0x4999,0x391,0x4a19,0x4a99,0xfc12,0xfc12,0xc812,0xc812,0xfc92,4,4,4,0,0,0x4b49,0x4bc9, -0x4c39,0,0x4cb9,0x4d39,0xc012,0xc012,0xc112,0xc112,0x4deb,4,4,0,0,0,0,0, +0,0,0x46d9,0x4759,0xfc12,0xfc12,0xce12,0xce12,0,4,4,4,0x411,0x411,0x4809,0x48b9, +0x4979,0x391,0x49f9,0x4a79,0xfc12,0xfc12,0xc812,0xc812,0xfc92,4,4,4,0,0,0x4b29,0x4ba9, +0x4c19,0,0x4c99,0x4d19,0xc012,0xc012,0xc112,0xc112,0x4dcb,4,4,0,0,0,0,0, 0,0,0,0,0,0,0,4,4,4,4,4,0,0,0,0, 0,0,0,0,4,4,0,0,0,0,0,0,4,0,0,4, 0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0, @@ -525,8 +525,8 @@ static const uint16_t ucase_props_trieIndex[13148]={ 0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2, 0,0,1,2,2,2,1,1,2,2,2,1,0,2,0,0, -0,2,2,2,2,2,0,0,0,0,0,0,2,0,0x4e5a,0, -2,0,0x4e9a,0x4eda,2,2,0,1,2,2,0xe12,2,1,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0x4e3a,0, +2,0,0x4e7a,0x4eba,2,2,0,1,2,2,0xe12,2,1,0,0,0, 0,1,0,0,1,1,2,2,0,0,0,0,0,2,1,1, 0x21,0x21,0,0,0,0,0xf211,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0x812,0x812,0x812,0x812,0x812,0x812,0x812,0x812, @@ -541,13 +541,13 @@ static const uint16_t ucase_props_trieIndex[13148]={ 0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812, 0x1812,0x1812,0x1812,0x1812,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811, 0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811, -0xe811,0xe811,0xe811,0xe811,0x92,0xff91,0x4f1a,0x4f3a,0x4f5a,0x4f79,0x4f99,0x92,0xff91,0x92,0xff91,0x92, -0xff91,0x4fba,0x4fda,0x4ffa,0x501a,1,0x92,0xff91,1,0x92,0xff91,1,1,1,1,1, -0x25,5,0x503a,0x503a,0x92,0xff91,0x92,0xff91,1,0,0,0,0,0,0,0x92, +0xe811,0xe811,0xe811,0xe811,0x92,0xff91,0x4efa,0x4f1a,0x4f3a,0x4f59,0x4f79,0x92,0xff91,0x92,0xff91,0x92, +0xff91,0x4f9a,0x4fba,0x4fda,0x4ffa,1,0x92,0xff91,1,0x92,0xff91,1,1,1,1,1, +0x25,5,0x501a,0x501a,0x92,0xff91,0x92,0xff91,1,0,0,0,0,0,0,0x92, 0xff91,0x92,0xff91,0x44,0x44,0x44,0x92,0xff91,0,0,0,0,0,0,0,0, -0,0,0,0,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059, -0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059, -0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0,0x5059,0,0,0,0,0,0x5059,0,0, +0,0,0,0,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039, +0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039, +0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0,0x5039,0,0,0,0,0,0x5039,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, @@ -562,7 +562,7 @@ static const uint16_t ucase_props_trieIndex[13148]={ 0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, -0x92,0xff91,0x507a,0x50b9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x505a,0x5099,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, 0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0x44,4,4,4,0, 0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,4,0x92,0xff91,0x92,0xff91, 0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, @@ -573,11 +573,11 @@ static const uint16_t ucase_props_trieIndex[13148]={ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,0x92,0xff91, 0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,1,1,0x92,0xff91, 0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, -5,1,1,1,1,1,1,1,1,0x92,0xff91,0x92,0xff91,0x50fa,0x92,0xff91, -0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,4,4,4,0x92,0xff91,0x511a,1,0, +5,1,1,1,1,1,1,1,1,0x92,0xff91,0x92,0xff91,0x50da,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,4,4,4,0x92,0xff91,0x50fa,1,0, 0x92,0xff91,0x92,0xff91,0x1811,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, -0x92,0xff91,0x513a,0x515a,0x517a,0x519a,0x513a,1,0x51ba,0x51da,0x51fa,0x521a,0x92,0xff91,0x92,0xff91, -0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0xe812,0x523a,0x525a,0x92,0xff91,0x92,0xff91,0, +0x92,0xff91,0x511a,0x513a,0x515a,0x517a,0x511a,1,0x519a,0x51ba,0x51da,0x51fa,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0xe812,0x521a,0x523a,0x92,0xff91,0x92,0xff91,0, 0,0,0,0,0x92,0xff91,0,1,0,1,0x92,0xff91,0x92,0xff91,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,5,5,5,0x92,0xff91,0,5,5,1,0,0,0,0,0, @@ -607,17 +607,17 @@ static const uint16_t ucase_props_trieIndex[13148]={ 0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0, 0,0,0,4,4,0,0x64,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0x5279,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x5259,1,1,1,1, 1,1,1,4,5,5,5,5,1,1,1,1,1,1,1,1, -1,5,4,4,0,0,0,0,0x5299,0x52c9,0x52f9,0x5329,0x5359,0x5389,0x53b9,0x53e9, -0x5419,0x5449,0x5479,0x54a9,0x54d9,0x5509,0x5539,0x5569,0x5b99,0x5bc9,0x5bf9,0x5c29,0x5c59,0x5c89,0x5cb9,0x5ce9, -0x5d19,0x5d49,0x5d79,0x5da9,0x5dd9,0x5e09,0x5e39,0x5e69,0x5e99,0x5ec9,0x5ef9,0x5f29,0x5f59,0x5f89,0x5fb9,0x5fe9, -0x6019,0x6049,0x6079,0x60a9,0x60d9,0x6109,0x6139,0x6169,0x5599,0x55c9,0x55f9,0x5629,0x5659,0x5689,0x56b9,0x56e9, -0x5719,0x5749,0x5779,0x57a9,0x57d9,0x5809,0x5839,0x5869,0x5899,0x58c9,0x58f9,0x5929,0x5959,0x5989,0x59b9,0x59e9, -0x5a19,0x5a49,0x5a79,0x5aa9,0x5ad9,0x5b09,0x5b39,0x5b69,0,0,0,0,0,4,0,0, +1,5,4,4,0,0,0,0,0x5279,0x52a9,0x52d9,0x5309,0x5339,0x5369,0x5399,0x53c9, +0x53f9,0x5429,0x5459,0x5489,0x54b9,0x54e9,0x5519,0x5549,0x5b79,0x5ba9,0x5bd9,0x5c09,0x5c39,0x5c69,0x5c99,0x5cc9, +0x5cf9,0x5d29,0x5d59,0x5d89,0x5db9,0x5de9,0x5e19,0x5e49,0x5e79,0x5ea9,0x5ed9,0x5f09,0x5f39,0x5f69,0x5f99,0x5fc9, +0x5ff9,0x6029,0x6059,0x6089,0x60b9,0x60e9,0x6119,0x6149,0x5579,0x55a9,0x55d9,0x5609,0x5639,0x5669,0x5699,0x56c9, +0x56f9,0x5729,0x5759,0x5789,0x57b9,0x57e9,0x5819,0x5849,0x5879,0x58a9,0x58d9,0x5909,0x5939,0x5969,0x5999,0x59c9, +0x59f9,0x5a29,0x5a59,0x5a89,0x5ab9,0x5ae9,0x5b19,0x5b49,0,0,0,0,0,4,0,0, 4,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x6199,0x6219,0x6299,0x6319,0x63c9,0x6479,0x6519,0, -0,0,0,0,0,0,0,0,0,0,0,0x65b9,0x6639,0x66b9,0x6739,0x67b9, +0,0,0,0,0,0,0,0,0x6179,0x61f9,0x6279,0x62f9,0x63a9,0x6459,0x64e9,0, +0,0,0,0,0,0,0,0,0,0,0,0x6589,0x6609,0x6689,0x6709,0x6789, 0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,4, @@ -838,7 +838,7 @@ static const uint16_t ucase_props_trieIndex[13148]={ 0,0,0,0,0,0,0,0,0,0,0,0 }; -static const uint16_t ucase_props_exceptions[1667]={ +static const uint16_t ucase_props_exceptions[1664]={ 0xc850,0x20,2,0x130,0x131,0x4810,0x20,0x841,0x6b,1,0x212a,0x841,0x73,1,0x17f,0x5c50, 0x20,2,0x130,0x131,0x844,0x4b,1,0x212a,0x844,0x53,1,0x17f,0x806,0x3bc,0x39c,0x841, 0xe5,1,0x212b,0x8c0,1,0x2220,0x73,0x73,0x53,0x53,0x53,0x73,0x1e9e,0x844,0xc5,1, @@ -909,41 +909,40 @@ static const uint16_t ucase_props_exceptions[1667]={ 0x3b7,0x3b9,0x397,0x399,0x880,0x2220,0x3ae,0x3b9,0x389,0x399,0x389,0x345,0x880,0x2220,0x3b7,0x342, 0x397,0x342,0x397,0x342,0x880,0x3330,0x3b7,0x342,0x3b9,0x397,0x342,0x399,0x397,0x342,0x345,0xc90, 9,0x220,0x3b7,0x3b9,0x397,0x399,0x880,0x3330,0x3b9,0x308,0x300,0x399,0x308,0x300,0x399,0x308, -0x300,0x8c0,1,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x390,0x880,0x2220, -0x3b9,0x342,0x399,0x342,0x399,0x342,0x880,0x3330,0x3b9,0x308,0x342,0x399,0x308,0x342,0x399,0x308, -0x342,0x880,0x3330,0x3c5,0x308,0x300,0x3a5,0x308,0x300,0x3a5,0x308,0x300,0x8c0,1,0x3330,0x3c5, -0x308,0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x3b0,0x880,0x2220,0x3c1,0x313,0x3a1,0x313,0x3a1, -0x313,0x880,0x2220,0x3c5,0x342,0x3a5,0x342,0x3a5,0x342,0x880,0x3330,0x3c5,0x308,0x342,0x3a5,0x308, -0x342,0x3a5,0x308,0x342,0x880,0x2220,0x1f7c,0x3b9,0x1ffa,0x399,0x1ffa,0x345,0x890,9,0x220,0x3c9, -0x3b9,0x3a9,0x399,0x880,0x2220,0x3ce,0x3b9,0x38f,0x399,0x38f,0x345,0x880,0x2220,0x3c9,0x342,0x3a9, -0x342,0x3a9,0x342,0x880,0x3330,0x3c9,0x342,0x3b9,0x3a9,0x342,0x399,0x3a9,0x342,0x345,0xc90,9, -0x220,0x3c9,0x3b9,0x3a9,0x399,0xc50,0x1d5d,1,0x3a9,0xc50,0x20bf,1,0x4b,0xc50,0x2046,1, -0xc5,0xc10,0x29f7,0xc10,0xee6,0xc10,0x29e7,0xc10,0x2a2b,0xc10,0x2a28,0xc10,0x2a1c,0xc10,0x29fd,0xc10, -0x2a1f,0xc10,0x2a1e,0xc10,0x2a3f,0xc10,0x1c60,0x841,0xa64b,1,0x1c88,0x844,0xa64a,1,0x1c88,0xc10, -0x8a04,0xc10,0xa528,0xc10,0xa544,0xc10,0xa54f,0xc10,0xa54b,0xc10,0xa541,0xc10,0xa512,0xc10,0xa52a,0xc10, -0xa515,0x810,0x3a0,0xc10,0xa543,0xc10,0x8a38,0xc10,0x3a0,0x806,0x13a0,0x13a0,0x806,0x13a1,0x13a1,0x806, -0x13a2,0x13a2,0x806,0x13a3,0x13a3,0x806,0x13a4,0x13a4,0x806,0x13a5,0x13a5,0x806,0x13a6,0x13a6,0x806,0x13a7, -0x13a7,0x806,0x13a8,0x13a8,0x806,0x13a9,0x13a9,0x806,0x13aa,0x13aa,0x806,0x13ab,0x13ab,0x806,0x13ac,0x13ac, -0x806,0x13ad,0x13ad,0x806,0x13ae,0x13ae,0x806,0x13af,0x13af,0x806,0x13b0,0x13b0,0x806,0x13b1,0x13b1,0x806, -0x13b2,0x13b2,0x806,0x13b3,0x13b3,0x806,0x13b4,0x13b4,0x806,0x13b5,0x13b5,0x806,0x13b6,0x13b6,0x806,0x13b7, -0x13b7,0x806,0x13b8,0x13b8,0x806,0x13b9,0x13b9,0x806,0x13ba,0x13ba,0x806,0x13bb,0x13bb,0x806,0x13bc,0x13bc, -0x806,0x13bd,0x13bd,0x806,0x13be,0x13be,0x806,0x13bf,0x13bf,0x806,0x13c0,0x13c0,0x806,0x13c1,0x13c1,0x806, -0x13c2,0x13c2,0x806,0x13c3,0x13c3,0x806,0x13c4,0x13c4,0x806,0x13c5,0x13c5,0x806,0x13c6,0x13c6,0x806,0x13c7, -0x13c7,0x806,0x13c8,0x13c8,0x806,0x13c9,0x13c9,0x806,0x13ca,0x13ca,0x806,0x13cb,0x13cb,0x806,0x13cc,0x13cc, -0x806,0x13cd,0x13cd,0x806,0x13ce,0x13ce,0x806,0x13cf,0x13cf,0x806,0x13d0,0x13d0,0x806,0x13d1,0x13d1,0x806, -0x13d2,0x13d2,0x806,0x13d3,0x13d3,0x806,0x13d4,0x13d4,0x806,0x13d5,0x13d5,0x806,0x13d6,0x13d6,0x806,0x13d7, -0x13d7,0x806,0x13d8,0x13d8,0x806,0x13d9,0x13d9,0x806,0x13da,0x13da,0x806,0x13db,0x13db,0x806,0x13dc,0x13dc, -0x806,0x13dd,0x13dd,0x806,0x13de,0x13de,0x806,0x13df,0x13df,0x806,0x13e0,0x13e0,0x806,0x13e1,0x13e1,0x806, -0x13e2,0x13e2,0x806,0x13e3,0x13e3,0x806,0x13e4,0x13e4,0x806,0x13e5,0x13e5,0x806,0x13e6,0x13e6,0x806,0x13e7, -0x13e7,0x806,0x13e8,0x13e8,0x806,0x13e9,0x13e9,0x806,0x13ea,0x13ea,0x806,0x13eb,0x13eb,0x806,0x13ec,0x13ec, -0x806,0x13ed,0x13ed,0x806,0x13ee,0x13ee,0x806,0x13ef,0x13ef,0x880,0x2220,0x66,0x66,0x46,0x46,0x46, -0x66,0x880,0x2220,0x66,0x69,0x46,0x49,0x46,0x69,0x880,0x2220,0x66,0x6c,0x46,0x4c,0x46, -0x6c,0x880,0x3330,0x66,0x66,0x69,0x46,0x46,0x49,0x46,0x66,0x69,0x880,0x3330,0x66,0x66, -0x6c,0x46,0x46,0x4c,0x46,0x66,0x6c,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74, -0xfb06,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0xfb05,0x880,0x2220,0x574,0x576,0x544, -0x546,0x544,0x576,0x880,0x2220,0x574,0x565,0x544,0x535,0x544,0x565,0x880,0x2220,0x574,0x56b,0x544, -0x53b,0x544,0x56b,0x880,0x2220,0x57e,0x576,0x54e,0x546,0x54e,0x576,0x880,0x2220,0x574,0x56d,0x544, -0x53d,0x544,0x56d +0x300,0x882,0x390,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x880,0x2220,0x3b9, +0x342,0x399,0x342,0x399,0x342,0x880,0x3330,0x3b9,0x308,0x342,0x399,0x308,0x342,0x399,0x308,0x342, +0x880,0x3330,0x3c5,0x308,0x300,0x3a5,0x308,0x300,0x3a5,0x308,0x300,0x882,0x3b0,0x3330,0x3c5,0x308, +0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x880,0x2220,0x3c1,0x313,0x3a1,0x313,0x3a1,0x313,0x880, +0x2220,0x3c5,0x342,0x3a5,0x342,0x3a5,0x342,0x880,0x3330,0x3c5,0x308,0x342,0x3a5,0x308,0x342,0x3a5, +0x308,0x342,0x880,0x2220,0x1f7c,0x3b9,0x1ffa,0x399,0x1ffa,0x345,0x890,9,0x220,0x3c9,0x3b9,0x3a9, +0x399,0x880,0x2220,0x3ce,0x3b9,0x38f,0x399,0x38f,0x345,0x880,0x2220,0x3c9,0x342,0x3a9,0x342,0x3a9, +0x342,0x880,0x3330,0x3c9,0x342,0x3b9,0x3a9,0x342,0x399,0x3a9,0x342,0x345,0xc90,9,0x220,0x3c9, +0x3b9,0x3a9,0x399,0xc50,0x1d5d,1,0x3a9,0xc50,0x20bf,1,0x4b,0xc50,0x2046,1,0xc5,0xc10, +0x29f7,0xc10,0xee6,0xc10,0x29e7,0xc10,0x2a2b,0xc10,0x2a28,0xc10,0x2a1c,0xc10,0x29fd,0xc10,0x2a1f,0xc10, +0x2a1e,0xc10,0x2a3f,0xc10,0x1c60,0x841,0xa64b,1,0x1c88,0x844,0xa64a,1,0x1c88,0xc10,0x8a04,0xc10, +0xa528,0xc10,0xa544,0xc10,0xa54f,0xc10,0xa54b,0xc10,0xa541,0xc10,0xa512,0xc10,0xa52a,0xc10,0xa515,0x810, +0x3a0,0xc10,0xa543,0xc10,0x8a38,0xc10,0x3a0,0x806,0x13a0,0x13a0,0x806,0x13a1,0x13a1,0x806,0x13a2,0x13a2, +0x806,0x13a3,0x13a3,0x806,0x13a4,0x13a4,0x806,0x13a5,0x13a5,0x806,0x13a6,0x13a6,0x806,0x13a7,0x13a7,0x806, +0x13a8,0x13a8,0x806,0x13a9,0x13a9,0x806,0x13aa,0x13aa,0x806,0x13ab,0x13ab,0x806,0x13ac,0x13ac,0x806,0x13ad, +0x13ad,0x806,0x13ae,0x13ae,0x806,0x13af,0x13af,0x806,0x13b0,0x13b0,0x806,0x13b1,0x13b1,0x806,0x13b2,0x13b2, +0x806,0x13b3,0x13b3,0x806,0x13b4,0x13b4,0x806,0x13b5,0x13b5,0x806,0x13b6,0x13b6,0x806,0x13b7,0x13b7,0x806, +0x13b8,0x13b8,0x806,0x13b9,0x13b9,0x806,0x13ba,0x13ba,0x806,0x13bb,0x13bb,0x806,0x13bc,0x13bc,0x806,0x13bd, +0x13bd,0x806,0x13be,0x13be,0x806,0x13bf,0x13bf,0x806,0x13c0,0x13c0,0x806,0x13c1,0x13c1,0x806,0x13c2,0x13c2, +0x806,0x13c3,0x13c3,0x806,0x13c4,0x13c4,0x806,0x13c5,0x13c5,0x806,0x13c6,0x13c6,0x806,0x13c7,0x13c7,0x806, +0x13c8,0x13c8,0x806,0x13c9,0x13c9,0x806,0x13ca,0x13ca,0x806,0x13cb,0x13cb,0x806,0x13cc,0x13cc,0x806,0x13cd, +0x13cd,0x806,0x13ce,0x13ce,0x806,0x13cf,0x13cf,0x806,0x13d0,0x13d0,0x806,0x13d1,0x13d1,0x806,0x13d2,0x13d2, +0x806,0x13d3,0x13d3,0x806,0x13d4,0x13d4,0x806,0x13d5,0x13d5,0x806,0x13d6,0x13d6,0x806,0x13d7,0x13d7,0x806, +0x13d8,0x13d8,0x806,0x13d9,0x13d9,0x806,0x13da,0x13da,0x806,0x13db,0x13db,0x806,0x13dc,0x13dc,0x806,0x13dd, +0x13dd,0x806,0x13de,0x13de,0x806,0x13df,0x13df,0x806,0x13e0,0x13e0,0x806,0x13e1,0x13e1,0x806,0x13e2,0x13e2, +0x806,0x13e3,0x13e3,0x806,0x13e4,0x13e4,0x806,0x13e5,0x13e5,0x806,0x13e6,0x13e6,0x806,0x13e7,0x13e7,0x806, +0x13e8,0x13e8,0x806,0x13e9,0x13e9,0x806,0x13ea,0x13ea,0x806,0x13eb,0x13eb,0x806,0x13ec,0x13ec,0x806,0x13ed, +0x13ed,0x806,0x13ee,0x13ee,0x806,0x13ef,0x13ef,0x880,0x2220,0x66,0x66,0x46,0x46,0x46,0x66,0x880, +0x2220,0x66,0x69,0x46,0x49,0x46,0x69,0x880,0x2220,0x66,0x6c,0x46,0x4c,0x46,0x6c,0x880, +0x3330,0x66,0x66,0x69,0x46,0x46,0x49,0x46,0x66,0x69,0x880,0x3330,0x66,0x66,0x6c,0x46, +0x46,0x4c,0x46,0x66,0x6c,0x882,0xfb06,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0x8c0,1, +0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0xfb05,0x880,0x2220,0x574,0x576,0x544,0x546,0x544,0x576, +0x880,0x2220,0x574,0x565,0x544,0x535,0x544,0x565,0x880,0x2220,0x574,0x56b,0x544,0x53b,0x544,0x56b, +0x880,0x2220,0x57e,0x576,0x54e,0x546,0x54e,0x576,0x880,0x2220,0x574,0x56d,0x544,0x53d,0x544,0x56d }; static const uint16_t ucase_props_unfold[370]={ diff --git a/yass/third_party/icu/source/common/ucasemap.cpp b/yass/third_party/icu/source/common/ucasemap.cpp index 1d8a8b6c2f..f6a0106aec 100644 --- a/yass/third_party/icu/source/common/ucasemap.cpp +++ b/yass/third_party/icu/source/common/ucasemap.cpp @@ -679,14 +679,18 @@ void toUpper(uint32_t options, // Adding one only to the final vowel in a longer sequence // (which does not occur in normal writing) would require lookahead. // Set the same flag as for preserving an existing dialytika. - if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && - (upper == 0x399 || upper == 0x3A5)) { - data |= HAS_DIALYTIKA; + if ((data & HAS_VOWEL) != 0 && + (state & (AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT | AFTER_VOWEL_WITH_COMBINING_ACCENT)) != + 0 && + (upper == 0x399 || upper == 0x3A5)) { + data |= (state & AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT) != 0 ? HAS_DIALYTIKA + : HAS_COMBINING_DIALYTIKA; } int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. if ((data & HAS_YPOGEGRAMMENI) != 0) { numYpogegrammeni = 1; } + const UBool hasPrecomposedAccent = (data & HAS_ACCENT) != 0; // Skip combining diacritics after this Greek letter. int32_t nextNextIndex = nextIndex; while (nextIndex < srcLength) { @@ -704,7 +708,8 @@ void toUpper(uint32_t options, } } if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { - nextState |= AFTER_VOWEL_WITH_ACCENT; + nextState |= hasPrecomposedAccent ? AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT + : AFTER_VOWEL_WITH_COMBINING_ACCENT; } // Map according to Greek rules. UBool addTonos = false; @@ -715,7 +720,7 @@ void toUpper(uint32_t options, !isFollowedByCasedLetter(src, nextIndex, srcLength)) { // Keep disjunctive "or" with (only) a tonos. // We use the same "word boundary" conditions as for the Final_Sigma test. - if (i == nextIndex) { + if (hasPrecomposedAccent) { upper = 0x389; // Preserve the precomposed form. } else { addTonos = true; diff --git a/yass/third_party/icu/source/common/ucasemap_imp.h b/yass/third_party/icu/source/common/ucasemap_imp.h index 71d0e9033f..bc83c6bd78 100644 --- a/yass/third_party/icu/source/common/ucasemap_imp.h +++ b/yass/third_party/icu/source/common/ucasemap_imp.h @@ -263,7 +263,8 @@ static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALY // State bits. static const uint32_t AFTER_CASED = 1; -static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2; +static const uint32_t AFTER_VOWEL_WITH_COMBINING_ACCENT = 2; +static const uint32_t AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT = 4; uint32_t getLetterData(UChar32 c); diff --git a/yass/third_party/icu/source/common/uchar_props_data.h b/yass/third_party/icu/source/common/uchar_props_data.h index f4d3932574..0324223975 100644 --- a/yass/third_party/icu/source/common/uchar_props_data.h +++ b/yass/third_party/icu/source/common/uchar_props_data.h @@ -9,9 +9,9 @@ #ifdef INCLUDED_FROM_UCHAR_C -static const UVersionInfo dataVersion={0xf,0,0,0}; +static const UVersionInfo dataVersion={0xf,1,0,0}; -static const uint16_t propsTrie_index[23016]={ +static const uint16_t propsTrie_index[23156]={ 0x495,0x49d,0x4a5,0x4ad,0x4c5,0x4cd,0x4d5,0x4dd,0x4e5,0x4ed,0x4f3,0x4fb,0x503,0x50b,0x513,0x51b, 0x521,0x529,0x531,0x539,0x53c,0x544,0x54c,0x554,0x55c,0x564,0x560,0x568,0x570,0x578,0x57d,0x585, 0x58d,0x595,0x599,0x5a1,0x5a9,0x5b1,0x5b9,0x5c1,0x5bd,0x5c5,0x5ca,0x5d2,0x5d8,0x5e0,0x5e8,0x5f0, @@ -51,53 +51,53 @@ static const uint16_t propsTrie_index[23016]={ 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x934,0x934, -0xc2e,0x600,0xc31,0x600,0xc39,0xc3f,0xc47,0xc4f,0xc54,0x600,0x600,0xc58,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xc5f,0x600,0xc66,0xc6c,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xc74,0x600,0x600,0x600,0xc7c,0x600, +0xc2e,0xc35,0xc37,0x600,0xc3f,0xc45,0xc4d,0xc55,0xc5a,0x600,0x600,0xc5e,0x600,0x600,0x600,0xc64, +0xc6b,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xc72,0x600,0xc79,0xc7f,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xc87,0x600,0x600,0x600,0xc8f,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0xc7e,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xc85,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0xc91,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xc98,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0xc8c,0x600,0x600,0x600,0xc93,0xc9b,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0xc9f,0x600,0x600,0x600,0xca6,0xcae,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xca0,0x600,0x600,0xca8,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0xcb3,0xcb8,0x600,0x600,0xcc0,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xcac,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xcc4,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xcc9,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xcc7,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xcd1,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0xcd7,0xcdf,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xce5, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xcaf,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0xcec,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xcb2,0x600,0x600,0x600, +0x600,0xcf1,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0xcb8,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0xcf6,0x600,0x600,0x600,0xc32,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0xcd3,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0xcfc,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0xd04,0xd0b,0xd0f,0x600,0x600,0x600,0xccb,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0xcc0,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0xd1e,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0xcc5,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0xcca,0x600,0x600,0x600,0xccf,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0xcd7,0xcde,0xce2,0x600,0x600,0x600,0xce9,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0xcf7,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0xcef,0x934,0xcff,0x9ad,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0xd04,0xd0c,0x4e5,0xd1c,0xd14,0x600,0x600,0xd24,0xd2c,0xd3c,0x4e5,0xd41,0xd49,0xd4f,0xd56,0xd34, -0xd5e,0xd66,0x600,0xd6e,0xd7e,0xd81,0xd76,0xd89,0x655,0xd91,0xd98,0x8f6,0x6a3,0xda8,0xda0,0xdb0, -0x600,0xdb8,0xdc0,0xdc8,0x600,0xdd0,0xdd8,0xde0,0xde8,0xdf0,0xdf4,0xdfc,0x535,0x535,0x600,0xe04, +0x600,0x600,0x600,0x600,0xd16,0x934,0xd26,0x9ad,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0xd2b,0xd33,0x4e5,0xd43,0xd3b,0x600,0x600,0xd4b,0xd53,0xd63,0x4e5,0xd68,0xd70,0xd76,0xd7d,0xd5b, +0xd85,0xd8d,0x600,0xd95,0xda5,0xda8,0xd9d,0xdb0,0x655,0xdb8,0xdbf,0x8f6,0x6a3,0xdcf,0xdc7,0xdd7, +0x600,0xddf,0xde7,0xdef,0x600,0xdf7,0xdff,0xe07,0xe0f,0xe17,0xe1b,0xe23,0x535,0x535,0x600,0xe2b, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, @@ -119,29 +119,29 @@ static const uint16_t propsTrie_index[23016]={ 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xe0c,0xe18,0xe10, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xe33,0xe3f,0xe37, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20, -0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0x600,0x600,0x600,0xe30,0x600,0xcea,0xe37,0xe3c, -0x600,0x600,0x600,0xe44,0x600,0x600,0x901,0x4b5,0xe5a,0xe4a,0xe52,0x600,0x600,0xe62,0xe6a,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xe6f,0x938,0x600,0xe77,0x600,0xe7d,0xe81, -0xe89,0xe91,0xe98,0xea0,0x600,0x600,0x600,0xea6,0xebe,0x4a5,0xec6,0xece,0xed3,0x916,0xeae,0xeb6, -0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20, -0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20,0xe20, +0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47, +0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0x600,0x600,0x600,0xe57,0x600,0xccc,0xe5e,0xe63, +0x600,0x600,0x600,0xe6b,0x600,0x600,0x901,0x4b5,0xe81,0xe71,0xe79,0x600,0x600,0xe89,0xe91,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xe96,0x938,0x600,0xe9e,0x600,0xea4,0xea8, +0xeb0,0xeb8,0xebf,0xec7,0x600,0x600,0x600,0xecd,0xee5,0x4a5,0xeed,0xef5,0xefa,0x916,0xed5,0xedd, +0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47, +0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47,0xe47, 0x12d4,0x12d4,0x1314,0x1354,0x1394,0x13cc,0x140c,0x144c,0x1484,0x14c4,0x14f0,0x1530,0x1570,0x1580,0x15c0,0x15f4, 0x1634,0x1664,0x16a4,0x16e4,0x16f4,0x1728,0x1760,0x17a0,0x17e0,0x1820,0x1854,0x1880,0x18c0,0x18f8,0x1914,0x1954, 0xa80,0xac0,0xb00,0xb40,0xb80,0xbab,0xbeb,0xa40,0xc0e,0xa40,0xa40,0xa40,0xa40,0xc4e,0x1db,0x1db, @@ -180,132 +180,132 @@ static const uint16_t propsTrie_index[23016]={ 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0xedb,0xee2,0xeea,0x4b5,0x600,0x600,0x600,0xef2,0xf02,0xefa,0xf19,0xf0a,0xf11,0xf21,0xf25,0xf29, -0x4b5,0x4b5,0x4b5,0x4b5,0x8f6,0x600,0xf31,0xf39,0x600,0xf41,0xf49,0xf4d,0xf55,0x600,0xf5d,0x4b5, -0x58d,0x597,0xf65,0x600,0xf69,0xf71,0xf81,0xf79,0x600,0xf89,0x600,0xf90,0xfa0,0xf98,0x4b5,0x4b5, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xb88,0x902,0xfa8,0xfb8,0xfb0,0x4b5,0x4b5, -0xfc8,0xfc0,0xfcb,0xfd3,0x916,0xfdb,0x4b5,0xfe3,0xfeb,0xff3,0x4b5,0x4b5,0x600,0x1003,0x100b,0xffb, -0x101b,0x1022,0x1013,0x102a,0x1032,0x4b5,0x1042,0x103a,0x600,0x1045,0x104d,0x1055,0x105d,0x1065,0x4b5,0x4b5, -0x600,0x600,0x106d,0x4b5,0x58d,0x1075,0x535,0x107d,0x600,0x1085,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x108d,0x600,0x1095,0x4b5,0x109a,0x10a2,0x10aa,0x10b1,0xfdf,0x10b9,0xfdf,0x10c1,0xb88, -0x10d1,0x636,0x10d9,0x10c9,0x98f,0x10e1,0x10e9,0x10ef,0x1107,0x10f7,0x10ff,0x110b,0x98f,0x111b,0x1113,0x1123, -0x113b,0x112b,0x1133,0x4b5,0x1142,0x114a,0x658,0x1152,0x1162,0x1168,0x1170,0x115a,0x4b5,0x4b5,0x4b5,0x4b5, -0x600,0x1178,0x1180,0x1099,0x600,0x1188,0x1190,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x1198,0x11a0,0x4b5, -0x600,0x11a8,0x11b0,0x11b8,0x600,0x11c8,0x11c0,0x4b5,0x870,0x11d0,0x11d8,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x600,0x11e0,0x4b5,0x4b5,0x4b5,0x58d,0x535,0x11e8,0x11f8,0x11fe,0x11f0,0x4b5,0x4b5,0x120e,0x1212,0x1206, -0x122a,0x121a,0x1222,0x600,0x1238,0x1232,0x600,0x8f7,0x1248,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x1256,0x125b,0x1240,0x1250,0x126b,0x1263,0x4b5,0x4b5,0x127a,0x127e,0x1272,0x128e,0x1286,0x11c0,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x1292,0x12a2,0x12a7,0x129a,0x4b5,0x4b5,0x12af,0x12bf,0x12b7, +0xf02,0xf09,0xf11,0x4b5,0x600,0x600,0x600,0xf19,0xf29,0xf21,0xf40,0xf31,0xf38,0xf48,0xbbd,0xf50, +0x4b5,0x4b5,0x4b5,0x4b5,0x8f6,0x600,0xf58,0xf60,0x600,0xf68,0xf70,0xf74,0xf7c,0x600,0xf84,0x4b5, +0x58d,0x597,0xf8c,0x600,0xf90,0xf98,0xfa8,0xfa0,0x600,0xfb0,0x600,0xfb7,0xfc7,0xfbf,0x4b5,0x4b5, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xb88,0x902,0xfcf,0xfdf,0xfd7,0x4b5,0x4b5, +0xfef,0xfe7,0xff2,0xffa,0x916,0x1002,0x4b5,0x100a,0x1012,0x101a,0x4b5,0x4b5,0x600,0x102a,0x1032,0x1022, +0x1042,0x1049,0x103a,0x1051,0x1059,0x4b5,0x1069,0x1061,0x600,0x106c,0x1074,0x107c,0x1084,0x108c,0x4b5,0x4b5, +0x600,0x600,0x1094,0x4b5,0x58d,0x109c,0x535,0x10a4,0x600,0x10ac,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x4b5,0x4b5,0x4b5,0x10b4,0x600,0x10bc,0x4b5,0x10c1,0x10c9,0x10d1,0x10d8,0x1006,0x10e0,0x1006,0x10e8,0xb88, +0x10f8,0x636,0x1100,0x10f0,0x98f,0x1108,0x1110,0x1116,0x112e,0x111e,0x1126,0x1132,0x98f,0x1142,0x113a,0x114a, +0x1162,0x1152,0x115a,0x4b5,0x1169,0x1171,0x658,0x1179,0x1189,0x118f,0x1197,0x1181,0x4b5,0x4b5,0x4b5,0x4b5, +0x600,0x119f,0x11a7,0x10c0,0x600,0x11af,0x11b7,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x11bf,0x11c7,0x4b5, +0x600,0x11cf,0x11d7,0x11df,0x600,0x11ef,0x11e7,0x4b5,0x870,0x11f7,0x11ff,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x600,0x1207,0x4b5,0x4b5,0x4b5,0x58d,0x535,0x120f,0x121f,0x1225,0x1217,0x4b5,0x4b5,0x1235,0x1239,0x122d, +0x1251,0x1241,0x1249,0x600,0x125f,0x1259,0x600,0x8f7,0x126f,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x127d,0x1282,0x1267,0x1277,0x1292,0x128a,0x4b5,0x4b5,0x12a1,0x12a5,0x1299,0x12b5,0x12ad,0x11e7,0x4b5,0x4b5, +0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x12b9,0x12c9,0x12ce,0x12c1,0x4b5,0x4b5,0x12d6,0x12e6,0x12de, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x901,0x4b5,0x4b5,0x4b5, -0x12cf,0x12d7,0x12df,0x12c7,0x600,0x600,0x600,0x600,0x600,0x600,0x12e7,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x12f6,0x12fe,0x1306,0x12ee,0x600,0x600,0x600,0x600,0x600,0x600,0x130e,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0xfdf,0x600,0x600,0x12ef,0x600,0x600,0x600,0x600,0x600, +0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x1006,0x600,0x600,0x1316,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x12f7,0x12ff,0x4b5,0x4b5, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x131e,0x1326,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x11d8,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x600, +0x11ff,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x8f7, -0x916,0xda4,0x600,0x916,0x1307,0x130c,0x600,0x131c,0x1324,0x132c,0x1314,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x916,0xdcb,0x600,0x916,0x132e,0x1333,0x600,0x1343,0x134b,0x1353,0x133b,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x58d,0x535,0x1334,0x4b5,0x4b5,0x4b5,0x600,0x600,0x133c,0x1341,0x1347,0x4b5,0x4b5,0x134f,0x600,0x600, +0x58d,0x535,0x135b,0x4b5,0x4b5,0x4b5,0x600,0x600,0x1363,0x1368,0x136e,0x4b5,0x4b5,0x1376,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1357,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x137e,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x902,0x4b5,0x106d,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x600,0x600,0x600,0x600,0x902,0x4b5,0x1094,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x135d,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x1365,0x136a,0x1371,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xe10,0x4b5, +0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x1384,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x138c,0x1391,0x1398,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xe37,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x600,0x600,0x1377,0x137c,0x1384,0x4b5,0x4b5,0x4b5, +0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x600,0x600,0x139e,0x13a3,0x13ab,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x55c,0x1394,0x139b,0x934,0x934,0x934,0x138c,0x4b5,0x934,0x934,0x934, -0x934,0x934,0x934,0x934,0xbb7,0x934,0x13a2,0x934,0x13a9,0x13b1,0x13b7,0x934,0xade,0x934,0x934,0x13bf, -0x4b5,0x4b5,0x4b5,0x13c7,0x13c7,0x934,0x934,0xadb,0x13cf,0x4b5,0x4b5,0x4b5,0x4b5,0x13df,0x13e6,0x13eb, -0x13f1,0x13f9,0x1401,0x1409,0x13e3,0x1411,0x1419,0x1421,0x1426,0x13f8,0x13df,0x13e6,0x13e2,0x13f1,0x142e,0x13e0, -0x1431,0x13e3,0x1439,0x1441,0x1449,0x1450,0x143c,0x1444,0x144c,0x1453,0x143f,0x145b,0x13d7,0x934,0x934,0x934, -0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x55c,0x146b,0x55c, -0x1472,0x1479,0x1463,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x55c,0x13bb,0x13c2,0x934,0x934,0x934,0x13b3,0x4b5,0x934,0x934,0x934, +0x934,0x934,0x934,0x934,0xbb7,0x934,0x13c9,0x934,0x13d0,0x13d8,0x13de,0x934,0xade,0x934,0x934,0x13e6, +0x4b5,0x4b5,0x4b5,0x13ee,0x13ee,0x934,0x934,0xadb,0x13f6,0x4b5,0x4b5,0x4b5,0x4b5,0x1406,0x140d,0x1412, +0x1418,0x1420,0x1428,0x1430,0x140a,0x1438,0x1440,0x1448,0x144d,0x141f,0x1406,0x140d,0x1409,0x1418,0x1455,0x1407, +0x1458,0x140a,0x1460,0x1468,0x1470,0x1477,0x1463,0x146b,0x1473,0x147a,0x1466,0x1482,0x13fe,0x934,0x934,0x934, +0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x55c,0x1492,0x55c, +0x1499,0x14a0,0x148a,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x1488,0x1490,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x1480,0x1498,0x9d4, -0x14a8,0x14a0,0x4b5,0x4b5,0x4b5,0x600,0x14b8,0x14b0,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0xfdf,0x14c0,0x600,0x14c8,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0xfdf,0x14d0,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x14d8,0x600,0x600,0x600, -0x600,0x600,0x600,0x14e0,0x4b5,0x58d,0x14f0,0x14e8,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x14af,0x14b7,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x14a7,0x14bf,0x9d4, +0x14cf,0x14c7,0x4b5,0x4b5,0x4b5,0x600,0x14df,0x14d7,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x4b5,0x1006,0x14e7,0x600,0x14ef,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x4b5,0x4b5,0x4b5,0x1006,0x14f7,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x14ff,0x600,0x600,0x600, +0x600,0x600,0x600,0x1507,0x4b5,0x58d,0x1517,0x150f,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x14f8,0x1508,0x1500,0x4b5,0x4b5,0x1518,0x1510,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x1528,0x1530,0x1538, -0x1540,0x1548,0x1550,0x4b5,0x1520,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x934,0x1558,0x934, -0x934,0xbaf,0x13a0,0x1560,0xbb7,0x1568,0x934,0x934,0x934,0x934,0xbb9,0x4b5,0x1570,0x1578,0x157c,0x1584, -0x158c,0x4b5,0x4b5,0x4b5,0x4b5,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x1594,0x934,0x934,0x934, +0x151f,0x152f,0x1527,0x4b5,0x4b5,0x153f,0x1537,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x154f,0x1557,0x155f, +0x1567,0x156f,0x1577,0x4b5,0x1547,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x934,0x157f,0x934, +0x934,0xbaf,0x13c7,0x1587,0xbb7,0x158f,0x934,0x934,0x934,0x934,0xbb9,0x4b5,0x1597,0x159f,0x15a3,0x15ab, +0x15b3,0x4b5,0x4b5,0x4b5,0x4b5,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x15bb,0x934,0x934,0x934, 0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934, -0x934,0x934,0x934,0x157d,0x159c,0x934,0x934,0x934,0x15a4,0x934,0x934,0x15ab,0x15b3,0x1558,0x934,0x15bb, -0x934,0x15c3,0x15c8,0x4b5,0x4b5,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0xbaf, -0x15d0,0x15d9,0x15dd,0x15e5,0x15d5,0x934,0x934,0x934,0x934,0x15ed,0x934,0xade,0x11bc,0x4b5,0x4b5,0x4b5, +0x934,0x934,0x934,0x15a4,0x15c3,0x934,0x934,0x934,0x15cb,0x934,0x934,0x15d2,0x15da,0x157f,0x934,0x15e2, +0x934,0x15ea,0x15ef,0x4b5,0x4b5,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0x934,0xbaf, +0x15f7,0x1600,0x1604,0x160c,0x15fc,0x934,0x934,0x934,0x934,0x1614,0x934,0xade,0x11e3,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x15f5,0x600,0x600, -0x15fc,0x600,0x600,0x600,0x1604,0x600,0x160c,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x161c,0x600,0x600, +0x1623,0x600,0x600,0x600,0x162b,0x600,0x1633,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xc90,0x600,0x600, -0x1614,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x161c,0x1624,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xca3,0x600,0x600, +0x163b,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1643,0x164b,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0xccf,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0xc32,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x162b,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1652,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1632,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1659,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x1639,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0x600,0x600,0x600,0x600,0x1660,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x4b5,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x901,0x600,0x600,0x600,0x600,0x600,0x600,0xf69,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x901,0x600,0x600,0x600,0x600,0x600,0x600,0xf90,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1641,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1668,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1649,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x600, -0x600,0x600,0x1651,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xf69,0x4b5, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1670,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, +0xf90,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x600, +0x600,0x600,0x1674,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0xf90,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x67d,0x600,0x600,0x600,0x600,0x600,0x600,0x600, 0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x1314,0x4b5, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x133b,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0x1661,0x1659,0x1659,0x1659,0x4b5,0x4b5,0x4b5,0x4b5,0x55c,0x55c,0x55c,0x55c,0x55c,0x55c,0x55c, -0x1669,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, +0x4b5,0x1684,0x167c,0x167c,0x167c,0x4b5,0x4b5,0x4b5,0x4b5,0x55c,0x55c,0x55c,0x55c,0x55c,0x55c,0x55c, +0x168c,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, 0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5,0x4b5, -0x4b5,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0x1671,0x494,0x494,0x494,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0x4b5,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0x1694,0x494,0x494,0x494,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, 0xf,0xf,0xf,0xf,0xc,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18, 0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17, @@ -765,7 +765,7 @@ static const uint16_t propsTrie_index[23016]={ 0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, 0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0,0,0,0,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,6,6, +0x1b,0x1b,0x1b,0x1b,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,6,6, 6,6,8,8,0x13,4,4,4,4,4,0x1b,0x1b,0x7ca,0xa4a,0xcca,4, 5,0x17,0x1b,0x1b,0xc,0x17,0x17,0x17,0x1b,4,5,0x54a,0x14,0x15,0x14,0x15, 0x14,0x15,0x14,0x15,0x14,0x15,0x1b,0x1b,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15, @@ -778,7 +778,7 @@ static const uint16_t propsTrie_index[23016]={ 5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,0,0x1b,0x1b,0x58b,0x5cb,0x60b,0x64b,0x1b,0x1b, 0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,0,0x1b,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x1b,0x1b, 0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, 0x1b,0x1b,0x1b,0x1b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1b,0xa8b,0xacb,0xb0b, @@ -793,664 +793,673 @@ static const uint16_t propsTrie_index[23016]={ 5,5,5,5,5,0x705,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,0x585,5,5,0x705,5,5,5,0x7885, 5,0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0x5c5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x785,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0x5c5,5,5,5,5,5,5,5,0x685,5,0x645,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x785,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0x5c5,5,5,5,5,5,5,5, +0x685,5,0x645,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x7b85,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,0x7985,0x7c5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0x7985,0x7c5,5,5,5, +5,5,5,0x7845,5,5,5,5,5,5,5,5,0x605,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0x7845,5,5,5,5, -5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x685,5,5, +5,5,5,5,5,0x685,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0x1e45,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x5c5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0x1e45,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0x7985,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x7a85,5, +5,5,0x5c5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0x7985,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0x5c5,5,0x745,5,0x6c5,5,5, +5,5,5,5,5,5,5,5,5,5,0x7905,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x7c5,5,0x7845,0xa45,0xcc5,5,5,5,5,5,5,0xf45,5,5,5, +5,5,5,5,5,0x5c5,5,0x745,5,0x6c5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0x7c5,5,0x7845, +0xa45,0xcc5,5,5,5,5,5,5,0xf45,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0x605,0x605,0x605, +0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x645, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x605,0x605,0x605,0x605,5,5,5,5,5,5,5,5,5,5,5, +5,0x585,5,5,5,5,5,5,5,0x585,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0x645,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0x585,5,5,5,5,5,5,5,0x585,5,5, +5,5,5,5,5,5,0x585,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x585,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x785,0xa45,5,5,5,5, -5,5,5,5,5,5,5,5,0x585,0x5c5,0x605,5,0x5c5,5,5,5, +5,5,5,5,5,5,0x785,0xa45,5,5,5,5,5,5,5,5, +5,5,5,5,0x585,0x5c5,0x605,5,0x5c5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0x705,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x7c5,5, 5,5,5,5,5,5,5,5,5,5,5,5,0x745,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,0x705,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x785,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1e45,5, +5,5,0x545,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0x785,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0x1e45,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x8005,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x79c5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,0x645,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 0x7885,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,0x5c5,5,5,5,5,0x5c5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0x5c5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x7845,5,5, +5,5,5,5,5,5,5,5,5,0x785,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x6c5,5, -5,5,5,5,0x1e45,5,5,5,5,5,5,5,5,5,5,5, +5,0x7845,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x545,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0,0,0,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,5,5,5, +5,5,0x6c5,5,5,5,5,5,0x1e45,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,4,5,5,5,5,5,5,5,5,5,5,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,4,0x17,0x17,0x17, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,0x6c5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,4,5,5, +5,5,5,5,5,5,5,5,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,4,0x17,0x17,0x17,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,2,1,2, 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,4,4,6,6, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,5,6, -7,7,7,0x17,6,6,6,6,6,6,6,6,6,6,0x17,4, -5,5,5,5,5,5,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x54a, -6,6,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0, +1,2,1,2,1,2,1,2,4,4,6,6,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,5,6,7,7,7,0x17, +6,6,6,6,6,6,6,6,6,6,0x17,4,5,5,5,5, +5,5,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x54a,6,6,0x17,0x17, +0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0x1a,0x1a,0x1a,0x1a, 0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, -0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,4,4,4,1,2,5,4,4,2,5,5,5,5,5, -0x1a,0x1a,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -2,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,4,2,2,2,2,2,2,2,2,1,2,1, -2,1,1,2,1,2,1,2,1,2,1,2,4,0x1a,0x1a,1, -2,1,2,5,1,2,1,2,2,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,1,1,1,1,2,1,1,1,1, -1,2,1,2,1,2,1,2,1,2,1,2,1,1,1,1, -2,1,2,0,0,0,0,0,1,2,0,2,0,2,1,2, -1,2,0,0,0,0,0,0,5,5,6,5,5,5,6,5, -5,5,5,6,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,8,8,6,6,8, -0x1b,0x1b,0x1b,0x1b,6,0,0,0,0x34cb,0x344b,0x3ccb,0x37cb,0x35cb,0x3fcb,0x1b,0x1b, -0x19,0x1b,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0x17,0x17,0x17,0x17, -0,0,0,0,0,0,0,0,8,8,8,8,6,6,0,0, -0,0,0,0,0,0,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0,0,0,0,0,8,8,5,5,5,5,5,5, +0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4, +4,1,2,5,4,4,2,5,5,5,5,5,0x1a,0x1a,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,2,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +4,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2, +1,2,1,2,1,2,1,2,4,0x1a,0x1a,1,2,1,2,5, +1,2,1,2,2,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,1,1,1,1,2,1,1,1,1,1,2,1,2, +1,2,1,2,1,2,1,2,1,1,1,1,2,1,2,0, +0,0,0,0,1,2,0,2,0,2,1,2,1,2,0,0, +0,0,0,0,5,5,6,5,5,5,6,5,5,5,5,6, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,8,8,8,8,8,8,8,8, -8,8,8,8,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,5,5,5,5,5,5,0x17,0x17,0x17,5, -0x17,5,5,6,5,5,5,5,5,5,6,6,6,6,6,6, -6,6,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6, -6,6,8,8,0,0,0,0,0,0,0,0,0,0,0,0x17, -8,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,4, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,6,8,8,6,6,6,6,8,8,6,6,8,8, -5,5,5,5,5,6,4,5,5,5,5,5,5,5,5,5, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,0, -5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,8, -8,6,6,8,8,6,6,0,0,0,0,0,0,0,0,0, -5,5,5,6,5,5,5,5,5,5,5,5,6,8,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0x17,0x17,0x17,0x17, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -4,5,5,5,5,5,5,0x1b,0x1b,0x1b,5,8,6,8,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -6,5,6,6,6,5,5,6,6,5,5,5,5,5,6,6, -5,6,5,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,5,5,4,0x17,0x17, -5,5,5,5,5,5,5,5,5,5,5,8,6,6,8,8, -0x17,0x17,5,4,4,8,6,0,0,0,0,0,0,0,0,0, -0,5,5,5,5,5,5,0,0,5,5,5,5,5,5,0, -0,5,5,5,5,5,5,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,0, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,0x1a,4,4,4,4, -2,2,2,2,2,2,2,2,2,4,0x1a,0x1a,0,0,0,0, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -5,5,5,8,8,6,8,8,6,8,8,0x17,8,6,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, -5,5,5,5,5,5,5,0,0,0,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, -0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, -5,5,5,5,5,5,5,5,5,5,5,0x605,5,5,5,5, -5,5,5,0x7c5,5,5,5,5,0x5c5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x6c5,5,0x6c5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x7c5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x18,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -5,5,5,5,5,0,5,0,5,5,0,5,5,0,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,2,2,2,2,2,2,2,0, -0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2, -0,0,0,0,0,5,6,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, -0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x15,0x14,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, -0,0,0,0x1b,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,8,8,6,6,8,0x1b,0x1b,0x1b,0x1b, +6,0,0,0,0x34cb,0x344b,0x3ccb,0x37cb,0x35cb,0x3fcb,0x1b,0x1b,0x19,0x1b,0,0, 0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -0x19,0x1b,0x1b,0x1b,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x14,0x15,0x17,0,0, -0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,0x17,0x13,0x13,0x16,0x16,0x14,0x15,0x14,0x15,0x14,0x15,0x14, -0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x14,0x15,0x17,0x17,0x17,0x17,0x16,0x16,0x16, -0x17,0x17,0x17,0,0x17,0x17,0x17,0x17,0x13,0x14,0x15,0x14,0x15,0x14,0x15,0x17, -0x17,0x17,0x18,0x13,0x18,0x18,0x18,0,0x17,0x19,0x17,0x17,0,0,0,0, -5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0x17,0x17,0x17,0x17,0,0,0,0, +0,0,0,0,8,8,8,8,6,6,0,0,0,0,0,0, +0,0,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0,0,0,0,8,8,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,0,0x10,0,0,5,5,5,5,5,5, -0,0,5,5,5,5,5,5,0,0,5,5,5,5,5,5, -0,0,5,5,5,0,0,0,0x19,0x19,0x18,0x1a,0x1b,0x19,0x19,0, -0x1b,0x18,0x18,0x18,0x18,0x1b,0x1b,0,0,0,0,0,0,0,0,0, -0,0x10,0x10,0x10,0x1b,0x1b,0,0,0,0x17,0x17,0x17,0x19,0x17,0x17,0x17, -0x14,0x15,0x17,0x18,0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0x17,0x17,0x18,0x18,0x18,0x17,0x1a,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,0x14,0x18,0x15,0x18,0x14,0x15,0x17,0x14,0x15,0x17,0x17,5,5, -5,5,5,5,5,5,5,5,4,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,4,4,5,5,5,5, -5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0,5,5,0,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0,0,0,0,0,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0xa04b, -0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0,0,0,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x17,0x17,0x17,0,0,0,0,0x58b, -0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b, -0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0x7ca,0x7ca,0x7ca,0x7ca,0x7ca,0xcca,0x11ca,0x11ca, -0x11ca,0x11ca,0x1e4a,0x880a,0x980a,0x980a,0x980a,0x980a,0x980a,0x784a,0x984a,0x68a,0x11ca,0x344b,0x344b,0x388b, -0x3ccb,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x54b,0x34cb, -0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0,0,0,0x34ca,0x344a,0x58a,0x68a,0x11ca,0x980a,0x984a,0x988a,0x68a,0x7ca,0x11ca,0x1e4a, -0x980a,0x784a,0x984a,0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x788a,0x988a,0x7ca,0x58a,0x58a,0x58a,0x5ca, -0x5ca,0x5ca,0x5ca,0x68a,0x1b,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,6,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,6,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b, -0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b, -0,0,0,0,0x58b,0x68b,0x7cb,0x11cb,0,0,0,0,0,0,0,0, -0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0x1bca,5,5,5,5,5,5,5,5,0xb80a,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,0x17,5,5,5,5,0,0,0,0,5,5,5,5, -5,5,5,5,0x17,0x58a,0x5ca,0x7ca,0xa4a,0x1e4a,0,0,0,0,0,0, -0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, -0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2, -2,2,2,2,5,5,5,5,5,5,5,5,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0x17, -1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1, -2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,0,2,2,2,2,2,2,2,0,2,2,0,0,0, -1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1, -1,1,1,0,1,1,0,2,2,2,2,2,2,2,2,2, -5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, -4,0,4,4,4,4,4,4,4,4,4,0,0,0,0,0, -4,4,4,4,4,4,0,4,4,4,4,4,4,4,4,4, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,5,5,0,0,0,5,0,0,5, -5,5,5,5,5,5,0,0,5,0,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,0x17,0x58b,0x5cb,0x60b,0x7cb,0xa4b,0x1e4b,0x784b,0x788b,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0x1b,0x1b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,0,0,0,0, -0,0,0,0x58b,0x5cb,0x60b,0x64b,0x64b,0x68b,0x7cb,0xa4b,0x1e4b,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -5,5,0,0,0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0x58b,0x7cb,0xa4b,0x1e4b,0x5cb,0x60b,0,0,0,0x17,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,0,0,0,0x17,0xa04b,0xa84b,0xb04b,0xb84b, -0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x88cb,0x90cb,0x98cb,0xa0cb,0xa8cb, -0xb0cb,0xb8cb,0x36cb,0x354b,0x34cb,0x348b,0x46cb,0x344b,0x4ecb,0x388b,0x3ccb,0x454b,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0,0,0,0,0x5ecb,0x344b,5,5,0x58b,0x5cb,0x60b,0x64b, -0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0,0,0x1e4b,0x800b, -0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0x30b,0x34b,0x38b,0x3cb, -0x7cb,0xa4b,0x1e4b,0x784b,0x344b,0,0,0,0,0,0,0,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,5,6,6,6, -0,6,6,0,0,0,0,0,6,6,6,6,5,5,5,5, -0,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, -6,6,6,0,0,0,0,6,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0x58b,0x11cb,0x17,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0x58b,0x7cb,0xa4b,5,5,5,5,5,6,6,0, -0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -0x1b,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0,0,0,0x17,0x17,0x17, -0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0,0,0x58b,0x5cb,0x60b,0x64b, -0x7cb,0xa4b,0x1e4b,0x784b,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b, -0x7cb,0xa4b,0x1e4b,0x784b,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,0,0,0,0,0,0x17,0x17,0x17, -0x17,0,0,0,0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b, -0x64b,0x7cb,0xa4b,0x1e4b,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0, -0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,0,0,0,0,0,0,0,0x58b,0x68b, -0x7cb,0x11cb,0x1e4b,0x784b,5,5,5,5,6,6,6,6,0,0,0,0, -0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, -0,0,0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0xa4b,0xccb, -0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x344b, -0x34cb,0x348b,0x388b,0,5,5,5,5,5,5,5,5,5,5,0,6, -6,0x13,0,0,5,5,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,6,6,6,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,0xccb,0x1e4b,0x344b,5, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6, -6,6,6,6,6,0x58b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,0x17,0,0, -0,0,0,0,5,5,6,6,6,6,0x17,0x17,0x17,0x17,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x784b,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,6,5,5,6,6,5,0,0,0,0,0,0, -0,0,0,6,8,6,8,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0,0,0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb, -0xa4b,0xccb,0xf4b,0x11cb,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,8,8,8,6,6,6,6,8,8,6,6,0x17, -0x17,0x10,0x17,0x17,0x17,0x17,6,0,0,0,0,0,0,0,0,0, -0,0x10,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5, -5,5,5,6,6,6,6,6,8,6,6,6,6,6,6,6, -6,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17, -5,8,8,5,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,5, +5,5,5,5,8,8,8,8,8,8,8,8,8,8,8,8, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,5,5,5,5,5,5,0x17,0x17,0x17,5,0x17,5,5,6, +5,5,5,5,5,5,6,6,6,6,6,6,6,6,0x17,0x17, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,6,6,6,6,6,6,6,6,6,6,6,8,8, +0,0,0,0,0,0,0,0,0,0,0,0x17,8,0x17,0x17,0x17, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,4,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6, -0x17,0x17,5,0,0,0,0,0,0,0,0,0,8,5,5,5, -5,0x17,0x17,0x17,0x17,6,6,6,6,0x17,8,6,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,5,0x17,5,0x17,0x17,0x17,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8, -8,8,6,6,6,6,6,6,6,6,6,8,0,0x58b,0x5cb,0x60b, -0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b, -0x784b,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,8,8,8,6,6,6,8,8, -6,8,6,6,0x17,0x17,0x17,0x17,0x17,0x17,6,5,5,6,0,0, +8,8,6,6,6,6,8,8,6,6,8,8,5,5,5,5, +5,6,4,5,5,5,5,5,5,5,5,5,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,0,5,5,5,5, +5,5,5,5,5,6,6,6,6,6,6,8,8,6,6,8, +8,6,6,0,0,0,0,0,0,0,0,0,5,5,5,6, +5,5,5,5,5,5,5,5,6,8,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0x17,0x17,0x17,0x17,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,4,5,5,5, +5,5,5,0x1b,0x1b,0x1b,5,8,6,8,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,6,5,6,6, +6,5,5,6,6,5,5,5,5,5,6,6,5,6,5,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -5,0,5,5,5,5,0,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5, -5,0x17,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,6, -6,6,6,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0,0,0,0,0,5,5,8,8,0,0,6,6, -6,6,6,6,6,0,0,0,6,6,6,6,6,0,0,0, -0,0,0,0,0,0,0,0,6,6,8,8,0,5,5,5, -5,5,5,5,5,0,0,5,5,0,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5, -5,0,5,5,0,5,5,5,5,5,0,6,6,5,8,8, -6,8,8,8,8,0,0,8,8,0,0,8,8,8,0,0, -5,0,0,0,0,0,0,8,0,0,0,0,0,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,8,8,8,6,6,6,6,6,6,6,6, -8,8,6,6,6,8,6,5,5,5,5,0x17,0x17,0x17,0x17,0x17, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0,0x17,6,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -8,8,8,6,6,6,6,6,6,8,6,8,8,8,8,6, -6,8,6,6,5,5,0x17,5,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8, -8,8,6,6,6,6,0,0,8,8,8,8,6,6,8,6, -6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,5,5,5,5,6,6,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -8,8,8,6,6,6,6,6,6,6,6,8,8,6,8,6, -6,0x17,0x17,0x17,5,0,0,0,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,6,8,6,8,8, -6,6,6,6,6,6,8,6,5,0x17,0,0,0,0,0,0, -8,8,6,6,6,6,8,6,6,6,6,6,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0x17,0x17,0x17,0x1b, -5,5,5,5,5,5,5,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,6, -6,6,6,6,6,6,6,6,8,6,6,0x17,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b, -0x16cb,0x194b,0x1bcb,0,0,0,0,0,0,0,0,0,0,0,0,5, -8,5,8,6,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -5,5,5,5,5,5,5,0,0,5,0,0,5,5,5,5, -5,5,5,5,0,5,5,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,8,8,8,8,8,8,0,8, -8,0,0,6,6,8,6,5,6,5,0x17,5,8,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,8,8,8,6,6,6,6, -0,0,6,6,8,8,8,8,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6, -6,8,5,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,6, -0,0,0,0,0,0,0,0,5,6,6,6,6,6,6,8, -8,6,6,6,5,5,5,5,5,6,6,6,6,6,6,6, -6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x17,0x17,0x17,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6, -6,6,6,6,6,6,6,8,6,6,0x17,0x17,0x17,5,0x17,0x17, -5,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0,0,0, -0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,8,6,6,6,6, -6,6,6,0,6,6,6,6,6,6,8,6,6,6,6,6, -6,6,6,6,0,8,6,6,6,6,6,6,6,8,6,6, -8,6,6,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,5,6, -0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0,0,0,0,0,5,5,5,5,5,5,5,0, -5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,0, -0,0,6,0,6,6,0,6,5,5,5,5,5,5,5,5, -5,5,8,8,8,8,8,0,6,6,0,8,8,6,8,6, -5,0,0,0,0,0,0,0,5,5,5,5,5,5,0,5, -5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,6,6,8,8,0x17, -0x17,0,0,0,0,0,0,0,6,8,6,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0,0,0,0,0,6,6,5,8,5,5,5,5, -5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -8,8,6,6,6,6,6,0,0,0,8,8,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x19,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, -0,0,0,0,0,0,0,0,0,0,0,0x17,0xcd0b,0xcc0b,0xcb0b,0xd00b, -0xca0b,0xcf0b,0xcb4b,0xd04b,0xc90b,0x37cb,0x37cb,0x364b,0x35cb,0xc94b,0x3fcb,0x350b,0x34cb,0x344b,0x344b,0x3ccb, -0xcd0b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x19,0x19,0x34ca,0x354a,0x34ca,0x34ca, -0x344a,0x348a,0x388a,0xf4a,0x11ca,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0,0x17,0x17,0x17,0x17, -0x17,0,0,0,0,0,0,0,0,0,0,0,0x5ca,0x60a,0x64a,0x68a, -0x6ca,0x70a,0x74a,0x78a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x64a,0x68a,0x6ca,0x70a,0x74a, -0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x5ca, -0x60a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0xc08a,0xc18a, -0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0x60a,0x60a,0x64a,0x64a,0x64a,0x64a,0x6ca,0x70a,0x70a,0x70a, -0x74a,0x74a,0x78a,0x78a,0x78a,0x78a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x58a,0x5ca,0x60a,0x64a,0x64a, -0x68a,0x68a,0x5ca,0x60a,0x58a,0x5ca,0x348a,0x388a,0x454a,0x348a,0x388a,0x35ca,5,5,5,5, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x17,0x17,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0x10,0x10,0x10,0x10, -0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,6,5,5,5, -5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0,0,0,0,0,0,0,5,5,4,0x17,0x17,5,5,5,5, +5,5,5,5,5,5,5,8,6,6,8,8,0x17,0x17,5,4, +4,8,6,0,0,0,0,0,0,0,0,0,0,5,5,5, +5,5,5,0,0,5,5,5,5,5,5,0,0,5,5,5, +5,5,5,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,0,5,5,5,5,5,5,5,0,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,0x1a,4,4,4,4,2,2,2,2, +2,2,2,2,2,4,0x1a,0x1a,0,0,0,0,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,5,5,5,8, +8,6,8,8,6,8,8,0x17,8,6,0,0,0x49,0x89,0xc9,0x109, 0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, -6,6,6,6,6,0x17,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b, -4,4,4,4,0x17,0x1b,0,0,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0x7cb,0x1e4b,0x788b,0x790b,0x798b, -0x7a0b,0x7a8b,0,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0,0,0,0,0,5,5,5, -0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b, -0x94b,0x98b,0x9cb,0xa0b,0x58b,0x5cb,0x60b,0x17,0x17,0x17,0x17,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,6, -5,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, -8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, -8,8,8,8,0,0,0,0,0,0,0,6,6,6,6,4, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,0x17,4, -6,0,0,0,0,0,0,0,0,0,0,0,8,8,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,4,4,4,4,0,4,4,4,4,4,4,4, -0,4,4,0,5,5,5,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,0,0,5,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,0,0,0x1b,6,6,0x17, -0x10,0x10,0x10,0x10,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,0,0,0,0,5,5,5,5, +5,5,5,0,0,0,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x11,0x11,0x11,0x11, +0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, +0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,5,5,5,5, +5,5,5,5,5,5,5,0x605,5,5,5,5,5,5,5,0x7c5, +5,5,5,5,0x5c5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x6c5,5,0x6c5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x7c5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0x18,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5, +5,0,5,0,5,5,0,5,5,0,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,2,2,2,2,2,2,2,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,2,2,2,0,0,0,0, +0,5,6,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, +0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0x15,0x14,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0x1b, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,0x19,0x1b,0x1b,0x1b, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0, -0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,8,8,6,6,6,0x1b,0x1b, -0x1b,8,8,8,8,8,8,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,6, -6,6,6,6,6,6,6,0x1b,0x1b,6,6,6,6,6,6,6, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6, -6,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x54b,0x58b,0x5cb,0x60b, -0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b, -0,0,0,0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b, -0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x58b,0x5cb, -0x60b,0x64b,0x68b,0x58b,0x68b,0,0,0,0,0,0,0,0x249,0x289,0x49,0x89, -0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, -2,0,2,2,2,2,2,2,2,2,2,2,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1, -1,0,1,1,1,1,1,1,1,1,2,2,2,2,0,2, -0,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2, -2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,2,2,2,2,1,1,0,1,1,1,1,0, -0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, -1,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,1, -1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1, -1,1,1,1,1,0,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,2,2,2,2,2,2,0,0,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x18,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x18, -2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x18, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,0x18,2,2,2,2,2,2,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,0x18, -2,2,2,2,2,2,1,2,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,6,6,6,6, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x14,0x15,0x17,0,0,0,0,0,0, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,0x1b,0x1b,0x1b,0x1b,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6, -6,6,6,6,6,6,6,0,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,0,0,6,6,6,6,6, -2,2,2,2,2,2,2,2,2,2,5,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0, -0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -6,6,0,6,6,0,6,6,6,6,6,0,0,0,0,0, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,5,0x1b, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,0, -6,6,6,6,6,6,6,4,4,4,4,4,4,4,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0x19, -5,5,5,5,5,5,5,5,5,5,5,4,6,6,6,6, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -5,5,5,5,5,5,5,0,5,5,5,5,0,5,5,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -5,5,5,5,5,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b, -6,6,6,6,6,6,6,0,0,0,0,0,0,0,0,0, -2,2,2,2,6,6,6,6,6,6,6,4,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17, -1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b, -0x78cb,0x794b,0x814b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x1b,0x34cb,0x344b,0x3ccb, -0x19,0x58b,0x5cb,0x788b,0x78cb,0,0,0,0,0,0,0,0,0,0,0, -0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b, -0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb, -0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x1b,0x5cb, -0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x900b,0xa00b,0x804b,0x788b,0x344b,0x354b,0,0, -0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b, -0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x17,0x13,0x13,0x16,0x16,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14, +0x15,0x17,0x17,0x14,0x15,0x17,0x17,0x17,0x17,0x16,0x16,0x16,0x17,0x17,0x17,0, +0x17,0x17,0x17,0x17,0x13,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x17,0x18,0x13, +0x18,0x18,0x18,0,0x17,0x19,0x17,0x17,0,0,0,0,5,5,5,5, +5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,0,0x10,0,0,5,5,5,5,5,5,0,0,5,5, +5,5,5,5,0,0,5,5,5,5,5,5,0,0,5,5, +5,0,0,0,0x19,0x19,0x18,0x1a,0x1b,0x19,0x19,0,0x1b,0x18,0x18,0x18, +0x18,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0x10,0x10,0x10, +0x1b,0x1b,0,0,0,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18, +0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17, +0x18,0x18,0x18,0x17,0x1a,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x14, +0x18,0x15,0x18,0x14,0x15,0x17,0x14,0x15,0x17,0x17,5,5,5,5,5,5, +5,5,5,5,4,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,4,4,5,5,5,5,5,5,5,5, 5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0,5,5,0,5,0,0,5,0,5,5,5,5,5,5,5, -5,5,5,0,5,5,5,5,0,5,0,5,0,0,0,0, -0,0,5,0,0,0,0,5,0,5,0,5,0,5,5,5, -0,5,5,0,5,0,0,5,0,5,0,5,0,5,0,5, -0,5,5,0,5,0,0,5,5,5,5,0,5,5,5,5, -5,5,5,0,5,5,5,5,0,5,5,5,5,0,5,0, -5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, -0,5,5,5,0,5,5,5,5,5,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x2cb,0x2cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x54b,0x54b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0, -0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, +5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, +0,0,0,0,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b, +0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x17,0x17,0x17,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b, +0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b, +0x900b,0x980b,0xa00b,0xa80b,0x7ca,0x7ca,0x7ca,0x7ca,0x7ca,0xcca,0x11ca,0x11ca,0x11ca,0x11ca,0x1e4a,0x880a, +0x980a,0x980a,0x980a,0x980a,0x980a,0x784a,0x984a,0x68a,0x11ca,0x344b,0x344b,0x388b,0x3ccb,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x54b,0x34cb,0x1b,0x1b,0x1b,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, +0x34ca,0x344a,0x58a,0x68a,0x11ca,0x980a,0x984a,0x988a,0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x984a,0x68a, +0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x788a,0x988a,0x7ca,0x58a,0x58a,0x58a,0x5ca,0x5ca,0x5ca,0x5ca,0x68a, +0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x1a,0x1a, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +6,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b, +0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0,0,0,0, +0x58b,0x68b,0x7cb,0x11cb,0,0,0,0,0,0,0,0,0,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x1bca,5,5,5,5,5,5,5,5,0xb80a,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0x17, +5,5,5,5,0,0,0,0,5,5,5,5,5,5,5,5, +0x17,0x58a,0x5ca,0x7ca,0xa4a,0x1e4a,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0,0,0,0,2,2,2,2,2,2,2,2, +5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,0,0,0,0,0,0,0,0,0,0x17,1,1,1,1, +1,1,1,1,1,1,1,0,1,1,1,1,2,2,0,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2, +2,2,2,2,2,2,0,2,2,0,0,0,1,1,1,1, +1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0, +1,1,0,2,2,2,2,2,2,2,2,2,5,5,5,5, +5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,0,4,4, +4,4,4,4,4,4,4,0,0,0,0,0,4,4,4,4, +4,4,0,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,5,5,0,0,0,5,0,0,5,5,5,5,5, +5,5,0,0,5,0,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0x17, +0x58b,0x5cb,0x60b,0x7cb,0xa4b,0x1e4b,0x784b,0x788b,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1b, +0x1b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,0,0,0,0,0,0,0,0x58b, +0x5cb,0x60b,0x64b,0x64b,0x68b,0x7cb,0xa4b,0x1e4b,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,5,5,0,0, +0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x58b,0x7cb, +0xa4b,0x1e4b,0x5cb,0x60b,0,0,0,0x17,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,0,0,0,0,0x17,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b, +0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x88cb,0x90cb,0x98cb,0xa0cb,0xa8cb,0xb0cb,0xb8cb,0x36cb,0x354b, +0x34cb,0x348b,0x46cb,0x344b,0x4ecb,0x388b,0x3ccb,0x454b,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,0,0,0x5ecb,0x344b,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b, +0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0,0,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b, +0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0x30b,0x34b,0x38b,0x3cb,0x7cb,0xa4b,0x1e4b,0x784b, +0x344b,0,0,0,0,0,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x17,0,0,0,0,0,0,0,5,6,6,6,0,6,6,0, +0,0,0,0,6,6,6,6,5,5,5,5,0,5,5,5, +0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,6,6,6,0, +0,0,0,6,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x58b,0x11cb,0x17,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x58b,0x7cb,0xa4b,5,5,5,5,5,6,6,0,0,0,0,0x58b, +0x68b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,0x1b,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,0,0,0,0,0,0,0x17,0x17,0x17,0x17,0,0,0, +0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,0,0,0,0,0,0,0,0x58b,0x68b,0x7cb,0x11cb,0x1e4b,0x784b, +5,5,5,5,6,6,6,6,0,0,0,0,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb, +0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x344b,0x34cb,0x348b,0x388b,0, +5,5,5,5,5,5,5,5,5,5,0,6,6,0x13,0,0, +5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,0xccb,0x1e4b,0x344b,5,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6, +6,0x58b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0, +5,5,6,6,6,6,0x17,0x17,0x17,0x17,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x784b,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289, +6,5,5,6,6,5,0,0,0,0,0,0,0,0,0,6, +8,6,8,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0, +0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +8,8,8,6,6,6,6,8,8,6,6,0x17,0x17,0x10,0x17,0x17, +0x17,0x17,6,0,0,0,0,0,0,0,0,0,0,0x10,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,0,0,0,5,5,5,5,5,5,5,6, +6,6,6,6,8,6,6,6,6,6,6,6,6,0,0x49,0x89, +0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,5,8,8,5, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,6,6,6,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,6,0x17,0x17,5,0, +0,0,0,0,0,0,0,0,8,5,5,5,5,0x17,0x17,0x17, +0x17,6,6,6,6,0x17,8,6,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,5,0x17,5,0x17,0x17,0x17,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,8,8,8,6,6, +6,6,6,6,6,6,6,8,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b, +0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x784b,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,8,8,8,6,6,6,8,8,6,8,6,6, +0x17,0x17,0x17,0x17,0x17,0x17,6,5,5,6,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,5,0,5,5, +5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,5,5,5,5,5,5,5,5,5,5,0x17,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,8,8,8,6,6,6,6,6,6,6,6,0, +0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0,0,0,0,5,5,8,8,0,0,6,6,6,6,6,6, +6,0,0,0,6,6,6,6,6,0,0,0,0,0,0,0, +0,0,0,0,6,6,8,8,0,5,5,5,5,5,5,5, +5,0,0,5,5,0,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5, +0,5,5,5,5,5,0,6,6,5,8,8,6,8,8,8, +8,0,0,8,8,0,0,8,8,8,0,0,5,0,0,0, +0,0,0,8,0,0,0,0,0,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,8,8,8,6,6,6,6,6,6,6,6,8,8,6,6, +6,8,6,5,5,5,5,0x17,0x17,0x17,0x17,0x17,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0,0x17,6,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,6, +6,6,6,6,6,8,6,8,8,8,8,6,6,8,6,6, +5,5,0x17,5,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,8,8,8,6,6, +6,6,0,0,8,8,8,8,6,6,8,6,6,0x17,0x17,0x17, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0x17,5,5,5,5,6,6,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,6, +6,6,6,6,6,6,6,8,8,6,8,6,6,0x17,0x17,0x17, +5,0,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,6,8,6,8,8,6,6,6,6, +6,6,8,6,5,0x17,0,0,0,0,0,0,8,8,6,6, +6,6,8,6,6,6,6,6,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0x17,0x17,0x17,0x1b,5,5,5,5, +5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,6, +6,6,6,6,8,6,6,0x17,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0, +0,0,0,0,0,0,0,0,0,0,0,5,8,5,8,6, +0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5, +5,5,5,0,0,5,0,0,5,5,5,5,5,5,5,5, +0,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,8,8,8,8,8,8,0,8,8,0,0,6, +6,8,6,5,6,5,0x17,5,8,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,0,0,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,8,8,8,6,6,6,6,0,0,6,6, +8,8,8,8,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,6,6,6,6,6,6,8,5,6, +6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,6,0,0,0,0, +0,0,0,0,5,6,6,6,6,6,6,8,8,6,6,6, +5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6, +6,6,6,8,6,6,0x17,0x17,0x17,5,0x17,0x17,5,0x17,0x17,0x17, +0x17,0x17,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x70b,0x74b,0x78b,0x7cb, +0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0,0,0,0x17,0x17,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,8,6,6,6,6,6,6,6,0, +6,6,6,6,6,6,8,6,6,6,6,6,6,6,6,6, +0,8,6,6,6,6,6,6,6,8,6,6,8,6,6,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,5,6,0,0,0,0, +0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0,0,0,0,5,5,5,5,5,5,5,0,5,5,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,6,6,6,6,6,6,0,0,0,6,0, +6,6,0,6,5,5,5,5,5,5,5,5,5,5,8,8, +8,8,8,0,6,6,0,8,8,6,8,6,5,0,0,0, +0,0,0,0,5,5,5,5,5,5,0,5,5,0,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,6,6,8,8,0x17,0x17,0,0,0, +0,0,0,0,6,8,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0,0,0,0,6,6,5,8,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,8,8,6,6, +6,6,6,0,0,0,8,8,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x19,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x17,0xcd0b,0xcc0b,0xcb0b,0xd00b,0xca0b,0xcf0b,0xcb4b,0xd04b, +0xc90b,0x37cb,0x37cb,0x364b,0x35cb,0xc94b,0x3fcb,0x350b,0x34cb,0x344b,0x344b,0x3ccb,0xcd0b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x19,0x19,0x34ca,0x354a,0x34ca,0x34ca,0x344a,0x348a,0x388a,0xf4a, +0x11ca,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0,0x17,0x17,0x17,0x17,0x17,0,0,0, +0,0,0,0,0,0,0,0,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a, +0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a, +0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x5ca,0x60a,0x60a,0x64a,0x68a, +0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0xc08a,0xc18a,0x58a,0x5ca,0x60a,0x60a, +0x64a,0x68a,0x60a,0x60a,0x64a,0x64a,0x64a,0x64a,0x6ca,0x70a,0x70a,0x70a,0x74a,0x74a,0x78a,0x78a, +0x78a,0x78a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x58a,0x5ca,0x60a,0x64a,0x64a,0x68a,0x68a,0x5ca,0x60a, +0x58a,0x5ca,0x348a,0x388a,0x454a,0x348a,0x388a,0x35ca,5,5,5,5,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x17,0x17,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, +0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,6,5,5,5,5,5,5,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0, +0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,6,6,6,6, +6,0x17,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6, +6,6,6,0x17,0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b,4,4,4,4, +0x17,0x1b,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0x7cb,0x1e4b,0x788b,0x790b,0x798b,0x7a0b,0x7a8b,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0,0,0,0,0,5,5,5,0x54b,0x58b,0x5cb,0x60b, +0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b, +0x58b,0x5cb,0x60b,0x17,0x17,0x17,0x17,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,0,0,0,0,6,5,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +0,0,0,0,0,0,0,6,6,6,6,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,0x17,4,6,0,0,0, +0,0,0,0,0,0,0,0,8,8,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,4,4,0,4,4,4,4,4,4,4,0,4,4,0, +5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,0,0,5,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,0,0,0x1b,6,6,0x17,0x10,0x10,0x10,0x10, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, -0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,0,0,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0, +0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b, 0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,8,8,6,6,6,0x1b,0x1b,0x1b,8,8,8, +8,8,8,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,6,6,6,6,6, +6,6,6,0x1b,0x1b,6,6,6,6,6,6,6,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,0x1b,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b, +0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0,0,0,0, +0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b, +0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x58b,0x5cb,0x60b,0x64b,0x68b,0x58b, +0x68b,0,0,0,0,0,0,0,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189, +0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89, +0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,2,2,2,2,2,2,2,0,2,2, +2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,1,0,1,1, +0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,1, +1,1,1,1,1,1,2,2,2,2,0,2,0,2,2,2, +2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +2,2,2,2,1,1,0,1,1,1,1,0,0,1,1,1, +1,1,1,1,1,0,1,1,1,1,1,1,1,0,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,1,1,0,1,1,1,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1, +1,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +2,2,2,2,2,2,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0x18,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,0x18,2,2,2,2, +2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x18,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,0x18,2,2,2,2,2,2,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,2,2,2,0x18,2,2,2,2, +2,2,1,2,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0x1b, +0x1b,0x1b,0x1b,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,6,6,6,6,6, +6,6,6,0,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,0,0,6,6,6,6,6,2,2,2,2, +2,2,2,2,2,2,5,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0, +0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,6,6,0,6, +6,0,6,6,6,6,6,0,0,0,0,0,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,5,0x1b,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,0,0,0,6,6,6,6, +6,6,6,4,4,4,4,4,4,4,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,6,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,6,6,6,6,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0x19,5,5,5,5, +5,5,5,5,5,5,5,4,6,6,6,6,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5, +5,5,5,0,5,5,5,5,0,5,5,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5, +5,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,6,6,6,6, +6,6,6,0,0,0,0,0,0,0,0,0,2,2,2,2, +6,6,6,6,6,6,6,4,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17,1,1,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b, +0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x78cb,0x794b,0x814b,0x58b, +0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x1b,0x34cb,0x344b,0x3ccb,0x19,0x58b,0x5cb,0x788b, +0x78cb,0,0,0,0,0,0,0,0,0,0,0,0x16cb,0x194b,0x1bcb,0x1e4b, +0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b, +0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x984b,0xa04b,0xa84b,0xb04b, +0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x1b,0x5cb,0x60b,0x64b,0x68b,0x6cb, +0x70b,0x74b,0x78b,0x7cb,0x900b,0xa00b,0x804b,0x788b,0x344b,0x354b,0,0,0,0x58b,0x5cb,0x60b, +0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b, +0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x18,0x18,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,0, +5,0,0,5,0,5,5,5,5,5,5,5,5,5,5,0, +5,5,5,5,0,5,0,5,0,0,0,0,0,0,5,0, +0,0,0,5,0,5,0,5,0,5,5,5,0,5,5,0, +5,0,0,5,0,5,0,5,0,5,0,5,0,5,5,0, +5,0,0,5,5,5,5,0,5,5,5,5,5,5,5,0, +5,5,5,5,0,5,5,5,5,0,5,0,5,5,5,5, +5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,0,0,0,5,5,5, +0,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x2cb,0x2cb,0x30b,0x34b, +0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x54b,0x54b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, 0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,5,0x705,5,5,5,5,5,5,5,5,5,5, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x1a,0x1a,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +5,0x705,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x645,5,5,5,5,5,5,5,5,5,5,5, +0x645,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x645,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0x645,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x685,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0xcc5,5,5,5,5,5,5,5,5,0xf45,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0xf45,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x6c5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x605,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0x685,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0xcc5, +5,5,5,5,5,5,5,5,0xf45,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0xf45,5,5,5,5,5,5,5, +5,5,5,5,5,5,0x6c5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,0x605,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x645,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x785,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, +0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x605, +5,5,5,5,5,5,5,5,5,5,5,5,5,0x645,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x785,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, -0x10,0x10,0x10,0x10,0,0x10,0,0,0,0,0,0,0,0,0,0, +0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, +0,0x10,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, 0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, -0x11,0x11,0,0,0,0,0,0 +0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0, +0,0,0,0 }; static const UTrie2 propsTrie={ @@ -1458,155 +1467,155 @@ static const UTrie2 propsTrie={ propsTrie_index+4692, nullptr, 4692, - 18324, + 18464, 0xa40, 0x12d4, 0x0, 0x0, 0x110000, - 0x59e4, + 0x5a70, nullptr, 0, false, false, 0, nullptr }; -static const uint16_t propsVectorsTrie_index[32692]={ -0x539,0x541,0x549,0x551,0x569,0x571,0x579,0x581,0x589,0x591,0x599,0x5a1,0x5a9,0x5b1,0x5b9,0x5c1, -0x5c8,0x5d0,0x5d8,0x5e0,0x5e3,0x5eb,0x5f3,0x5fb,0x603,0x60b,0x613,0x61b,0x623,0x62b,0x633,0x63b, -0x643,0x64b,0x652,0x65a,0x662,0x66a,0x672,0x67a,0x682,0x68a,0x68f,0x697,0x69e,0x6a6,0x6ae,0x6b6, -0x6be,0x6c6,0x6ce,0x6d6,0x6dd,0x6e5,0x6ed,0x6f5,0x6fd,0x705,0x70d,0x715,0x71d,0x725,0x72d,0x735, -0x1b39,0xd8a,0xe56,0x118d,0x12cc,0x1d01,0x1ea0,0x1cf9,0x13e6,0x13f6,0x13de,0x13ee,0x80a,0x810,0x818,0x820, -0x828,0x82e,0x836,0x83e,0x846,0x84c,0x854,0x85c,0x864,0x86a,0x872,0x87a,0x882,0x88a,0x892,0x899, -0x8a1,0x8a7,0x8af,0x8b7,0x8bf,0x8c5,0x8cd,0x8d5,0x8dd,0x13fe,0x8e5,0x8ed,0x8f5,0x8fc,0x904,0x90c, -0x914,0x918,0x920,0x927,0x92f,0x937,0x93f,0x947,0x1719,0x1721,0x94f,0x957,0x95f,0x967,0x96f,0x976, -0x177f,0x176f,0x1777,0x1a74,0x1a7c,0x140e,0x97e,0x1406,0x1662,0x1662,0x1664,0x1422,0x1423,0x1416,0x1418,0x141a, -0x1787,0x1789,0x986,0x1789,0x98e,0x993,0x99b,0x178e,0x9a1,0x1789,0x9a7,0x9af,0xc6a,0x1796,0x1796,0x9b7, -0x17a6,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7,0x17a7, -0x17a7,0x17a7,0x17a7,0x179e,0x9bf,0x17af,0x17af,0x9c7,0xb92,0xb9a,0xba2,0xbaa,0x17bf,0x17b7,0x9cf,0x9d7, -0x9df,0x17c9,0x17d1,0x9e7,0x17c7,0x9ef,0x1b41,0xd92,0xbb2,0xbba,0xbc2,0xbc7,0x19da,0xc91,0xc98,0x1936, -0xc42,0x1b49,0xd9a,0xda2,0xdaa,0xdb2,0xf60,0xf64,0x1a3a,0x1a3f,0xcd0,0xcd8,0x1ab0,0x1ab8,0x1c19,0xe5e, -0x1ac0,0xd1e,0xd26,0x1ac8,0x1105,0x11b5,0xf38,0xdba,0x1956,0x193e,0x194e,0x1946,0x19f2,0x19ea,0x19a6,0x1a32, -0x142b,0x142b,0x142b,0x142b,0x142e,0x142b,0x142b,0x1436,0x9f7,0x143e,0x9fb,0xa03,0x143e,0xa0b,0xa13,0xa1b, -0x144e,0x1446,0x1456,0xa23,0xa2b,0x145e,0xa33,0xa3b,0x1466,0x146e,0x1476,0x147e,0xa43,0x1486,0x148d,0x1495, -0x149d,0x14a5,0x14ad,0x14b5,0x14bd,0x14c4,0x14cc,0x14d4,0x14dc,0x14e4,0x14e7,0x14e9,0x17d9,0x18cc,0x18d2,0x1a22, -0x14f1,0xa4b,0xa53,0x1617,0x161c,0x161f,0x1625,0x14f9,0x162d,0x162d,0x1509,0x1501,0x1511,0x1519,0x1521,0x1529, -0x1531,0x1539,0x1541,0x1549,0x18da,0x192e,0x1a84,0x1be1,0x1559,0x155f,0x1567,0x156f,0x1551,0x1577,0x18e2,0x18e9, -0x17e1,0x17e1,0x17e1,0x17e1,0x17e1,0x17e1,0x17e1,0x17e1,0x18f1,0x18f1,0x18f1,0x18f1,0x18f9,0x1900,0x1902,0x1909, -0x1911,0x1915,0x1915,0x1918,0x1915,0x1915,0x191e,0x1915,0x195e,0x1a2a,0x1a8c,0xbcf,0xbd5,0x1d45,0x1d4d,0x1e2b, -0x19ca,0x19be,0x19c2,0x1a47,0x19ae,0x19ae,0x19ae,0xc52,0x19b6,0xc72,0x1a0a,0xcc0,0xc5a,0xc62,0xc62,0x1ad0, -0x19fa,0x1a94,0xca8,0xcb0,0xa5b,0x17e9,0x17e9,0xa63,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0xa6b,0x73d, -0x164a,0x166c,0xa73,0x1674,0xa7b,0x167c,0x1684,0x168c,0xa83,0xa88,0x1694,0x169b,0xa8d,0x17f9,0x1a1a,0xc4a, -0xa95,0x16f6,0x16fd,0x16a3,0x1705,0x1709,0x16ab,0x16af,0x16c8,0x16c8,0x16ca,0x16b7,0x16bf,0x16bf,0x16c0,0x1711, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801, -0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1801,0x1804,0x1966,0x1966, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2, -0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d2,0x16d9,0x1b31,0x1f0c, -0x180c,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812, -0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812, -0x1812,0x1812,0x1812,0x1812,0xa9d,0x181a,0xaa5,0x1b51,0x1adc,0x1adc,0x1adc,0x1adc,0x1adc,0x1adc,0x1adc,0x1adc, -0x1ad8,0xd2e,0x1aec,0x1ae4,0x1aee,0x1b59,0x1b59,0xdc2,0x19d2,0x1a4f,0x1aa4,0x1aa8,0x1a9c,0x1c11,0xce0,0xce7, -0x1a02,0xcb8,0x1a57,0xcef,0x1af6,0x1af9,0xd36,0x1b61,0x1b09,0x1b01,0xd3e,0xdca,0x1b69,0x1b6d,0xdd2,0x100f, -0x1b11,0xd46,0xd4e,0x1b75,0x1b85,0x1b7d,0xdda,0xf08,0xe66,0xe6e,0x1d9b,0xfbf,0x1e48,0x1e48,0x1b8d,0xde2, -0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762, -0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764, -0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766, -0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761, -0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763, -0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765, -0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767, -0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762, -0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764, -0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766, -0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761, -0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763, -0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765, -0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767, -0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762, -0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764, -0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766, -0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761, -0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763, -0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765, -0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767, -0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0x1767,0x1761,0x1762,0x1763,0x1764,0x1765,0x1766,0xaad,0xdea,0xded, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739, -0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739,0x1739, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, -0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x16e1,0x16e1,0x16e1,0x16e1,0x16e1,0x16e1,0x16e1,0x16e1, -0x16e6,0x16ee,0x1926,0x13a3,0x1a12,0x1a12,0x13a7,0x13ae,0xab5,0xabd,0xac5,0x1597,0x159e,0x15a6,0xacd,0x15ae, -0x15ec,0x15ec,0x157f,0x1587,0x15b6,0x15e3,0x15e4,0x15f4,0x15be,0x15c3,0x15cb,0x15d3,0xad5,0x15db,0xadd,0x158f, -0xcc8,0x15fc,0xae5,0xaed,0x1604,0x160a,0x160f,0xaf5,0xb05,0x1652,0x165a,0x163d,0x1642,0xb0d,0xb15,0xafd, -0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729, -0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1729,0x1731,0x1731,0x1731,0x1731, -0x1564,0x1564,0x15a4,0x15e4,0x1624,0x1664,0x16a4,0x16e4,0x1720,0x1760,0x178c,0x17cc,0x180c,0x184c,0x188c,0x18cc, -0x190c,0x1948,0x1988,0x19c8,0x1a08,0x1a3c,0x1a78,0x1ab8,0x1af8,0x1b38,0x1b74,0x1bb4,0x1bf4,0x1c34,0x1c74,0x1cb4, -0xe59,0xa80,0xac0,0xb00,0xb40,0xb6b,0xf99,0xa40,0xed9,0xa40,0xa40,0xa40,0xa40,0xbab,0x13e2,0x13e2, +static const uint16_t propsVectorsTrie_index[32764]={ +0x53e,0x546,0x54e,0x556,0x56e,0x576,0x57e,0x586,0x58e,0x596,0x59e,0x5a6,0x5ae,0x5b6,0x5be,0x5c6, +0x5cd,0x5d5,0x5dd,0x5e5,0x5e8,0x5f0,0x5f8,0x600,0x608,0x610,0x618,0x620,0x628,0x630,0x638,0x640, +0x648,0x650,0x657,0x65f,0x667,0x66f,0x677,0x67f,0x687,0x68f,0x694,0x69c,0x6a3,0x6ab,0x6b3,0x6bb, +0x6c3,0x6cb,0x6d3,0x6db,0x6e2,0x6ea,0x6f2,0x6fa,0x702,0x70a,0x712,0x71a,0x722,0x72a,0x732,0x73a, +0x1b43,0xd8f,0xe5b,0x1192,0x12d1,0x1d0b,0x1eaa,0x1d03,0x13f0,0x1400,0x13e8,0x13f8,0x80f,0x815,0x81d,0x825, +0x82d,0x833,0x83b,0x843,0x84b,0x851,0x859,0x861,0x869,0x86f,0x877,0x87f,0x887,0x88f,0x897,0x89e, +0x8a6,0x8ac,0x8b4,0x8bc,0x8c4,0x8ca,0x8d2,0x8da,0x8e2,0x1408,0x8ea,0x8f2,0x8fa,0x901,0x909,0x911, +0x919,0x91d,0x925,0x92c,0x934,0x93c,0x944,0x94c,0x1723,0x172b,0x954,0x95c,0x964,0x96c,0x974,0x97b, +0x1789,0x1779,0x1781,0x1a7e,0x1a86,0x1418,0x983,0x1410,0x166c,0x166c,0x166e,0x142c,0x142d,0x1420,0x1422,0x1424, +0x1791,0x1793,0x98b,0x1793,0x993,0x998,0x9a0,0x1798,0x9a6,0x1793,0x9ac,0x9b4,0xc6f,0x17a0,0x17a0,0x9bc, +0x17b0,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1,0x17b1, +0x17b1,0x17b1,0x17b1,0x17a8,0x9c4,0x17b9,0x17b9,0x9cc,0xb97,0xb9f,0xba7,0xbaf,0x17c9,0x17c1,0x9d4,0x9dc, +0x9e4,0x17d3,0x17db,0x9ec,0x17d1,0x9f4,0x1b4b,0xd97,0xbb7,0xbbf,0xbc7,0xbcc,0x19e4,0xc96,0xc9d,0x1940, +0xc47,0x1b53,0xd9f,0xda7,0xdaf,0xdb7,0xf65,0xf69,0x1a44,0x1a49,0xcd5,0xcdd,0x1aba,0x1ac2,0x1c23,0xe63, +0x1aca,0xd23,0xd2b,0x1ad2,0x110a,0x11ba,0xf3d,0xdbf,0x1960,0x1948,0x1958,0x1950,0x19fc,0x19f4,0x19b0,0x1a3c, +0x1435,0x1435,0x1435,0x1435,0x1438,0x1435,0x1435,0x1440,0x9fc,0x1448,0xa00,0xa08,0x1448,0xa10,0xa18,0xa20, +0x1458,0x1450,0x1460,0xa28,0xa30,0x1468,0xa38,0xa40,0x1470,0x1478,0x1480,0x1488,0xa48,0x1490,0x1497,0x149f, +0x14a7,0x14af,0x14b7,0x14bf,0x14c7,0x14ce,0x14d6,0x14de,0x14e6,0x14ee,0x14f1,0x14f3,0x17e3,0x18d6,0x18dc,0x1a2c, +0x14fb,0xa50,0xa58,0x1621,0x1626,0x1629,0x162f,0x1503,0x1637,0x1637,0x1513,0x150b,0x151b,0x1523,0x152b,0x1533, +0x153b,0x1543,0x154b,0x1553,0x18e4,0x1938,0x1a8e,0x1beb,0x1563,0x1569,0x1571,0x1579,0x155b,0x1581,0x18ec,0x18f3, +0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x18fb,0x18fb,0x18fb,0x18fb,0x1903,0x190a,0x190c,0x1913, +0x191b,0x191f,0x191f,0x1922,0x191f,0x191f,0x1928,0x191f,0x1968,0x1a34,0x1a96,0xbd4,0xbda,0x1d4f,0x1d57,0x1e35, +0x19d4,0x19c8,0x19cc,0x1a51,0x19b8,0x19b8,0x19b8,0xc57,0x19c0,0xc77,0x1a14,0xcc5,0xc5f,0xc67,0xc67,0x1ada, +0x1a04,0x1a9e,0xcad,0xcb5,0xa60,0x17f3,0x17f3,0xa68,0x17fb,0x17fb,0x17fb,0x17fb,0x17fb,0x17fb,0xa70,0x742, +0x1654,0x1676,0xa78,0x167e,0xa80,0x1686,0x168e,0x1696,0xa88,0xa8d,0x169e,0x16a5,0xa92,0x1803,0x1a24,0xc4f, +0xa9a,0x1700,0x1707,0x16ad,0x170f,0x1713,0x16b5,0x16b9,0x16d2,0x16d2,0x16d4,0x16c1,0x16c9,0x16c9,0x16ca,0x171b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b, +0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180b,0x180e,0x1970,0x1970, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc, +0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16dc,0x16e3,0x1b3b,0x1f16, +0x1816,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c, +0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c,0x181c, +0x181c,0x181c,0x181c,0x181c,0xaa2,0x1824,0xaaa,0x1b5b,0x1ae6,0x1ae6,0x1ae6,0x1ae6,0x1ae6,0x1ae6,0x1ae6,0x1ae6, +0x1ae2,0xd33,0x1af6,0x1aee,0x1af8,0x1b63,0x1b63,0xdc7,0x19dc,0x1a59,0x1aae,0x1ab2,0x1aa6,0x1c1b,0xce5,0xcec, +0x1a0c,0xcbd,0x1a61,0xcf4,0x1b00,0x1b03,0xd3b,0x1b6b,0x1b13,0x1b0b,0xd43,0xdcf,0x1b73,0x1b77,0xdd7,0x1014, +0x1b1b,0xd4b,0xd53,0x1b7f,0x1b8f,0x1b87,0xddf,0xf0d,0xe6b,0xe73,0x1da5,0xfc4,0x1e52,0x1e52,0x1b97,0xde7, +0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c, +0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e, +0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770, +0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b, +0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d, +0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f, +0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771, +0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c, +0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e, +0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770, +0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b, +0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d, +0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f, +0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771, +0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c, +0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e, +0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770, +0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b, +0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d, +0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f, +0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771, +0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0x1771,0x176b,0x176c,0x176d,0x176e,0x176f,0x1770,0xab2,0xdef,0xdf2, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743, +0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f, +0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x163f,0x16eb,0x16eb,0x16eb,0x16eb,0x16eb,0x16eb,0x16eb,0x16eb, +0x16f0,0x16f8,0x1930,0x139d,0x1a1c,0x1a1c,0x13a1,0x13a8,0xaba,0xac2,0xaca,0x15a1,0x15a8,0x15b0,0xad2,0x15b8, +0x15f6,0x15f6,0x1589,0x1591,0x15c0,0x15ed,0x15ee,0x15fe,0x15c8,0x15cd,0x15d5,0x15dd,0xada,0x15e5,0xae2,0x1599, +0xccd,0x1606,0xaea,0xaf2,0x160e,0x1614,0x1619,0xafa,0xb0a,0x165c,0x1664,0x1647,0x164c,0xb12,0xb1a,0xb02, +0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733, +0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x1733,0x173b,0x173b,0x173b,0x173b, +0x1578,0x1578,0x15b8,0x15f8,0x1638,0x1678,0x16b8,0x16f8,0x1734,0x1774,0x17a0,0x17e0,0x1820,0x1860,0x18a0,0x18e0, +0x1920,0x195c,0x199c,0x19dc,0x1a1c,0x1a50,0x1a8c,0x1acc,0x1b0c,0x1b4c,0x1b88,0x1bc8,0x1c08,0x1c48,0x1c88,0x1cc8, +0xe59,0xa80,0xac0,0xb00,0xb40,0xb6b,0xf99,0xa40,0xed9,0xa40,0xa40,0xa40,0xa40,0xbab,0x13f5,0x13f5, 0xf19,0xfd9,0xa40,0xa40,0xa40,0xbeb,0xf59,0xc2b,0xa40,0xc51,0xc91,0xcd1,0xd11,0xd51,0xe99,0xdc9, -0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322,0x1322, -0x1322,0x1322,0x1322,0x1322,0x1019,0x1362,0x1157,0x1197,0x13a2,0x11a2,0x1422,0x1422,0x1422,0x1059,0x1079,0x10b9, -0x1462,0x1462,0x11e2,0x14a2,0x10f9,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079, -0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1079,0x1117, +0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335, +0x1335,0x1335,0x1335,0x1335,0x1019,0x1375,0x116a,0x11aa,0x13b5,0x11b5,0x1435,0x1435,0x1435,0x1059,0x108c,0x10cc, +0x1475,0x1475,0x11f5,0x14b5,0x110c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c, +0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x112a, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd89, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, @@ -1629,1954 +1638,1958 @@ static const uint16_t propsVectorsTrie_index[32692]={ 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd89, 0xe09,0xe19,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd89, -0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2, -0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x1222, -0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2, -0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x12e2,0x1262, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0xc27,0xc2a,0xdf5,0x1deb,0x1017,0x745,0x559,0x10b1,0xcf7,0xd76,0x559,0x559,0x1d11,0xf10,0xf18,0x1e33, -0xc7a,0xc81,0xc89,0x1b95,0x1dcb,0x559,0x1dab,0xfe7,0x1b9d,0xdfd,0xe05,0xe0d,0x103f,0x74d,0x559,0x559, -0x1ba5,0x1ba5,0x755,0x559,0x1e60,0x10c9,0x1e58,0x10d1,0x1f4c,0x11cb,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0xe15,0x1fa4,0x12c4,0x1346,0x1347,0x1f6c,0x11f3,0x11fa,0x1201,0x1303,0x1307,0x127b,0x1211, -0x1c21,0x1c23,0xe76,0xe7d,0x1bad,0x1bb5,0xe1d,0xf30,0x1d09,0xef8,0xf00,0xfdf,0x1d29,0x1d2d,0x1d35,0x105f, -0xfaf,0x1d8b,0x75d,0x559,0x10b9,0x10c1,0x1d93,0xfb7,0xf91,0xf97,0xf9f,0xfa7,0x559,0x559,0x559,0x559, -0x1ed0,0x1ec8,0x113b,0x1143,0x1e13,0x1e0b,0x1087,0x559,0x559,0x559,0x559,0x559,0x1dfb,0x1047,0x104f,0x1057, -0x1dc3,0x1dbb,0xff7,0x1133,0x1d3d,0xf40,0x765,0x559,0x1097,0x109f,0x76d,0x559,0x559,0x559,0x559,0x559, -0x1f44,0x11ad,0x775,0x559,0x559,0x1e23,0x1e1b,0x108f,0x1283,0x1289,0x1291,0x559,0x559,0x1219,0x121d,0x1225, -0x1f04,0x1efc,0x1195,0x1ef4,0x1eec,0x1185,0x1df3,0x1037,0x1357,0x135a,0x135a,0x559,0x559,0x559,0x559,0x559, -0x10e9,0x10ee,0x10f6,0x10fd,0x1125,0x112b,0x559,0x559,0x1169,0x116d,0x1175,0x11bd,0x11c3,0x77d,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x11db,0x136a,0x136f,0x1377,0x559,0x559,0x781,0x1f8c,0x126b, -0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f, -0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a5f,0x1a64,0xcff,0xd06,0xd06,0xd06, -0x1a6c,0x1a6c,0x1a6c,0xd0e,0x1e50,0x1e50,0x1e50,0x1e50,0x1e50,0x1e50,0x789,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x78d,0x1fbc,0x1fbc,0x12d4,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b, -0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0x1c2b,0xe85,0xfff,0x1007,0x1fc4, -0x130f,0x1317,0xf48,0x1de3,0x1ddb,0x1027,0x102f,0x795,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x1f64,0x1f5c,0x11eb, -0x559,0x559,0x559,0x1d21,0x1d21,0xf20,0x1d19,0xf28,0x559,0x559,0x111d,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x799,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x1d73,0x1d73,0x1d73,0xf6c,0xf71, -0x7a1,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x1fd4,0x1337,0x133e,0x1fcc,0x1fcc,0x1fcc,0x7a9, -0x559,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0xb2b,0x184f,0xb33,0x1850,0x1847,0x1858,0x185e,0x1866, -0xb3b,0x198e,0x198e,0x7b1,0x559,0x559,0x559,0x1362,0x11e3,0x197e,0x197e,0xc32,0xd16,0x559,0x559,0x559, -0x559,0x1897,0x189e,0xb43,0x18a1,0xb4b,0xb53,0xb5b,0x189b,0xb63,0xb6b,0xb73,0x18a0,0x18a8,0x1897,0x189e, -0x189a,0x18a1,0x18a9,0x1898,0x189f,0x189b,0xb7a,0x186e,0x1876,0x187d,0x1884,0x1871,0x1879,0x1880,0x1887,0xb82, -0x188f,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78,0x1e78, -0x1e78,0x1e68,0x1e6b,0x1e68,0x1e72,0x10d9,0x7b9,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x12f0,0x12f8,0x12fb,0x12fb,0x12fb,0x12fb,0x12fb, -0x12fb,0x110d,0x1115,0x1fdc,0x134f,0x7c1,0x559,0x559,0x559,0x1f84,0x122d,0x7c9,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x7cd,0x131f,0x1f94,0x1273,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x7d5,0x137f,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x12dc,0x1db3,0x1db3,0x1db3,0x1db3,0x1db3,0x1db3,0xfef,0x559,0x1ec0,0x1eb8,0x10e1,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x7dd,0x1f54,0x11d3,0x559,0x559,0x1235,0x1236,0x7e5,0x559,0x559,0x559,0x559, -0x559,0xebd,0xec5,0xecd,0xed5,0xedd,0xee5,0xeec,0xef0,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x7e9,0x1067,0x1e03,0x106d,0x1e03,0x1075,0x107a,0x107f, -0x107f,0x1e88,0x1ea8,0x1eb0,0x1f1c,0x1e90,0x1f74,0x1e98,0x1f24,0x1f7c,0x1f7c,0x119d,0x11a5,0x124d,0x1253,0x125b, -0x1263,0x1f9c,0x1f9c,0x1f9c,0x1f9c,0x12a7,0x1f9c,0x12ad,0x12b1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1, -0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1, -0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f1,0x7f2,0xb8a,0x18b1,0x18b1,0x18b1,0x7fa,0x7fa,0x7fa, -0x7fa,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x802,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa, -0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa, -0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa, -0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa, -0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0x7fa,0xbdd,0xbe4,0xbec,0xbf4,0x196e,0x196e,0x196e, -0xbfc,0xc04,0xc07,0x199e,0x1996,0xc3a,0xd56,0xd5a,0xd5e,0x559,0x559,0x559,0x559,0xd66,0x1b19,0xd6e, -0xf58,0x1822,0xb1d,0xb23,0x101f,0xc0f,0x19e2,0xca0,0x559,0x1837,0x182a,0x182f,0x1976,0xc17,0xc1f,0x114b, -0x1151,0x1d7b,0xf79,0x1d6b,0xf50,0x1327,0x132f,0x559,0x559,0x1da3,0x1da3,0x1da3,0x1da3,0x1da3,0x1da3,0x1da3, -0x1da3,0x1da3,0xfc7,0xfcf,0xfd7,0x12e4,0x12e8,0x559,0x559,0x1b21,0xd7e,0x1b29,0x1b29,0xd82,0xe8d,0xe95, -0xe9d,0x1bf1,0x1bd9,0x1bf9,0x1c01,0x1be9,0xe25,0xe29,0xe30,0xe38,0xe3c,0xe44,0xe4c,0xe4e,0xe4e,0xe4e, -0xe4e,0x1c62,0x1c6a,0x1c62,0x1c70,0x1c78,0x1c43,0x1c80,0x1c88,0x1c62,0x1c90,0x1c98,0x1c9f,0x1ca7,0x1c4b,0x1c62, -0x1cac,0x1c53,0x1c5a,0x1cb4,0x1cba,0x1d5c,0x1d63,0x1d55,0x1cc1,0x1cc9,0x1cd1,0x1cd9,0x1dd3,0x1ce1,0x1ce9,0xea5, -0xead,0x1c33,0x1c33,0x1c33,0xeb5,0x1d83,0x1d83,0xf81,0xf89,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x1e3b,0x1e3b,0x1e3b,0x1e3b,0x1e3b,0x1e3b,0x1e3b, -0x1e3b,0x1e3b,0x1e3b,0x1e3b,0x1e3b,0x1e3b,0x1e3b,0x1e40,0x1e3b,0x1e3b,0x1e3b,0x10a7,0x10a9,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8, -0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8, -0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8, -0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8, -0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1159,0x1c3b,0x1f14,0x1f14,0x1f14,0x1f14,0x1f14,0x1f14, -0x1f14,0x1f34,0x1161,0x123e,0x1245,0x1f3c,0x1f3c,0x1f3c,0x1f3c,0x1f3c,0x1f3c,0x1f3c,0x1f3c,0x1f3c,0x1f3c,0x1f3c, -0x117d,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd, -0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbf,0x1bbd,0x1bc7,0x1bbd,0x1bbd, -0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bca,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1bd1,0x1209,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0, -0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0,0x1ee0, -0x1ee4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1fb4,0x1299, -0x129f,0x12b9,0x12bc,0x12bc,0x12bc,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559, -0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x559,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9, -0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9, -0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9, -0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18bc, -0x1387,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c, -0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c, -0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x138f,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x18c4,0x18c4,0x18c4,0x18c4,0x18c4,0x18c4,0x18c4, -0x18c4,0x18c4,0x18c4,0x18c4,0x18c4,0x18c4,0x18c4,0x18c4,0x18c4,0x13b6,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1393,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x139b,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1387, -0x1387,0x1387,0x1387,0x1387,0x1387,0x1387,0x1393,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09, -0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09, -0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09, -0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09, -0x13be,0x1cf1,0x1cf1,0x1cf1,0x1cf1,0x1cf1,0x1cf1,0x13c6,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80, -0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80, -0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80, -0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80, -0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x13ce,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c, -0x1f2c,0x1f2c,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac, -0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x13d6,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751, -0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751, -0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751, -0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751, -0x1751,0x1741,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759, -0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759, -0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759, -0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759, -0x1759,0x1749,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751, -0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751, -0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751, -0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751,0x1751, -0x1751,0x1751,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759, -0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759, -0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759, -0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759,0x1759, -0x1759,0x1759,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9, -0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9, -0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9, -0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9,0x18b9, -0x18b9,0x18b9,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09, -0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09, -0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09, -0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09,0x1c09, -0x1c09,0x1c09,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80, -0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80, -0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80, -0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80,0x1e80, -0x1e80,0x1e80,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8, -0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8, -0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8, -0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8,0x1ed8, -0x1ed8,0x1ed8,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c, -0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c, -0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c, -0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c,0x1f2c, -0x1f2c,0x1f2c,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac, -0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac, -0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac, -0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac,0x1fac, -0x1fac,0x1fac,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4,0x1fe4, -0x1fe4,0x1fe4,0x538,0x538,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e5,0x2ee,0x2e8, -0x2e8,0x2eb,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2, -0x2e2,0x2e2,0x2e2,0x2e2,0x7da,0x7d4,0x7b9,0x79e,0x7aa,0x7a7,0x79e,0x7b6,0x7a4,0x7b0,0x79e,0x7cb, -0x7c2,0x7b3,0x7d7,0x7ad,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x7bf,0x7bc, -0x7c5,0x7c5,0x7c5,0x7d4,0x79e,0x7e6,0x7e6,0x7e6,0x7e6,0x7e6,0x7e6,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0, -0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7a4, -0x7aa,0x7b0,0x7d1,0x798,0x7ce,0x7e3,0x7e3,0x7e3,0x7e3,0x7e3,0x7e3,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd, -0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7a4, -0x7c8,0x7a1,0x7c5,0x2e2,0,0,0,0,0,0,0,0,0,0,0,0, +0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5, +0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x12b5,0x1235, +0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5, +0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x12f5,0x1275, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0xc2c,0xc2f,0xdfa,0x1df5,0x101c,0x74a,0x55e,0x10b6,0xcfc,0xd7b,0x55e,0x55e,0x1d1b,0xf15,0xf1d,0x1e3d, +0xc7f,0xc86,0xc8e,0x1b9f,0x1dd5,0x55e,0x1db5,0xfec,0x1ba7,0xe02,0xe0a,0xe12,0x1044,0x752,0x55e,0x55e, +0x1baf,0x1baf,0x75a,0x55e,0x1e6a,0x10ce,0x1e62,0x10d6,0x1f56,0x11d0,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0xe1a,0x1fae,0x12c9,0x134b,0x134c,0x1f76,0x11f8,0x11ff,0x1206,0x1308,0x130c,0x1280,0x1216, +0x1c2b,0x1c2d,0xe7b,0xe82,0x1bb7,0x1bbf,0xe22,0xf35,0x1d13,0xefd,0xf05,0xfe4,0x1d33,0x1d37,0x1d3f,0x1064, +0xfb4,0x1d95,0x762,0x55e,0x10be,0x10c6,0x1d9d,0xfbc,0xf96,0xf9c,0xfa4,0xfac,0x55e,0x55e,0x55e,0x55e, +0x1eda,0x1ed2,0x1140,0x1148,0x1e1d,0x1e15,0x108c,0x55e,0x55e,0x55e,0x55e,0x55e,0x1e05,0x104c,0x1054,0x105c, +0x1dcd,0x1dc5,0xffc,0x1138,0x1d47,0xf45,0x76a,0x55e,0x109c,0x10a4,0x772,0x55e,0x55e,0x55e,0x55e,0x55e, +0x1f4e,0x11b2,0x77a,0x55e,0x55e,0x1e2d,0x1e25,0x1094,0x1288,0x128e,0x1296,0x55e,0x55e,0x121e,0x1222,0x122a, +0x1f0e,0x1f06,0x119a,0x1efe,0x1ef6,0x118a,0x1dfd,0x103c,0x135c,0x135f,0x135f,0x55e,0x55e,0x55e,0x55e,0x55e, +0x10ee,0x10f3,0x10fb,0x1102,0x112a,0x1130,0x55e,0x55e,0x116e,0x1172,0x117a,0x11c2,0x11c8,0x782,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x11e0,0x136f,0x1374,0x137c,0x55e,0x55e,0x786,0x1f96,0x1270, +0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69, +0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a69,0x1a6e,0xd04,0xd0b,0xd0b,0xd0b, +0x1a76,0x1a76,0x1a76,0xd13,0x1e5a,0x1e5a,0x1e5a,0x1e5a,0x1e5a,0x1e5a,0x78e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x792,0x1fc6,0x1fc6,0x12d9,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35, +0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0xe8a,0x1004,0x100c,0x1fce, +0x1314,0x131c,0xf4d,0x1ded,0x1de5,0x102c,0x1034,0x79a,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x1f6e,0x1f66,0x11f0, +0x55e,0x55e,0x55e,0x1d2b,0x1d2b,0xf25,0x1d23,0xf2d,0x55e,0x55e,0x1122,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x79e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x1d7d,0x1d7d,0x1d7d,0xf71,0xf76, +0x7a6,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x1fde,0x133c,0x1343,0x1fd6,0x1fd6,0x1fd6,0x7ae, +0x55e,0x1849,0x1849,0x1849,0x1849,0x1849,0x1849,0x1849,0xb30,0x1859,0xb38,0x185a,0x1851,0x1862,0x1868,0x1870, +0xb40,0x1998,0x1998,0x7b6,0x55e,0x55e,0x55e,0x1367,0x11e8,0x1988,0x1988,0xc37,0xd1b,0x55e,0x55e,0x55e, +0x55e,0x18a1,0x18a8,0xb48,0x18ab,0xb50,0xb58,0xb60,0x18a5,0xb68,0xb70,0xb78,0x18aa,0x18b2,0x18a1,0x18a8, +0x18a4,0x18ab,0x18b3,0x18a2,0x18a9,0x18a5,0xb7f,0x1878,0x1880,0x1887,0x188e,0x187b,0x1883,0x188a,0x1891,0xb87, +0x1899,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82,0x1e82, +0x1e82,0x1e72,0x1e75,0x1e72,0x1e7c,0x10de,0x7be,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x12f5,0x12fd,0x1300,0x1300,0x1300,0x1300,0x1300, +0x1300,0x1112,0x111a,0x1fe6,0x1354,0x7c6,0x55e,0x55e,0x55e,0x1f8e,0x1232,0x7ce,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x7d2,0x1324,0x1f9e,0x1278,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x7da,0x1384,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x12e1,0x1dbd,0x1dbd,0x1dbd,0x1dbd,0x1dbd,0x1dbd,0xff4,0x55e,0x1eca,0x1ec2,0x10e6,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x7e2,0x1f5e,0x11d8,0x55e,0x55e,0x123a,0x123b,0x7ea,0x55e,0x55e,0x55e,0x55e, +0x55e,0xec2,0xeca,0xed2,0xeda,0xee2,0xeea,0xef1,0xef5,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x7ee,0x106c,0x1e0d,0x1072,0x1e0d,0x107a,0x107f,0x1084, +0x1084,0x1e92,0x1eb2,0x1eba,0x1f26,0x1e9a,0x1f7e,0x1ea2,0x1f2e,0x1f86,0x1f86,0x11a2,0x11aa,0x1252,0x1258,0x1260, +0x1268,0x1fa6,0x1fa6,0x1fa6,0x1fa6,0x12ac,0x1fa6,0x12b2,0x12b6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6, +0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6, +0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f6,0x7f7,0xb8f,0x18bb,0x18bb,0x18bb,0x7ff,0x7ff,0x7ff, +0x7ff,0x1990,0x1990,0x1990,0x1990,0x1990,0x1990,0x1990,0x807,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff, +0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff, +0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff, +0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff, +0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0x7ff,0xbe2,0xbe9,0xbf1,0xbf9,0x1978,0x1978,0x1978, +0xc01,0xc09,0xc0c,0x19a8,0x19a0,0xc3f,0xd5b,0xd5f,0xd63,0x55e,0x55e,0x55e,0x55e,0xd6b,0x1b23,0xd73, +0xf5d,0x182c,0xb22,0xb28,0x1024,0xc14,0x19ec,0xca5,0x55e,0x1841,0x1834,0x1839,0x1980,0xc1c,0xc24,0x1150, +0x1156,0x1d85,0xf7e,0x1d75,0xf55,0x132c,0x1334,0x55e,0x55e,0x1dad,0x1dad,0x1dad,0x1dad,0x1dad,0x1dad,0x1dad, +0x1dad,0x1dad,0xfcc,0xfd4,0xfdc,0x12e9,0x12ed,0x55e,0x55e,0x1b2b,0xd83,0x1b33,0x1b33,0xd87,0xe92,0xe9a, +0xea2,0x1bfb,0x1be3,0x1c03,0x1c0b,0x1bf3,0xe2a,0xe2e,0xe35,0xe3d,0xe41,0xe49,0xe51,0xe53,0xe53,0xe53, +0xe53,0x1c6c,0x1c74,0x1c6c,0x1c7a,0x1c82,0x1c4d,0x1c8a,0x1c92,0x1c6c,0x1c9a,0x1ca2,0x1ca9,0x1cb1,0x1c55,0x1c6c, +0x1cb6,0x1c5d,0x1c64,0x1cbe,0x1cc4,0x1d66,0x1d6d,0x1d5f,0x1ccb,0x1cd3,0x1cdb,0x1ce3,0x1ddd,0x1ceb,0x1cf3,0xeaa, +0xeb2,0x1c3d,0x1c3d,0x1c3d,0xeba,0x1d8d,0x1d8d,0xf86,0xf8e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x1e45,0x1e45,0x1e45,0x1e45,0x1e45,0x1e45,0x1e45, +0x1e45,0x1e45,0x1e45,0x1e45,0x1e45,0x1e45,0x1e45,0x1e4a,0x1e45,0x1e45,0x1e45,0x10ac,0x10ae,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2, +0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2, +0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2, +0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2, +0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x115e,0x1c45,0x1f1e,0x1f1e,0x1f1e,0x1f1e,0x1f1e,0x1f1e, +0x1f1e,0x1f3e,0x1166,0x1243,0x124a,0x1f46,0x1f46,0x1f46,0x1f46,0x1f46,0x1f46,0x1f46,0x1f46,0x1f46,0x1f46,0x1f46, +0x1182,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7, +0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc9,0x1bc7,0x1bd1,0x1bc7,0x1bc7, +0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bd4,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bc7,0x1bdb,0x120e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea, +0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea,0x1eea, +0x1eee,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x1fbe,0x129e, +0x12a4,0x12be,0x12c1,0x12c1,0x12c1,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e, +0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x55e,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3, +0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3, +0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3, +0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c6, +0x138c,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36, +0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36, +0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x13d0,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6, +0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x1ff6,0x13e0,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x18ce,0x18ce,0x18ce,0x18ce, +0x18ce,0x18ce,0x18ce,0x18ce,0x18ce,0x18ce,0x18ce,0x18ce,0x18ce,0x18ce,0x18ce,0x18ce,0x13b0,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138d,0x1fee,0x1fee,0x1fee,0x1fee, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1395,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138d,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13, +0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13, +0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13, +0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13, +0x1c13,0x1c13,0x1c13,0x13b8,0x1cfb,0x1cfb,0x1cfb,0x1cfb,0x1cfb,0x1cfb,0x13c0,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a, +0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a, +0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a, +0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a, +0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x13c8,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36, +0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6, +0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x13d8, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, +0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, +0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, +0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, +0x175b,0x175b,0x175b,0x175b,0x174b,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763, +0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763, +0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763, +0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763, +0x1763,0x1763,0x1763,0x1763,0x1753,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, +0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, +0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, +0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, +0x175b,0x175b,0x175b,0x175b,0x175b,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763, +0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763, +0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763, +0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763,0x1763, +0x1763,0x1763,0x1763,0x1763,0x1763,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3, +0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3, +0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3, +0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x18c3, +0x18c3,0x18c3,0x18c3,0x18c3,0x18c3,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13, +0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13, +0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13, +0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1c13, +0x1c13,0x1c13,0x1c13,0x1c13,0x1c13,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a, +0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a, +0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a, +0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a, +0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1e8a,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2, +0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2, +0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2, +0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2, +0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1ee2,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36, +0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36, +0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36, +0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1f36, +0x1f36,0x1f36,0x1f36,0x1f36,0x1f36,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6, +0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6, +0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6, +0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6, +0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fb6,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x1fee, +0x1fee,0x1fee,0x1fee,0x1fee,0x1fee,0x53d,0x53d,0x53d,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2, +0x2e2,0x2e5,0x2ee,0x2e8,0x2e8,0x2eb,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2, +0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x2e2,0x7da,0x7d4,0x7b9,0x79e,0x7aa,0x7a7,0x79e,0x7b6, +0x7a4,0x7b0,0x79e,0x7cb,0x7c2,0x7b3,0x7d7,0x7ad,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b, +0x79b,0x79b,0x7bf,0x7bc,0x7c5,0x7c5,0x7c5,0x7d4,0x79e,0x7e6,0x7e6,0x7e6,0x7e6,0x7e6,0x7e6,0x7e0, +0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0, +0x7e0,0x7e0,0x7e0,0x7a4,0x7aa,0x7b0,0x7d1,0x798,0x7ce,0x7e3,0x7e3,0x7e3,0x7e3,0x7e3,0x7e3,0x7dd, +0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd, +0x7dd,0x7dd,0x7dd,0x7a4,0x7c8,0x7a1,0x7c5,0x2e2,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x300,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1, +0,0,0,0,0,0,0,0,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x300,0x2f1,0x2f1, 0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1, -0x2f1,0x2f1,0x2f1,0x2f1,0x2f4,0x64b,0x7ef,0x7f2,0x651,0x7f2,0x7ec,0x645,0x63c,0x2fa,0x65a,0x2fd, -0x7f5,0x633,0x648,0x7e9,0x64e,0x657,0x639,0x639,0x63f,0x2f7,0x645,0x642,0x63c,0x639,0x65a,0x2fd, -0x636,0x636,0x636,0x64b,0x306,0x306,0x306,0x306,0x306,0x306,0x663,0x306,0x306,0x306,0x306,0x306, -0x306,0x306,0x306,0x306,0x663,0x306,0x306,0x306,0x306,0x306,0x306,0x654,0x663,0x306,0x306,0x306, -0x306,0x306,0x663,0x65d,0x660,0x660,0x303,0x303,0x303,0x303,0x65d,0x303,0x660,0x660,0x660,0x303, -0x660,0x660,0x303,0x303,0x65d,0x303,0x660,0x660,0x303,0x303,0x303,0x654,0x65d,0x660,0x660,0x303, -0x660,0x303,0x65d,0x303,0x312,0x669,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309, -0x312,0x309,0x312,0x309,0x30f,0x666,0x312,0x669,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x669, -0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x66f,0x666,0x312,0x309,0x312,0x669, -0x312,0x309,0x312,0x309,0x312,0x666,0x672,0x66c,0x312,0x309,0x312,0x309,0x666,0x312,0x309,0x312, -0x309,0x312,0x309,0x672,0x66c,0x66f,0x666,0x312,0x669,0x312,0x309,0x312,0x669,0x675,0x66f,0x666, -0x312,0x669,0x312,0x309,0x312,0x309,0x66f,0x666,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309, -0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x66f,0x666,0x312,0x309,0x312,0x669, -0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x312,0x309,0x312, -0x309,0x312,0x309,0x30c,0x315,0x321,0x321,0x315,0x321,0x315,0x321,0x321,0x315,0x321,0x321,0x321, -0x315,0x315,0x321,0x321,0x321,0x321,0x315,0x321,0x321,0x315,0x321,0x321,0x321,0x315,0x315,0x315, -0x321,0x321,0x315,0x321,0x324,0x318,0x321,0x315,0x321,0x315,0x321,0x321,0x315,0x321,0x315,0x315, -0x321,0x315,0x321,0x324,0x318,0x321,0x321,0x321,0x315,0x321,0x315,0x321,0x321,0x315,0x315,0x31e, -0x321,0x315,0x315,0x315,0x31e,0x31e,0x31e,0x31e,0x327,0x327,0x31b,0x327,0x327,0x31b,0x327,0x327, -0x31b,0x324,0x678,0x324,0x678,0x324,0x678,0x324,0x678,0x324,0x678,0x324,0x678,0x324,0x678,0x324, -0x678,0x315,0x324,0x318,0x324,0x318,0x324,0x318,0x321,0x315,0x324,0x318,0x324,0x318,0x324,0x318, -0x324,0x318,0x324,0x318,0x318,0x327,0x327,0x31b,0x324,0x318,0x9cf,0x9cf,0x9d2,0x9cc,0x324,0x318, -0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318, -0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x9d2,0x9cc,0x9d2,0x9cc,0x9cf,0x9c9,0x9d2,0x9cc, -0xb8b,0xc84,0x9cf,0x9c9,0x9cf,0x9c9,0x9d2,0x9cc,0x9d2,0x9cc,0x9d2,0x9cc,0x9d2,0x9cc,0x9d2,0x9cc, -0x9d2,0x9cc,0x9d2,0x9cc,0xc84,0xc84,0xc84,0xd7d,0xd7d,0xd7d,0xd80,0xd80,0xd7d,0xd80,0xd80,0xd7d, -0xd7d,0xd80,0xebe,0xec1,0xec1,0xec1,0xec1,0xebe,0xec1,0xebe,0xec1,0xebe,0xec1,0xebe,0xec1,0xebe, -0x32a,0x67b,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a, -0x32a,0x67b,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a, +0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f1,0x2f4,0x64b,0x7ef,0x7f2,0x651,0x7f2,0x7ec,0x645, +0x63c,0x2fa,0x65a,0x2fd,0x7f5,0x633,0x648,0x7e9,0x64e,0x657,0x639,0x639,0x63f,0x2f7,0x645,0x642, +0x63c,0x639,0x65a,0x2fd,0x636,0x636,0x636,0x64b,0x306,0x306,0x306,0x306,0x306,0x306,0x663,0x306, +0x306,0x306,0x306,0x306,0x306,0x306,0x306,0x306,0x663,0x306,0x306,0x306,0x306,0x306,0x306,0x654, +0x663,0x306,0x306,0x306,0x306,0x306,0x663,0x65d,0x660,0x660,0x303,0x303,0x303,0x303,0x65d,0x303, +0x660,0x660,0x660,0x303,0x660,0x660,0x303,0x303,0x65d,0x303,0x660,0x660,0x303,0x303,0x303,0x654, +0x65d,0x660,0x660,0x303,0x660,0x303,0x65d,0x303,0x312,0x669,0x312,0x309,0x312,0x309,0x312,0x309, +0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x30f,0x666,0x312,0x669,0x312,0x309,0x312,0x309, +0x312,0x309,0x312,0x669,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x66f,0x666, +0x312,0x309,0x312,0x669,0x312,0x309,0x312,0x309,0x312,0x666,0x672,0x66c,0x312,0x309,0x312,0x309, +0x666,0x312,0x309,0x312,0x309,0x312,0x309,0x672,0x66c,0x66f,0x666,0x312,0x669,0x312,0x309,0x312, +0x669,0x675,0x66f,0x666,0x312,0x669,0x312,0x309,0x312,0x309,0x66f,0x666,0x312,0x309,0x312,0x309, +0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x66f,0x666, +0x312,0x309,0x312,0x669,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309,0x312,0x309, +0x312,0x312,0x309,0x312,0x309,0x312,0x309,0x30c,0x315,0x321,0x321,0x315,0x321,0x315,0x321,0x321, +0x315,0x321,0x321,0x321,0x315,0x315,0x321,0x321,0x321,0x321,0x315,0x321,0x321,0x315,0x321,0x321, +0x321,0x315,0x315,0x315,0x321,0x321,0x315,0x321,0x324,0x318,0x321,0x315,0x321,0x315,0x321,0x321, +0x315,0x321,0x315,0x315,0x321,0x315,0x321,0x324,0x318,0x321,0x321,0x321,0x315,0x321,0x315,0x321, +0x321,0x315,0x315,0x31e,0x321,0x315,0x315,0x315,0x31e,0x31e,0x31e,0x31e,0x327,0x327,0x31b,0x327, +0x327,0x31b,0x327,0x327,0x31b,0x324,0x678,0x324,0x678,0x324,0x678,0x324,0x678,0x324,0x678,0x324, +0x678,0x324,0x678,0x324,0x678,0x315,0x324,0x318,0x324,0x318,0x324,0x318,0x321,0x315,0x324,0x318, +0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x318,0x327,0x327,0x31b,0x324,0x318,0x9cf,0x9cf, +0x9d2,0x9cc,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318, +0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x324,0x318,0x9d2,0x9cc,0x9d2,0x9cc, +0x9cf,0x9c9,0x9d2,0x9cc,0xb8e,0xc87,0x9cf,0x9c9,0x9cf,0x9c9,0x9d2,0x9cc,0x9d2,0x9cc,0x9d2,0x9cc, +0x9d2,0x9cc,0x9d2,0x9cc,0x9d2,0x9cc,0x9d2,0x9cc,0xc87,0xc87,0xc87,0xd80,0xd80,0xd80,0xd83,0xd83, +0xd80,0xd83,0xd83,0xd80,0xd80,0xd83,0xec1,0xec4,0xec4,0xec4,0xec4,0xec1,0xec4,0xec1,0xec4,0xec1, +0xec4,0xec1,0xec4,0xec1,0x32a,0x67b,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a, +0x32a,0x32a,0x32a,0x32a,0x32a,0x67b,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a, 0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a, -0x32d,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a, -0x32a,0x32a,0x32a,0x32a,0x32a,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0xc87,0xc87,0x342,0x342,0x342,0x342, -0x342,0x342,0x342,0x342,0x342,0x339,0x339,0x339,0x339,0x339,0x339,0x339,0x336,0x336,0x333,0x333, -0x681,0x333,0x339,0x684,0x33c,0x684,0x684,0x684,0x33c,0x684,0x339,0x339,0x687,0x33f,0x333,0x333, -0x333,0x333,0x333,0x333,0x67e,0x67e,0x67e,0x67e,0x330,0x67e,0x333,0xb01,0x342,0x342,0x342,0x342, -0x342,0x333,0x333,0x333,0x333,0x333,0x9de,0x9de,0x9db,0x9d8,0x9db,0xc8a,0xc8a,0xc8a,0xc8a,0xc8a, -0xc8a,0xc8a,0xc8a,0xc8a,0xc8a,0xc8a,0xc8a,0xc8a,0xc8a,0xc8a,0xc8a,0xc8a,0x68a,0x68a,0x68a,0x68a, +0x32a,0x32a,0x32a,0x32a,0x32d,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a, +0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0xc8a,0xc8a, +0x342,0x342,0x342,0x342,0x342,0x342,0x342,0x342,0x342,0x339,0x339,0x339,0x339,0x339,0x339,0x339, +0x336,0x336,0x333,0x333,0x681,0x333,0x339,0x684,0x33c,0x684,0x684,0x684,0x33c,0x684,0x339,0x339, +0x687,0x33f,0x333,0x333,0x333,0x333,0x333,0x333,0x67e,0x67e,0x67e,0x67e,0x330,0x67e,0x333,0xb04, +0x342,0x342,0x342,0x342,0x342,0x333,0x333,0x333,0x333,0x333,0x9de,0x9de,0x9db,0x9d8,0x9db,0xc8d, +0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d, 0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a, 0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a, 0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a, -0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68d,0x68d,0x92d,0x68d, -0x68d,0x930,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xc3c,0xd47,0xd47,0xd47,0xd47, -0xd47,0xd47,0xd47,0xd47,0xe82,0xe82,0xe82,0xe82,0xe85,0xd4a,0xd4a,0xd4a,0x690,0x690,0xb07,0xc81, -0xc81,0xc81,0xc81,0xc81,0xc81,0xc81,0xc81,0xc81,0xc81,0xc81,0xc81,0xc81,0xf6c,0xf69,0xf6c,0xf69, -0x34e,0x357,0xf6c,0xf69,9,9,0x35d,0xec4,0xec4,0xec4,0x345,0x14af,9,9,9,9, -0x35a,0x348,0x36c,0x34b,0x36c,0x36c,0x36c,9,0x36c,9,0x36c,0x36c,0x363,0x696,0x696,0x696, -0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,9,0x696, -0x696,0x696,0x696,0x696,0x696,0x696,0x36c,0x36c,0x363,0x363,0x363,0x363,0x363,0x693,0x693,0x693, -0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x360,0x693, -0x693,0x693,0x693,0x693,0x693,0x693,0x363,0x363,0x363,0x363,0x363,0xf6c,0x36f,0x36f,0x372,0x36c, -0x36c,0x36f,0x366,0x9e1,0xb94,0xb91,0x369,0x9e1,0x369,0x9e1,0x369,0x9e1,0x369,0x9e1,0x354,0x351, -0x354,0x351,0x354,0x351,0x354,0x351,0x354,0x351,0x354,0x351,0x354,0x351,0x36f,0x36f,0x366,0x360, -0xb43,0xb40,0xb8e,0xc90,0xc8d,0xc93,0xc90,0xc8d,0xd83,0xd86,0xd86,0xd86,0x9f0,0x6a2,0x37e,0x381, -0x37e,0x37e,0x37e,0x381,0x37e,0x37e,0x37e,0x37e,0x381,0x9f0,0x381,0x37e,0x69f,0x69f,0x69f,0x69f, -0x69f,0x69f,0x69f,0x69f,0x69f,0x6a2,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f, -0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x699,0x699,0x699,0x699, -0x699,0x699,0x699,0x699,0x699,0x69c,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699, -0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x9ea,0x69c,0x378,0x37b,0x378,0x378,0x378,0x37b, -0x378,0x378,0x378,0x378,0x37b,0x9ea,0x37b,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378, -0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x381,0x37b, -0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x375,0x939,0x93c,0x91e,0x91e,0x1116, -0x9e4,0x9e4,0xb9a,0xb97,0x9ed,0x9e7,0x9ed,0x9e7,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378, +0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a,0x68a, +0x68d,0x68d,0x92d,0x68d,0x68d,0x930,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xc3f, +0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xe85,0xe85,0xe85,0xe85,0xe88,0xd4d,0xd4d,0xd4d, +0x690,0x690,0xb0a,0xc84,0xc84,0xc84,0xc84,0xc84,0xc84,0xc84,0xc84,0xc84,0xc84,0xc84,0xc84,0xc84, +0xf6f,0xf6c,0xf6f,0xf6c,0x34e,0x357,0xf6f,0xf6c,9,9,0x35d,0xec7,0xec7,0xec7,0x345,0x14b8, +9,9,9,9,0x35a,0x348,0x36c,0x34b,0x36c,0x36c,0x36c,9,0x36c,9,0x36c,0x36c, +0x363,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696, +0x696,0x696,9,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x36c,0x36c,0x363,0x363,0x363,0x363, +0x363,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693, +0x693,0x693,0x360,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x363,0x363,0x363,0x363,0x363,0xf6f, +0x36f,0x36f,0x372,0x36c,0x36c,0x36f,0x366,0x9e1,0xb97,0xb94,0x369,0x9e1,0x369,0x9e1,0x369,0x9e1, +0x369,0x9e1,0x354,0x351,0x354,0x351,0x354,0x351,0x354,0x351,0x354,0x351,0x354,0x351,0x354,0x351, +0x36f,0x36f,0x366,0x360,0xb46,0xb43,0xb91,0xc93,0xc90,0xc96,0xc93,0xc90,0xd86,0xd89,0xd89,0xd89, +0x9f0,0x6a2,0x37e,0x381,0x37e,0x37e,0x37e,0x381,0x37e,0x37e,0x37e,0x37e,0x381,0x9f0,0x381,0x37e, +0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x6a2,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f, +0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f, +0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x69c,0x699,0x699,0x699,0x699,0x699,0x699, +0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x9ea,0x69c,0x378,0x37b, +0x378,0x378,0x378,0x37b,0x378,0x378,0x378,0x378,0x37b,0x9ea,0x37b,0x378,0x37e,0x378,0x37e,0x378, +0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378, +0x37e,0x378,0x381,0x37b,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x375,0x939, +0x93c,0x91e,0x91e,0x111c,0x9e4,0x9e4,0xb9d,0xb9a,0x9ed,0x9e7,0x9ed,0x9e7,0x37e,0x378,0x37e,0x378, 0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378, 0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378, -0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x381,0x37b,0x37e,0x378,0xb9a,0xb97,0x37e, -0x378,0xb9a,0xb97,0x37e,0x378,0xb9a,0xb97,0xec7,0x381,0x37b,0x381,0x37b,0x37e,0x378,0x381,0x37b, -0x37e,0x378,0x381,0x37b,0x381,0x37b,0x381,0x37b,0x37e,0x378,0x381,0x37b,0x381,0x37b,0x381,0x37b, -0x37e,0x378,0x381,0x37b,0x9f0,0x9ea,0x381,0x37b,0x381,0x37b,0x381,0x37b,0x381,0x37b,0xd8c,0xd89, -0x381,0x37b,0xeca,0xec7,0xeca,0xec7,0xeca,0xec7,0xc00,0xbfd,0xc00,0xbfd,0xc00,0xbfd,0xc00,0xbfd, -0xc00,0xbfd,0xc00,0xbfd,0xc00,0xbfd,0xc00,0xbfd,0xef7,0xef4,0xef7,0xef4,0xfe7,0xfe4,0xfe7,0xfe4, -0xfe7,0xfe4,0xfe7,0xfe4,0xfe7,0xfe4,0xfe7,0xfe4,0xfe7,0xfe4,0xfe7,0xfe4,0x114f,0x114c,0x1329,0x1326, -0x14e5,0x14e2,0x14e5,0x14e2,0x14e5,0x14e2,0x14e5,0x14e2,0xc,0x393,0x393,0x393,0x393,0x393,0x393,0x393, +0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x378,0x37e,0x381,0x37b,0x37e, +0x378,0xb9d,0xb9a,0x37e,0x378,0xb9d,0xb9a,0x37e,0x378,0xb9d,0xb9a,0xeca,0x381,0x37b,0x381,0x37b, +0x37e,0x378,0x381,0x37b,0x37e,0x378,0x381,0x37b,0x381,0x37b,0x381,0x37b,0x37e,0x378,0x381,0x37b, +0x381,0x37b,0x381,0x37b,0x37e,0x378,0x381,0x37b,0x9f0,0x9ea,0x381,0x37b,0x381,0x37b,0x381,0x37b, +0x381,0x37b,0xd8f,0xd8c,0x381,0x37b,0xecd,0xeca,0xecd,0xeca,0xecd,0xeca,0xc03,0xc00,0xc03,0xc00, +0xc03,0xc00,0xc03,0xc00,0xc03,0xc00,0xc03,0xc00,0xc03,0xc00,0xc03,0xc00,0xefa,0xef7,0xefa,0xef7, +0xfea,0xfe7,0xfea,0xfe7,0xfea,0xfe7,0xfea,0xfe7,0xfea,0xfe7,0xfea,0xfe7,0xfea,0xfe7,0xfea,0xfe7, +0x1155,0x1152,0x132f,0x132c,0x14ee,0x14eb,0x14ee,0x14eb,0x14ee,0x14eb,0x14ee,0x14eb,0xc,0x393,0x393,0x393, 0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393, -0x393,0x393,0x393,0xc,0xc,0x396,0x384,0x384,0x384,0x38a,0x384,0x387,0x18ea,0x38d,0x38d,0x38d, +0x393,0x393,0x393,0x393,0x393,0x393,0x393,0xc,0xc,0x396,0x384,0x384,0x384,0x38a,0x384,0x387, +0x18f9,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d, 0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d, -0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x390, -0x18ea,0x399,0x9f3,0xc,0xc,0x14b2,0x14b2,0x13ce,0xf,0x960,0x960,0x960,0x960,0x960,0x960,0x960, -0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0xd8f,0x960,0x960,0x960,0x960,0x960, -0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, -0x39c,0x39c,0xecd,0x39c,0x39c,0x39c,0x3a8,0x39c,0x39f,0x39c,0x39c,0x3ab,0x963,0xd92,0xd95,0xd92, -0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae, +0x38d,0x38d,0x38d,0x390,0x18f9,0x399,0x9f3,0xc,0xc,0x14bb,0x14bb,0x13d7,0xf,0x960,0x960,0x960, +0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0xd92,0x960, +0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xed0,0x39c,0x39c,0x39c,0x3a8,0x39c,0x39f,0x39c,0x39c,0x3ab, +0x963,0xd95,0xd98,0xd95,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3ae,0x3ae,0x3ae,0x3ae, 0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae, -0x3ae,0x3ae,0x3ae,0xf,0xf,0xf,0xf,0x18ed,0x3ae,0x3ae,0x3ae,0x3a5,0x3a2,0xf,0xf,0xf, -0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xca8,0xca8,0xca8,0xca8,0x13d1,0x14b5,0xf75,0xf75, -0xf75,0xf72,0xf72,0xd9b,0x8a6,0xca2,0xc9f,0xc9f,0xc96,0xc96,0xc96,0xc96,0xc96,0xc96,0xf6f,0xf6f, -0xf6f,0xf6f,0xf6f,0x8a3,0x14ac,0x1afd,0xd9e,0x8a9,0x12f0,0x3c9,0x3cc,0x3cc,0x3cc,0x3cc,0x3cc,0x3c9, -0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, -0x3c9,0x3c9,0x3c9,0xf78,0xf78,0xf78,0xf78,0xf78,0x8ac,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, -0x3c9,0x3c9,0x3c9,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0xb3a,0xb3a,0xb3a,0xc96,0xc9c, -0xc99,0xd98,0xd98,0xd98,0xd98,0xd98,0xd98,0x12ed,0x93f,0x93f,0x93f,0x93f,0x93f,0x93f,0x93f,0x93f, -0x93f,0x93f,0x3c3,0x3c0,0x3bd,0x3ba,0xb9d,0xb9d,0x921,0x3c9,0x3c9,0x3d5,0x3c9,0x3cf,0x3cf,0x3cf, -0x3cf,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, +0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xf,0xf,0xf,0xf,0x18fc,0x3ae,0x3ae,0x3ae,0x3a5, +0x3a2,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xcab,0xcab,0xcab,0xcab, +0x13da,0x14be,0xf78,0xf78,0xf78,0xf75,0xf75,0xd9e,0x8a6,0xca5,0xca2,0xca2,0xc99,0xc99,0xc99,0xc99, +0xc99,0xc99,0xf72,0xf72,0xf72,0xf72,0xf72,0x8a3,0x14b5,0x1b0f,0xda1,0x8a9,0x12f6,0x3c9,0x3cc,0x3cc, +0x3cc,0x3cc,0x3cc,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, +0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0x8ac,0x3c9,0x3c9,0x3c9, +0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0xb3d, +0xb3d,0xb3d,0xc99,0xc9f,0xc9c,0xd9b,0xd9b,0xd9b,0xd9b,0xd9b,0xd9b,0x12f3,0x93f,0x93f,0x93f,0x93f, +0x93f,0x93f,0x93f,0x93f,0x93f,0x93f,0x3c3,0x3c0,0x3bd,0x3ba,0xba0,0xba0,0x921,0x3c9,0x3c9,0x3d5, +0x3c9,0x3cf,0x3cf,0x3cf,0x3cf,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, 0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, 0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, -0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x9f9,0x9f9,0x3c9,0x3c9, -0x3c9,0x3c9,0x3c9,0x9f9,0x3cc,0x3c9,0x3cc,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, -0x3c9,0x3c9,0x3c9,0x9f9,0x3c9,0x3c9,0x3c9,0x3cc,0x942,0x3c9,0x3b4,0x3b4,0x3b4,0x3b4,0x3b4,0x3b4, -0x3b4,0x3b1,0x3ba,0x3b7,0x3b7,0x3b4,0x3b4,0x3b4,0x3b4,0x3d2,0x3d2,0x3b4,0x3b4,0x3ba,0x3b7,0x3b7, -0x3b7,0x3b4,0xca5,0xca5,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x9f9,0x9f9, -0x9f9,0x9f6,0x9f6,0xca5,0xa0e,0xa0e,0xa0e,0xa08,0xa08,0xa08,0xa08,0xa08,0xa08,0xa08,0xa08,0xa05, -0xa08,0xa05,0x12,0xa11,0xa0b,0x9fc,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b, +0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, +0x9f9,0x9f9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x9f9,0x3cc,0x3c9,0x3cc,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, +0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x9f9,0x3c9,0x3c9,0x3c9,0x3cc,0x942,0x3c9,0x3b4,0x3b4, +0x3b4,0x3b4,0x3b4,0x3b4,0x3b4,0x3b1,0x3ba,0x3b7,0x3b7,0x3b4,0x3b4,0x3b4,0x3b4,0x3d2,0x3d2,0x3b4, +0x3b4,0x3ba,0x3b7,0x3b7,0x3b7,0x3b4,0xca8,0xca8,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6, +0x3c6,0x3c6,0x9f9,0x9f9,0x9f9,0x9f6,0x9f6,0xca8,0xa0e,0xa0e,0xa0e,0xa08,0xa08,0xa08,0xa08,0xa08, +0xa08,0xa08,0xa08,0xa05,0xa08,0xa05,0x12,0xa11,0xa0b,0x9fc,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b, 0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b, -0xa0b,0xcab,0xcab,0xcab,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02, -0xa02,0xa02,0xa02,0xa02,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x12, -0x12,0xcab,0xcab,0xcab,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb, -0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb, -0xdfb,0xdfb,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9, -0xff9,0xff9,0xff9,0xff9,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17, +0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xcae,0xcae,0xcae,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02, +0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0xa02,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff, +0x9ff,0x9ff,0x9ff,0x12,0x12,0xcae,0xcae,0xcae,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe, +0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe, +0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc, +0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17, 0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17, -0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa14,0xa14,0xa14,0xa14,0xa14,0xa14, -0xa14,0xa14,0xa14,0xa14,0xa14,0xba0,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15, -0x15,0x15,0x15,0x15,0xf0f,0xf0f,0xf0f,0xf0f,0xf0f,0xf0f,0xf0f,0xf0f,0xf0f,0xf0f,0xf12,0xf12, -0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12, -0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf06, -0xf06,0xf06,0xf06,0xf06,0xf06,0xf06,0xf06,0xf06,0xf15,0xf15,0xf09,0xf09,0xf0c,0xf1b,0xf18,0x102, -0x102,0x1911,0x1914,0x1914,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb13,0xb13,0xb16,0xb16,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13, -0x6f,0x6f,0x6f,0x6f,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1617,0x1617,0x1617,0x1617,0x1617, -0x1617,0x1617,0x1617,0x1617,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1650,0x1650,0x1650, -0x1650,0x1650,0x1650,0x1650,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x16b,0x16b,0x16b, -0x16b,0x16b,0x16b,0x16b,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1bdb,0x1bd8,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2, -0x1c2,0x1c2,0x1c2,0x1c2,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1488,0x1488,0x1488,0x1488,0x1488,0x1488,0x1488,0x1488,0x1488,0x1488,0x1a7,0x1a7, -0x1a7,0x1a7,0x1a7,0x1a7,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x201,0x201,0x201,0x201,0x201, -0x201,0x201,0x201,0x201,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249, -0x249,0x249,0x249,0x249,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x195f,0x195f,0x195f,0x195f,0x195f,0x195f,0x195f,0x195f,0x195f,0x195f,0x24f,0x24f, -0x24f,0x24f,0x24f,0x24f,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1ac1,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b, -0x28b,0x28b,0x28b,0x28b,0x1752,0x1752,0x1752,0x1752,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207, -0x207,0x207,0x207,0x207,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c, -0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e, -0x163e,0x163e,0x163e,0x163e,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1bc3,0x1bc3,0x1bc3,0x1bc3,0x2a0,0x1bc3,0x1bc3,0x1bc3,0x1bc3,0x1bc3,0x1bc3,0x1bc3, -0x2a0,0x1bc3,0x1bc3,0x2a0,0x16b6,0x16b6,0x16b6,0x16b6,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef, -0x1ef,0x1ef,0x1ef,0x1ef,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5, -0x2b5,0x2b5,0x2b5,0x2b5,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xdf2,0xdf2,0xdef,0xdef,0xdef,0xdf2,0xd5,0xd5,0xd5,0xd5,0xd5,0xd5, -0xd5,0xd5,0xd5,0xd5,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x213,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a, -0x176a,0x176a,0x176a,0x176a,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, -0x2bb,0x2bb,0x2bb,0x1bf6,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x270,0x270, -0x270,0x270,0x1a16,0x1a10,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93, -0x1b93,0x1b93,0x1b93,0x1b93,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26, -0x1c26,0x1c26,0x1c26,0x1c26,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x255,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974, -0x1974,0x1974,0x1974,0x1974,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273, -0x273,0x273,0x273,0x273,0,0,0,0,0,0,0,0,0,0,0,0, +0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa14,0xa14, +0xa14,0xa14,0xa14,0xa14,0xa14,0xa14,0xa14,0xa14,0xa14,0xba3,0x15,0x15,0x15,0x15,0x15,0x15, +0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12, +0xf12,0xf12,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15, +0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15, +0xf15,0xf15,0xf15,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf18,0xf18,0xf0c,0xf0c, +0xf0f,0xf1e,0xf1b,0xff,0xff,0x1920,0x1923,0x1923,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb16,0xb16,0xb19,0xb19,0xb16,0xb16,0xb16,0xb16, +0xb16,0xb16,0xb16,0xb16,0x1c53,0x1c53,0x1c50,0x1c50,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1626, +0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9, +0x1e9,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248, +0x1248,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1bed,0x1bea,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf, +0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491, +0x1491,0x1491,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1fe, +0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246, +0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e, +0x196e,0x196e,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1ad3,0x288,0x288,0x288,0x288,0x288,0x288,0x288, +0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x1761,0x1761,0x1761,0x1761,0x204,0x204,0x204,0x204, +0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e, +0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, +0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1bd5,0x1bd5,0x1bd5,0x1bd5,0x29d,0x1bd5,0x1bd5,0x1bd5, +0x1bd5,0x1bd5,0x1bd5,0x1bd5,0x29d,0x1bd5,0x1bd5,0x29d,0x16c5,0x16c5,0x16c5,0x16c5,0x1ec,0x1ec,0x1ec,0x1ec, +0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x2b2,0x2b2,0x2b2,0x2b2, +0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xdf5,0xdf5,0xdf2,0xdf2,0xdf2,0xdf5,0xd2,0xd2, +0xd2,0xd2,0xd2,0xd2,0xd2,0xd2,0xd2,0xd2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x210,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779, +0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8, +0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x1c08,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1a25,0x1a25,0x1a25,0x1a25,0x1a25,0x1a25,0x1a25,0x1a25, +0x1a25,0x1a25,0x26d,0x26d,0x26d,0x26d,0x1a28,0x1a22,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5, +0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38, +0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x252,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983, +0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270, +0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0x95d,0x95d,3,3,3,3,3,3,3,3,3,3,3,3, +0,0,0,0,0,0,0x95d,0x95d,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -3,3,3,3,3,3,0x95d,0x95d,6,6,6,6,6,6,6,6, +3,3,3,3,3,3,3,3,3,3,0x95d,0x95d,6,6,6,6, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50, -0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,0x14bb,0x3f0,0x3ff,0x3ff,0x18,0x405,0x405,0x405, -0x405,0x405,0x405,0x405,0x405,0x18,0x18,0x405,0x405,0x18,0x18,0x405,0x405,0x405,0x405,0x405, -0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x18,0x405,0x405,0x405,0x405,0x405,0x405, -0x405,0x18,0x405,0x18,0x18,0x18,0x405,0x405,0x405,0x405,0x18,0x18,0x3f3,0xcb1,0x3f0,0x3ff, -0x3ff,0x3f0,0x3f0,0x3f0,0x3f0,0x18,0x18,0x3ff,0x3ff,0x18,0x18,0x402,0x402,0x3f6,0xda4,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x3f0,0x18,0x18,0x18,0x18,0x408,0x408,0x18,0x408, -0x405,0x405,0x3f0,0x3f0,0x18,0x18,0x948,0x948,0x948,0x948,0x948,0x948,0x948,0x948,0x948,0x948, -0x405,0x405,0x3fc,0x3fc,0x3f9,0x3f9,0x3f9,0x3f9,0x3f9,0x3fc,0x3f9,0x1125,0x184b,0x1848,0x18f0,0x18, -0x1b,0xcb4,0x40b,0xcb7,0x1b,0x417,0x417,0x417,0x417,0x417,0x417,0x1b,0x1b,0x1b,0x1b,0x417, -0x417,0x1b,0x1b,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417, -0x417,0x1b,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x1b,0x417,0x41a,0x1b,0x417,0x41a,0x1b, -0x417,0x417,0x1b,0x1b,0x40e,0x1b,0x414,0x414,0x414,0x40b,0x40b,0x1b,0x1b,0x1b,0x1b,0x40b, -0x40b,0x1b,0x1b,0x40b,0x40b,0x411,0x1b,0x1b,0x1b,0xf81,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x41a,0x41a,0x41a,0x417,0x1b,0x41a,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x94b,0x94b, -0x94b,0x94b,0x94b,0x94b,0x94b,0x94b,0x94b,0x94b,0x40b,0x40b,0x417,0x417,0x417,0xf81,0x18f3,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1e,0x41d,0x41d,0x426,0x1e,0x429,0x429,0x429, -0x429,0x429,0x429,0x429,0xcc0,0x429,0x1e,0x429,0x429,0x429,0x1e,0x429,0x429,0x429,0x429,0x429, -0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x1e,0x429,0x429,0x429,0x429,0x429,0x429, -0x429,0x1e,0x429,0x429,0x1e,0x429,0x429,0x429,0x429,0x429,0x1e,0x1e,0x420,0x429,0x426,0x426, -0x426,0x41d,0x41d,0x41d,0x41d,0x41d,0x1e,0x41d,0x41d,0x426,0x1e,0x426,0x426,0x423,0x1e,0x1e, -0x429,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, -0x429,0xcc0,0xcba,0xcba,0x1e,0x1e,0x94e,0x94e,0x94e,0x94e,0x94e,0x94e,0x94e,0x94e,0x94e,0x94e, -0x13d4,0xcbd,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x16cb,0x184e,0x184e,0x184e,0x1851,0x1851,0x1851, -0x21,0x42c,0x43b,0x43b,0x21,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x21,0x21,0x441, -0x441,0x21,0x21,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441, -0x441,0x21,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x21,0x441,0x441,0x21,0xcc3,0x441,0x441, -0x441,0x441,0x21,0x21,0x42f,0x441,0x42c,0x42c,0x43b,0x42c,0x42c,0x42c,0xf84,0x21,0x21,0x43b, -0x43e,0x21,0x21,0x43e,0x43e,0x432,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x1a5b,0x42c,0x42c, -0x21,0x21,0x21,0x21,0x444,0x444,0x21,0x441,0x441,0x441,0xf84,0xf84,0x21,0x21,0x438,0x438, -0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x435,0xcc3,0x12fc,0x12fc,0x12fc,0x12fc,0x12fc,0x12fc, -0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x24,0x24,0x447,0x453,0x24,0x453,0x453,0x453, -0x453,0x453,0x453,0x24,0x24,0x24,0x453,0x453,0x453,0x24,0x453,0x453,0x456,0x453,0x24,0x24, -0x24,0x453,0x453,0x24,0x453,0x24,0x453,0x453,0x24,0x24,0x24,0x453,0x453,0x24,0x24,0x24, -0x453,0x453,0x453,0x24,0x24,0x24,0x453,0x453,0x453,0x453,0x453,0x453,0x453,0x453,0xda7,0x453, -0x453,0x453,0x24,0x24,0x24,0x24,0x447,0x44d,0x447,0x44d,0x44d,0x24,0x24,0x24,0x44d,0x44d, -0x44d,0x24,0x450,0x450,0x450,0x44a,0x24,0x24,0xf87,0x24,0x24,0x24,0x24,0x24,0x24,0x447, -0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0xebb,0x954,0x954,0x954,0x954,0x954, -0x954,0x954,0x954,0x954,0x951,0x951,0x951,0xd77,0xcc6,0xcc6,0xcc6,0xcc6,0xcc6,0xcc9,0xcc6,0x24, -0x24,0x24,0x24,0x24,0x14be,0x465,0x465,0x465,0x18f6,0x468,0x468,0x468,0x468,0x468,0x468,0x468, -0x468,0x27,0x468,0x468,0x468,0x27,0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x468, -0x468,0x468,0x468,0x468,0x468,0x27,0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x468, -0x14c1,0x468,0x468,0x468,0x468,0x468,0x27,0x27,0x1b00,0xf90,0x459,0x459,0x459,0x465,0x465,0x465, -0x465,0x27,0x459,0x459,0x45c,0x27,0x459,0x459,0x459,0x45f,0x27,0x27,0x27,0x27,0x27,0x27, -0x27,0x459,0x459,0x27,0xf90,0xf90,0x16ce,0x27,0x27,0x1b03,0x27,0x27,0x468,0x468,0xf8a,0xf8a, -0x27,0x27,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x27,0x27,0x27,0x27, -0x27,0x27,0x27,0x19bf,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0x178e,0x14c4,0x471,0x471, -0x18f9,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x2a,0x477,0x477,0x477,0x2a,0x477,0x477, -0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x2a,0x477,0x477, -0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x2a,0x477,0x477,0x477,0x477,0x477,0x2a,0x2a, -0xccc,0xccf,0x471,0x46b,0x474,0x471,0x46b,0x471,0x471,0x2a,0x46b,0x474,0x474,0x2a,0x474,0x474, -0x46b,0x46e,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x46b,0x46b,0x2a,0x2a,0x2a,0x2a,0x2a, -0x2a,0x1b06,0x477,0x2a,0x477,0x477,0xed3,0xed3,0x2a,0x2a,0x957,0x957,0x957,0x957,0x957,0x957, -0x957,0x957,0x957,0x957,0x2a,0xed6,0xed6,0x1bc9,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a, -0x2a,0x2a,0x2a,0x2a,0x1854,0x14c7,0x483,0x483,0x1a5e,0x489,0x489,0x489,0x489,0x489,0x489,0x489, -0x489,0x2d,0x489,0x489,0x489,0x2d,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489, -0x489,0x489,0x489,0x489,0x483,0x47a,0x47a,0x47a,0xf93,0x2d,0x483,0x483,0x483,0x2d,0x486,0x486, -0x486,0x47d,0x1302,0x1791,0x2d,0x2d,0x2d,0x2d,0x1794,0x1794,0x1794,0x47a,0x1791,0x1791,0x1791,0x1791, -0x1791,0x1791,0x1791,0x16d1,0x489,0x489,0xf93,0xf93,0x2d,0x2d,0x480,0x480,0x480,0x480,0x480,0x480, -0x480,0x480,0x480,0x480,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0x1791,0x1791,0x1791,0xf99,0xf9c,0xf9c, -0xf9c,0xf9c,0xf9c,0xf9c,0x30,0x1a61,0xa23,0xa23,0x30,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29, -0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0x30,0x30,0x30,0xa29,0xa29, -0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29, -0xa29,0xa29,0x30,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0x30,0xa29,0x30,0x30, -0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0x30,0x30,0x30,0xa1d,0x30,0x30,0x30,0x30,0xa1a, -0xa23,0xa23,0xa1a,0xa1a,0xa1a,0x30,0xa1a,0x30,0xa23,0xa23,0xa26,0xa23,0xa26,0xa26,0xa26,0xa1a, -0x30,0x30,0x30,0x30,0x30,0x30,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca, -0x30,0x30,0xa23,0xa23,0xa20,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30, -0x33,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4, +6,6,6,6,6,6,6,6,6,6,6,6,0xd53,0xd53,0xd53,0xd53, +0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,0x14c4,0x3f0,0x3ff,0x3ff, +0x18,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x18,0x18,0x405,0x405,0x18,0x18,0x405, +0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x405,0x18,0x405,0x405, +0x405,0x405,0x405,0x405,0x405,0x18,0x405,0x18,0x18,0x18,0x405,0x405,0x405,0x405,0x18,0x18, +0x3f3,0xcb4,0x3f0,0x3ff,0x3ff,0x3f0,0x3f0,0x3f0,0x3f0,0x18,0x18,0x3ff,0x3ff,0x18,0x18,0x402, +0x402,0x3f6,0xda7,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x3f0,0x18,0x18,0x18,0x18, +0x408,0x408,0x18,0x408,0x405,0x405,0x3f0,0x3f0,0x18,0x18,0x948,0x948,0x948,0x948,0x948,0x948, +0x948,0x948,0x948,0x948,0x405,0x405,0x3fc,0x3fc,0x3f9,0x3f9,0x3f9,0x3f9,0x3f9,0x3fc,0x3f9,0x112b, +0x185a,0x1857,0x18ff,0x18,0x1b,0xcb7,0x40b,0xcba,0x1b,0x417,0x417,0x417,0x417,0x417,0x417,0x1b, +0x1b,0x1b,0x1b,0x417,0x417,0x1b,0x1b,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417, +0x417,0x417,0x417,0x417,0x417,0x1b,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x1b,0x417,0x41a, +0x1b,0x417,0x41a,0x1b,0x417,0x417,0x1b,0x1b,0x40e,0x1b,0x414,0x414,0x414,0x40b,0x40b,0x1b, +0x1b,0x1b,0x1b,0x40b,0x40b,0x1b,0x1b,0x40b,0x40b,0x411,0x1b,0x1b,0x1b,0xf84,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x41a,0x41a,0x41a,0x417,0x1b,0x41a,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x94b,0x94b,0x94b,0x94b,0x94b,0x94b,0x94b,0x94b,0x94b,0x94b,0x40b,0x40b,0x417,0x417, +0x417,0xf84,0x1902,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1e,0x41d,0x41d,0x426, +0x1e,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0xcc3,0x429,0x1e,0x429,0x429,0x429,0x1e,0x429, +0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x429,0x1e,0x429,0x429, +0x429,0x429,0x429,0x429,0x429,0x1e,0x429,0x429,0x1e,0x429,0x429,0x429,0x429,0x429,0x1e,0x1e, +0x420,0x429,0x426,0x426,0x426,0x41d,0x41d,0x41d,0x41d,0x41d,0x1e,0x41d,0x41d,0x426,0x1e,0x426, +0x426,0x423,0x1e,0x1e,0x429,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, +0x1e,0x1e,0x1e,0x1e,0x429,0xcc3,0xcbd,0xcbd,0x1e,0x1e,0x94e,0x94e,0x94e,0x94e,0x94e,0x94e, +0x94e,0x94e,0x94e,0x94e,0x13dd,0xcc0,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x16da,0x185d,0x185d, +0x185d,0x1860,0x1860,0x1860,0x21,0x42c,0x43b,0x43b,0x21,0x441,0x441,0x441,0x441,0x441,0x441,0x441, +0x441,0x21,0x21,0x441,0x441,0x21,0x21,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441, +0x441,0x441,0x441,0x441,0x441,0x21,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x21,0x441,0x441, +0x21,0xcc6,0x441,0x441,0x441,0x441,0x21,0x21,0x42f,0x441,0x42c,0x42c,0x43b,0x42c,0x42c,0x42c, +0xf87,0x21,0x21,0x43b,0x43e,0x21,0x21,0x43e,0x43e,0x432,0x21,0x21,0x21,0x21,0x21,0x21, +0x21,0x1a6d,0x42c,0x42c,0x21,0x21,0x21,0x21,0x444,0x444,0x21,0x441,0x441,0x441,0xf87,0xf87, +0x21,0x21,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x435,0xcc6,0x1302,0x1302, +0x1302,0x1302,0x1302,0x1302,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x24,0x24,0x447,0x453, +0x24,0x453,0x453,0x453,0x453,0x453,0x453,0x24,0x24,0x24,0x453,0x453,0x453,0x24,0x453,0x453, +0x456,0x453,0x24,0x24,0x24,0x453,0x453,0x24,0x453,0x24,0x453,0x453,0x24,0x24,0x24,0x453, +0x453,0x24,0x24,0x24,0x453,0x453,0x453,0x24,0x24,0x24,0x453,0x453,0x453,0x453,0x453,0x453, +0x453,0x453,0xdaa,0x453,0x453,0x453,0x24,0x24,0x24,0x24,0x447,0x44d,0x447,0x44d,0x44d,0x24, +0x24,0x24,0x44d,0x44d,0x44d,0x24,0x450,0x450,0x450,0x44a,0x24,0x24,0xf8a,0x24,0x24,0x24, +0x24,0x24,0x24,0x447,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0xebe,0x954, +0x954,0x954,0x954,0x954,0x954,0x954,0x954,0x954,0x951,0x951,0x951,0xd7a,0xcc9,0xcc9,0xcc9,0xcc9, +0xcc9,0xccc,0xcc9,0x24,0x24,0x24,0x24,0x24,0x14c7,0x465,0x465,0x465,0x1905,0x468,0x468,0x468, +0x468,0x468,0x468,0x468,0x468,0x27,0x468,0x468,0x468,0x27,0x468,0x468,0x468,0x468,0x468,0x468, +0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x468,0x27,0x468,0x468,0x468,0x468,0x468,0x468, +0x468,0x468,0x468,0x468,0x14ca,0x468,0x468,0x468,0x468,0x468,0x27,0x27,0x1b12,0xf93,0x459,0x459, +0x459,0x465,0x465,0x465,0x465,0x27,0x459,0x459,0x45c,0x27,0x459,0x459,0x459,0x45f,0x27,0x27, +0x27,0x27,0x27,0x27,0x27,0x459,0x459,0x27,0xf93,0xf93,0x16dd,0x27,0x27,0x1b15,0x27,0x27, +0x468,0x468,0xf8d,0xf8d,0x27,0x27,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462, +0x27,0x27,0x27,0x27,0x27,0x27,0x27,0x19d1,0xf90,0xf90,0xf90,0xf90,0xf90,0xf90,0xf90,0xf90, +0x179d,0x14cd,0x471,0x471,0x1908,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x2a,0x477,0x477, +0x477,0x2a,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477, +0x477,0x2a,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x2a,0x477,0x477,0x477, +0x477,0x477,0x2a,0x2a,0xccf,0xcd2,0x471,0x46b,0x474,0x471,0x46b,0x471,0x471,0x2a,0x46b,0x474, +0x474,0x2a,0x474,0x474,0x46b,0x46e,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x46b,0x46b,0x2a, +0x2a,0x2a,0x2a,0x2a,0x2a,0x1b18,0x477,0x2a,0x477,0x477,0xed6,0xed6,0x2a,0x2a,0x957,0x957, +0x957,0x957,0x957,0x957,0x957,0x957,0x957,0x957,0x2a,0xed9,0xed9,0x1bdb,0x2a,0x2a,0x2a,0x2a, +0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x1863,0x14d0,0x483,0x483,0x1a70,0x489,0x489,0x489, +0x489,0x489,0x489,0x489,0x489,0x2d,0x489,0x489,0x489,0x2d,0x489,0x489,0x489,0x489,0x489,0x489, +0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x483,0x47a,0x47a,0x47a,0xf96,0x2d,0x483,0x483, +0x483,0x2d,0x486,0x486,0x486,0x47d,0x1308,0x17a0,0x2d,0x2d,0x2d,0x2d,0x17a3,0x17a3,0x17a3,0x47a, +0x17a0,0x17a0,0x17a0,0x17a0,0x17a0,0x17a0,0x17a0,0x16e0,0x489,0x489,0xf96,0xf96,0x2d,0x2d,0x480,0x480, +0x480,0x480,0x480,0x480,0x480,0x480,0x480,0x480,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0x17a0,0x17a0, +0x17a0,0xf9c,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0x30,0x1a73,0xa23,0xa23,0x30,0xa29,0xa29,0xa29, +0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0x30, +0x30,0x30,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29, +0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0x30,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29, +0x30,0xa29,0x30,0x30,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0x30,0x30,0x30,0xa1d,0x30, +0x30,0x30,0x30,0xa1a,0xa23,0xa23,0xa1a,0xa1a,0xa1a,0x30,0xa1a,0x30,0xa23,0xa23,0xa26,0xa23, +0xa26,0xa26,0xa26,0xa1a,0x30,0x30,0x30,0x30,0x30,0x30,0x14d3,0x14d3,0x14d3,0x14d3,0x14d3,0x14d3, +0x14d3,0x14d3,0x14d3,0x14d3,0x30,0x30,0xa23,0xa23,0xa20,0x30,0x30,0x30,0x30,0x30,0x30,0x30, +0x30,0x30,0x30,0x30,0x33,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4, 0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x48f,0x4a4,0x4a1,0x48f,0x48f,0x48f,0x48f,0x48f,0x48f,0x495,0x33,0x33,0x33,0x33,0x48c, -0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4a4,0x4a7,0x492,0x492,0x492,0x492,0x492,0x492,0x48f,0x492,0x498, -0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49b,0x49b,0x33,0x33,0x33,0x33, +0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x48f,0x4a4,0x4a1,0x48f,0x48f,0x48f,0x48f,0x48f,0x48f,0x495,0x33, +0x33,0x33,0x33,0x48c,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4a4,0x4a7,0x492,0x492,0x492,0x492,0x492, +0x492,0x48f,0x492,0x498,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49b,0x49b, 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, -0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x36,0x4b9,0x4b9,0x36, -0x4b9,0x36,0x19c5,0x4b9,0x4b9,0x19c5,0x4b9,0x36,0x19c5,0x4b9,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5, -0x4b9,0x4b9,0x4b9,0x4b9,0x19c5,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9,0x19c5,0x4b9,0x4b9,0x4b9, -0x36,0x4b9,0x36,0x4b9,0x19c5,0x19c5,0x4b9,0x4b9,0x19c5,0x4b9,0x4b9,0x4b9,0x4b9,0x4ad,0x4b9,0x4b6, -0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x19c2,0x4ad,0x4ad,0x4b9,0x36,0x36,0x4c2,0x4c2,0x4c2,0x4c2, -0x4c2,0x36,0x4bf,0x36,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4ad,0x1bcc,0x36,0x4b3,0x4b3,0x4b3,0x4b3, -0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x36,0x36,0x4bc,0x4bc,0x13d7,0x13d7,0x36,0x36,0x36,0x36, +0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, +0x36,0x4b9,0x4b9,0x36,0x4b9,0x36,0x19d7,0x4b9,0x4b9,0x19d7,0x4b9,0x36,0x19d7,0x4b9,0x19d7,0x19d7, +0x19d7,0x19d7,0x19d7,0x19d7,0x4b9,0x4b9,0x4b9,0x4b9,0x19d7,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9, +0x19d7,0x4b9,0x4b9,0x4b9,0x36,0x4b9,0x36,0x4b9,0x19d7,0x19d7,0x4b9,0x4b9,0x19d7,0x4b9,0x4b9,0x4b9, +0x4b9,0x4ad,0x4b9,0x4b6,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x19d4,0x4ad,0x4ad,0x4b9,0x36,0x36, +0x4c2,0x4c2,0x4c2,0x4c2,0x4c2,0x36,0x4bf,0x36,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4ad,0x1bde,0x36, +0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x36,0x36,0x4bc,0x4bc,0x13e0,0x13e0, 0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36, -0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x99c,0x99c,0x99c,0x99f, -0x99c,0x99c,0x99c,0x99c,0x39,0x99c,0x99c,0x99c,0x99c,0x99f,0x99c,0x99c,0x99c,0x99c,0x99f,0x99c, -0x99c,0x99c,0x99c,0x99f,0x99c,0x99c,0x99c,0x99c,0x99f,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c, -0x99c,0x99c,0x99c,0x99c,0x99c,0x99f,0xa38,0xfa8,0xfa8,0x39,0x39,0x39,0x39,0x966,0x966,0x969, -0x966,0x969,0x969,0x975,0x969,0x975,0x966,0x966,0x966,0x966,0x966,0x996,0x966,0x969,0x96f,0x96f, -0x972,0x97b,0x96c,0x96c,0x99c,0x99c,0x99c,0x99c,0x130b,0x1305,0x1305,0x1305,0x966,0x966,0x966,0x969, -0x966,0x966,0xa2c,0x966,0x39,0x966,0x966,0x966,0x966,0x969,0x966,0x966,0x966,0x966,0x969,0x966, -0x966,0x966,0x966,0x969,0x966,0x966,0x966,0x966,0x969,0x966,0xa2c,0xa2c,0xa2c,0x966,0x966,0x966, -0x966,0x966,0x966,0x966,0xa2c,0x969,0xa2c,0xa2c,0xa2c,0x39,0xa35,0xa35,0xa32,0xa32,0xa32,0xa32, -0xa32,0xa32,0xa2f,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0x39,0xf9f,0xa32,0xdaa,0xdaa,0xfa2,0xfa5, -0xf9f,0x1128,0x1128,0x1128,0x1128,0x1308,0x1308,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39, +0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36, +0x99c,0x99c,0x99c,0x99f,0x99c,0x99c,0x99c,0x99c,0x39,0x99c,0x99c,0x99c,0x99c,0x99f,0x99c,0x99c, +0x99c,0x99c,0x99f,0x99c,0x99c,0x99c,0x99c,0x99f,0x99c,0x99c,0x99c,0x99c,0x99f,0x99c,0x99c,0x99c, +0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99f,0xa38,0xfab,0xfab,0x39,0x39,0x39, +0x39,0x966,0x966,0x969,0x966,0x969,0x969,0x975,0x969,0x975,0x966,0x966,0x966,0x966,0x966,0x996, +0x966,0x969,0x96f,0x96f,0x972,0x97b,0x96c,0x96c,0x99c,0x99c,0x99c,0x99c,0x1311,0x130b,0x130b,0x130b, +0x966,0x966,0x966,0x969,0x966,0x966,0xa2c,0x966,0x39,0x966,0x966,0x966,0x966,0x969,0x966,0x966, +0x966,0x966,0x969,0x966,0x966,0x966,0x966,0x969,0x966,0x966,0x966,0x966,0x969,0x966,0xa2c,0xa2c, +0xa2c,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0xa2c,0x969,0xa2c,0xa2c,0xa2c,0x39,0xa35,0xa35, +0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa2f,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0x39,0xfa2,0xa32, +0xdad,0xdad,0xfa5,0xfa8,0xfa2,0x112e,0x112e,0x112e,0x112e,0x130e,0x130e,0x39,0x39,0x39,0x39,0x39, 0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39, -0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x3c,0x13dd, -0x3c,0x3c,0x3c,0x3c,0x3c,0x13dd,0x3c,0x3c,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5, -0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdb9, +0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x4c8,0x4c8,0x4c8,0x4c8, +0x4c8,0x4c8,0x3c,0x13e6,0x3c,0x3c,0x3c,0x3c,0x3c,0x13e6,0x3c,0x3c,0x4c5,0x4c5,0x4c5,0x4c5, +0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xdbc,0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0x3f,0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xdbc,0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdbc, 0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0x3f, -0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdb9, -0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdb9,0xa62,0x3f,0xa62,0xa62, -0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0x3f,0xa62,0x3f,0xa62,0xa62, -0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdb9,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xdb9,0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xdb9,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0x130e,0x130e,0xdb3,0xdb6,0xa5c,0xa65,0xa59, -0xa59,0xa59,0xa59,0xa65,0xa65,0xa5f,0xa5f,0xa5f,0xa5f,0xa5f,0xa5f,0xa5f,0xa5f,0xa5f,0xa56,0xa56, -0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0x3f,0x3f,0x3f,0xa68,0xa68,0xa68,0xa68, +0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdbc, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdbc,0xa62,0x3f,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdbc,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0x3f,0x3f,0x1314,0x1314,0xdb6, +0xdb9,0xa5c,0xa65,0xa59,0xa59,0xa59,0xa59,0xa65,0xa65,0xa5f,0xa5f,0xa5f,0xa5f,0xa5f,0xa5f,0xa5f, +0xa5f,0xa5f,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0x3f,0x3f,0x3f, 0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, -0xa68,0x16d7,0x42,0x42,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x42,0x42,0xa7a,0xa7d,0xa7d,0xa7d, -0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, -0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa77,0xa74,0x45,0x45,0x45,0xa83,0xa83,0xa83,0xa83, -0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa80,0xa80,0xa80,0xa83,0xa83,0xa83,0x14cd,0x14cd,0x14cd, -0x14cd,0x14cd,0x14cd,0x14cd,0x14cd,0x48,0x48,0x48,0x48,0x48,0x48,0x48,0xaa4,0xaa4,0xaa4,0xaa4, -0xaa4,0xaa4,0xa86,0xaa4,0xaa4,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa8c,0xa89, -0xa9b,0xa9b,0xa9e,0xaa7,0xa95,0xa92,0xa9b,0xa98,0xaa7,0xcd2,0x4b,0x4b,0xaa1,0xaa1,0xaa1,0xaa1, -0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0xcd5,0xcd5,0xcd5,0xcd5, -0xcd5,0xcd5,0xcd5,0xcd5,0xcd5,0xcd5,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0xab6,0xab6,0xb2e,0xb31, -0xabc,0xb2b,0xab9,0xab6,0xabf,0xace,0xac2,0xad1,0xad1,0xad1,0xaad,0x1b09,0xac5,0xac5,0xac5,0xac5, -0xac5,0xac5,0xac5,0xac5,0xac5,0xac5,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xac8,0xac8,0xac8,0xac8, -0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8, -0xac8,0xac8,0xac8,0xac8,0x18fc,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xac8,0xac8,0xac8,0xac8, -0xac8,0xac8,0xac8,0xac8,0xac8,0xab0,0xfc6,0x4e,0x4e,0x4e,0x4e,0x4e,0x117f,0x117f,0x117f,0x117f, -0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e6,0x4e6,0x51,0x51,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x51,0x51,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e6,0x4e6,0x4e6,0x4e6,0x51,0x4e9,0x51,0x4e9,0x51,0x4e9,0x51,0x4e9,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x51,0x51,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e6,0x51,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e0,0x4e6,0x4e0,0x4e0,0x4dd,0x4e6,0x4e6, -0x4e6,0x51,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4dd,0x4dd,0x4dd,0x4e6,0x4e6,0x4e6,0x4e6, -0x51,0x51,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x51,0x4dd,0x4dd,0x4dd,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4dd,0x4dd,0x4dd,0x51,0x51,0x4e6,0x4e6, -0x4e6,0x51,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e3,0x4e0,0x51,0xba6,0xba9,0xba9,0xba9, -0xfcf,0x54,0x14a9,0x14a9,0x14a9,0x14a9,0x4f2,0x4f2,0x4f2,0x4f2,0x4f2,0x4f2,0x53d,0xbbb,0x57,0x57, -0x6d8,0x53d,0x53d,0x53d,0x53d,0x53d,0x543,0x555,0x543,0x54f,0x549,0x6db,0x53a,0x6d5,0x6d5,0x6d5, -0x6d5,0x53a,0x53a,0x53a,0x53a,0x53a,0x540,0x552,0x540,0x54c,0x546,0x57,0xdc2,0xdc2,0xdc2,0xdc2, -0xdc2,0x1311,0x1311,0x1311,0x1311,0x1311,0x1311,0x1311,0x1311,0x57,0x57,0x57,0x1b0c,0x5a,0x5a,0x5a, -0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x564,0x564,0x564,0x564, -0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x561,0x561,0x561,0x561,0x564,0xadd,0xadd, -0xbc1,0xbc7,0xbc7,0xbc4,0xbc4,0xbc4,0xbc4,0xdc8,0xed9,0xed9,0xed9,0xed9,0x1113,0x5d,0x5d,0x5d, -0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x594,0x594,0x594,0xae6, -0xee2,0xfd5,0xfd5,0xfd5,0xfd5,0x126f,0x16dd,0x16dd,0x60,0x60,0x60,0x60,0x702,0x702,0x702,0x702, -0x702,0x702,0x702,0x702,0x702,0x702,0x5a0,0x5a0,0x59d,0x59d,0x59d,0x59d,0x5c1,0x5c1,0x5c1,0x5c1, -0x5c1,0xaef,0xaef,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, -0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x5c4,0x5c4,0x5c4,0x5c4, -0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66, -0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0x69,0xb0a,0xb0a,0xb0a,0xb0a,0xb0d,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0d, -0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x72,0x822,0x81c,0x822, -0x81c,0x822,0x81c,0x822,0x81c,0x822,0x81c,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c, -0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81c,0x81c,0x81c,0x822, -0x81c,0x822,0x81c,0x822,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x822,0x81c,0x81c,0x81c,0x81c,0x81c, -0x81f,0xc60,0xc60,0x72,0x72,0x936,0x936,0x8fd,0x8fd,0x825,0x828,0xc5d,0x75,0x75,0x75,0x75, -0x75,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a, -0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x1101,0x18c3,0x19aa, -0x78,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, -0x83d,0x83d,0x83d,0x78,0x906,0x906,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909, -0x909,0x909,0x909,0x909,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846, +0xa68,0xa68,0xa68,0xa68,0xa68,0x16e6,0x42,0x42,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x42,0x42, +0xa7a,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa77,0xa74,0x45,0x45,0x45, +0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa80,0xa80,0xa80,0xa83,0xa83, +0xa83,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x48,0x48,0x48,0x48,0x48,0x48,0x48, +0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xa86,0xaa4,0xaa4,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, +0xa89,0xa89,0xa8c,0xa89,0xaad,0xaad,0xa9e,0xaa7,0xa95,0xa92,0xa9b,0xa98,0xaa7,0xcd5,0x4b,0x4b, +0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b, +0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b, +0xab9,0xab9,0xb31,0xb34,0xabf,0xb2e,0xabc,0xab9,0xac2,0xad1,0xac5,0xad4,0xad4,0xad4,0xab0,0x1b1b, +0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e, +0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb, +0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0x190b,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e, +0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xab3,0xfc9,0x4e,0x4e,0x4e,0x4e,0x4e, +0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x51,0x51,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x51,0x51, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x51,0x4e9,0x51,0x4e9,0x51,0x4e9,0x51,0x4e9, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x51,0x51, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x51,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e0,0x4e6,0x4e0, +0x4e0,0x4dd,0x4e6,0x4e6,0x4e6,0x51,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4dd,0x4dd,0x4dd, +0x4e6,0x4e6,0x4e6,0x4e6,0x51,0x51,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x51,0x4dd,0x4dd,0x4dd, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4dd,0x4dd,0x4dd, +0x51,0x51,0x4e6,0x4e6,0x4e6,0x51,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e3,0x4e0,0x51, +0xba9,0xbac,0xbac,0xbac,0xfd2,0x54,0x14b2,0x14b2,0x14b2,0x14b2,0x4f2,0x4f2,0x4f2,0x4f2,0x4f2,0x4f2, +0x53d,0xbbe,0x57,0x57,0x6d8,0x53d,0x53d,0x53d,0x53d,0x53d,0x543,0x555,0x543,0x54f,0x549,0x6db, +0x53a,0x6d5,0x6d5,0x6d5,0x6d5,0x53a,0x53a,0x53a,0x53a,0x53a,0x540,0x552,0x540,0x54c,0x546,0x57, +0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0x1317,0x1317,0x1317,0x1317,0x1317,0x1317,0x1317,0x1317,0x57,0x57,0x57, +0x1b1e,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a, +0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x561,0x561,0x561, +0x561,0x564,0xae0,0xae0,0xbc4,0xbca,0xbca,0xbc7,0xbc7,0xbc7,0xbc7,0xdcb,0xedc,0xedc,0xedc,0xedc, +0x1119,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d, +0x594,0x594,0x594,0xae9,0xee5,0xfd8,0xfd8,0xfd8,0xfd8,0x1275,0x16ec,0x16ec,0x60,0x60,0x60,0x60, +0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x5a0,0x5a0,0x59d,0x59d,0x59d,0x59d, +0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0xaf2,0xaf2,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, +0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, +0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x66,0x66,0x66,0x66,0x66, +0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66, +0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d, +0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0x69,0xb0d,0xb0d,0xb0d,0xb0d,0xb10, +0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d, +0xb0d,0xb0d,0xb0d,0xb10,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69, +0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13, +0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c, +0x6f,0x822,0x81c,0x822,0x81c,0x822,0x81c,0x822,0x81c,0x822,0x81c,0x81c,0x81f,0x81c,0x81f,0x81c, +0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c, +0x81c,0x81c,0x81c,0x822,0x81c,0x822,0x81c,0x822,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x822,0x81c, +0x81c,0x81c,0x81c,0x81c,0x81f,0xc63,0xc63,0x6f,0x6f,0x936,0x936,0x8fd,0x8fd,0x825,0x828,0xc60, +0x72,0x72,0x72,0x72,0x72,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a, +0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a,0x83a, +0x83a,0x1107,0x18d2,0x19bc,0x75,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, +0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x75,0x906,0x906,0x909,0x909,0x909,0x909,0x909,0x909, +0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846, 0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846, -0x846,0xd5c,0xd5c,0x7b,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, -0xb22,0x7e,0x7e,0x7e,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xc69,0xb28,0xb28,0xb28,0xc69,0xb28,0x81,0x81,0x81,0x81,0x81, -0x81,0x81,0x81,0x81,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6, -0x11a6,0x11a6,0x11a6,0x11a6,0x9c0,0x9c0,0x9c0,0x9c0,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84, -0x84,0x84,0x84,0x84,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b, -0x121b,0x121b,0x121b,0x121b,0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x87,0x87,0x87,0x87,0x87, -0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x87,0x87,0x87,0x87, -0x87,0xafb,0x5fa,0x600,0x606,0x606,0x606,0x606,0x606,0x606,0x606,0x606,0x606,0x5fd,0x600,0x600, -0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x87,0x600,0x600,0x600,0x600, -0x600,0x87,0x600,0x87,0x600,0x600,0x87,0x600,0x600,0x87,0x600,0x600,0x600,0x600,0x600,0x600, -0x600,0x600,0x600,0x603,0x615,0x60f,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f, -0x612,0x618,0x615,0x60f,0x1323,0x1323,0x1b0f,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a, -0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x615,0x60f,0x612,0x618,0x615,0x60f,0x615,0x60f,0x615, -0x60f,0x615,0x615,0x60f,0x60f,0x60f,0x60f,0x612,0x60f,0x60f,0x612,0x60f,0x612,0x612,0x612,0x60f, -0x612,0x612,0x612,0x612,0x8a,0x8a,0x612,0x612,0x612,0x612,0x60f,0x60f,0x612,0x60f,0x60f,0x60f, -0x60f,0x612,0x60f,0x60f,0x60f,0x60f,0x60f,0x612,0x612,0x612,0x60f,0x60f,0x8a,0x8a,0x8a,0x8a, -0x8a,0x8a,0x8a,0x1b0f,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46, -0xb46,0xb46,0xb46,0xb46,0x85e,0x870,0x86d,0x870,0x86d,0xc7e,0xc7e,0xd68,0xd65,0x861,0x861,0x861, -0x861,0x873,0x873,0x873,0x88b,0x88e,0x89d,0x8d,0x891,0x894,0x8a0,0x8a0,0x888,0x87f,0x879,0x87f, -0x879,0x87f,0x879,0x87c,0x87c,0x897,0x897,0x89a,0x897,0x897,0x897,0x8d,0x897,0x885,0x882,0x87c, -0x8d,0x8d,0x8d,0x8d,0x621,0x62d,0x621,0xbfa,0x621,0x90,0x621,0x62d,0x621,0x62d,0x621,0x62d, -0x621,0x62d,0x621,0x62d,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627, -0x62d,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x62a, -0x624,0x90,0x90,0x61e,0x75f,0x762,0x777,0x77a,0x759,0x762,0x762,0x96,0x741,0x744,0x744,0x744, -0x744,0x741,0x741,0x96,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0xafe,0xafe,0xafe, -0x9c3,0x73b,0x630,0x630,0x96,0x789,0x768,0x759,0x762,0x75f,0x759,0x76b,0x75c,0x756,0x759,0x777, -0x76e,0x765,0x786,0x759,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x774,0x771, -0x777,0x777,0x777,0x789,0x74a,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747, +0x846,0x846,0x846,0x846,0x846,0xd5f,0xd5f,0x78,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25, +0xb25,0xb25,0xb25,0xb25,0xb25,0x7b,0x7b,0x7b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b, +0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xc6c,0xb2b,0xb2b,0xb2b,0xc6c,0xb2b,0x7e, +0x7e,0x7e,0x7e,0x7e,0x7e,0x7e,0x7e,0x7e,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac, +0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x9c0,0x9c0,0x9c0,0x9c0,0x81,0x81,0x81,0x81, +0x81,0x81,0x81,0x81,0x81,0x81,0x81,0x81,0x1221,0x1221,0x1221,0x1221,0x1221,0x1221,0x1221,0x1221, +0x1221,0x1221,0x1221,0x1221,0x1221,0x1221,0x1221,0x1221,0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x84, +0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7, +0x84,0x84,0x84,0x84,0x84,0xafe,0x5fa,0x600,0x606,0x606,0x606,0x606,0x606,0x606,0x606,0x606, +0x606,0x5fd,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x84, +0x600,0x600,0x600,0x600,0x600,0x84,0x600,0x84,0x600,0x600,0x84,0x600,0x600,0x84,0x600,0x600, +0x600,0x600,0x600,0x600,0x600,0x600,0x600,0x603,0x615,0x60f,0x615,0x60f,0x612,0x618,0x615,0x60f, +0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x1329,0x1329,0x1b21,0x87,0x87,0x87,0x87,0x87, +0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x615,0x60f,0x612,0x618,0x615, +0x60f,0x615,0x60f,0x615,0x60f,0x615,0x615,0x60f,0x60f,0x60f,0x60f,0x612,0x60f,0x60f,0x612,0x60f, +0x612,0x612,0x612,0x60f,0x612,0x612,0x612,0x612,0x87,0x87,0x612,0x612,0x612,0x612,0x60f,0x60f, +0x612,0x60f,0x60f,0x60f,0x60f,0x612,0x60f,0x60f,0x60f,0x60f,0x60f,0x612,0x612,0x612,0x60f,0x60f, +0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x1b21,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49, +0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0x85e,0x870,0x86d,0x870,0x86d,0xc81,0xc81,0xd6b, +0xd68,0x861,0x861,0x861,0x861,0x873,0x873,0x873,0x88b,0x88e,0x89d,0x8a,0x891,0x894,0x8a0,0x8a0, +0x888,0x87f,0x879,0x87f,0x879,0x87f,0x879,0x87c,0x87c,0x897,0x897,0x89a,0x897,0x897,0x897,0x8a, +0x897,0x885,0x882,0x87c,0x8a,0x8a,0x8a,0x8a,0x621,0x62d,0x621,0xbfd,0x621,0x8d,0x621,0x62d, +0x621,0x62d,0x621,0x62d,0x621,0x62d,0x621,0x62d,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627, +0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x62a, +0x624,0x62a,0x624,0x62a,0x624,0x8d,0x8d,0x61e,0x75f,0x762,0x777,0x77a,0x759,0x762,0x762,0x93, +0x741,0x744,0x744,0x744,0x744,0x741,0x741,0x93,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90, +0x90,0xb01,0xb01,0xb01,0x9c3,0x73b,0x630,0x630,0x93,0x789,0x768,0x759,0x762,0x75f,0x759,0x76b, +0x75c,0x756,0x759,0x777,0x76e,0x765,0x786,0x759,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783, +0x783,0x783,0x774,0x771,0x777,0x777,0x777,0x789,0x74a,0x747,0x747,0x747,0x747,0x747,0x747,0x747, 0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747, -0x747,0x747,0x747,0x96,0x96,0x96,0x747,0x747,0x747,0x747,0x747,0x747,0x96,0x96,0x747,0x747, -0x747,0x747,0x747,0x747,0x96,0x96,0x747,0x747,0x747,0x747,0x747,0x747,0x96,0x96,0x747,0x747, -0x747,0x96,0x96,0x96,0xb49,0xb49,0xb49,0xb49,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0x99, -0x99,0x1860,0x1860,0x1860,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f, -0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0x9c,0x9c,0x9c,0x9c,0x9c,0x1626,0x1626,0x1626,0x1626, -0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0xb58,0xb58,0xb58,0xb58, -0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58, -0xb58,0xb58,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb64,0xa2,0xa2,0xfe1,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0x16e3,0x16e3,0x16e3,0x16e3, -0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x1b12,0x1b12,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2, -0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xa5,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb7c,0xa5,0xb7c,0xb7c,0xa5,0xa5,0xb7c,0xa5, -0xa5,0xb7c,0xb7c,0xa5,0xa5,0xb7c,0xb7c,0xb7c,0xb7c,0xa5,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb79,0xb79,0xb79,0xb79,0xa5,0xb79,0xa5,0xb79,0xb79,0xb79,0xb79,0xcf0,0xb79,0xb79, -0xa5,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb79,0xb79,0xb79,0xb79, -0xb7c,0xb7c,0xa5,0xb7c,0xb7c,0xb7c,0xb7c,0xa5,0xa5,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xa5,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xa5,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xa5,0xb7c,0xb7c,0xb7c,0xb7c,0xa5,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xa5,0xb7c,0xa5,0xa5,0xa5,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xa5,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xdda,0xdda,0xa5,0xa5, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb79,0xb79,0xb79,0xb73,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xef1,0xeee,0xa5,0xa5,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xa8,0xb82,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8, -0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8, -0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09, -0xc09,0xc09,0xc09,0xc09,0xc09,0x1b18,0xc09,0xc09,0xc09,0xc09,0xc03,0xc03,0xc06,0x1b15,0xab,0xab, -0xab,0xab,0xab,0xab,0xab,0xab,0xab,0x1b18,0xc12,0xc12,0xc12,0xc12,0xc12,0xc12,0xc12,0xc12, -0xc12,0xc12,0xc12,0xc12,0xc12,0xc12,0xc12,0xc12,0xc12,0xc12,0xc0c,0xc0c,0xc0f,0xc72,0xc72,0xae, -0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18, -0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc15,0xc15,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e, -0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xb4,0xc1e,0xc1e,0xc1e,0xb4,0xc1b,0xc1b,0xb4,0xb4,0xb4,0xb4, -0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02, -0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02, -0xd02,0xd02,0xd02,0xd02,0xd02,0x14e8,0x14e8,0xb7,0xcf3,0xcf3,0xcf3,0xcff,0xcff,0xcff,0xcff,0xcf3, -0xcf3,0xcff,0xcff,0xcff,0xb7,0xb7,0xb7,0xb7,0xcff,0xcff,0xcf3,0xcff,0xcff,0xcff,0xcff,0xcff, -0xcff,0xcf6,0xcf6,0xcf6,0xb7,0xb7,0xb7,0xb7,0xcf9,0xb7,0xb7,0xb7,0xd05,0xd05,0xcfc,0xcfc, -0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08, -0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xba,0xba,0xd08,0xd08,0xd08,0xd08, -0xd08,0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xba,0x14eb,0x14eb,0x14eb,0x14eb, -0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb, -0xbd,0xbd,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb, -0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0xbd,0x1a64,0x14eb,0x14eb,0x14eb,0x14eb, -0x14eb,0x14eb,0x14eb,0x14eb,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c, -0xc0,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c, -0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xc0,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c, -0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xc0,0xd2c,0xd2c,0xc0,0xd2c, -0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xc0,0xc0, -0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xc0,0xc0, -0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0, -0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0, -0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, -0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xc3,0xc3,0xc3,0xc3,0xc3, -0xd6e,0xd6e,0xd74,0xc6,0xc6,0xc6,0xc6,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b, -0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b, -0xc6,0xc6,0xc6,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd35,0xd35,0xd35,0xd35, -0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35, -0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xc9,0xd32,0xd3e,0xd3e,0xd3e,0xd3e, -0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e, -0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xcc,0xcc,0xd3b,0xd3b,0xd3b,0xd3b, -0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0x1824,0x1824,0x1824,0x1824, -0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0xd41,0xd41,0xd41,0xd41, -0xd41,0xd41,0xcf,0xcf,0xd41,0xcf,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41, -0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xcf,0xd41, -0xd41,0xcf,0xcf,0xcf,0xd41,0xcf,0xcf,0xd41,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, -0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd2, -0xd2,0xd2,0xd2,0xd2,0xd2,0xd2,0xd2,0xd2,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5, -0xdf5,0xdf5,0xdf5,0x14ee,0x14ee,0x179a,0x179a,0xd8,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0, -0x10e0,0x10e0,0x10e0,0x10e0,0x1a73,0x129,0x129,0x129,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07, -0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xdfe, -0xdfe,0xe04,0xe04,0xdfe,0xdb,0xdb,0xe01,0xe01,0x1110,0x1110,0x1110,0x1110,0xde,0xde,0xde,0xde, -0xde,0xde,0xde,0xde,0xde,0xde,0xde,0xde,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f, -0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xe19,0xe16,0xe19,0xe16,0xe16,0xe0d,0xe0d,0xe0d, -0xe0d,0xe0d,0xe0d,0x115b,0x1158,0x115b,0x1158,0x1155,0x1155,0x1155,0x13e6,0x13e3,0xe1,0xe1,0xe1,0xe1, -0xe1,0xe13,0xe10,0xe10,0xe10,0xe0d,0xe13,0xe10,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe4, -0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe4, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe4,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe4, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe4,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22, -0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f, -0xe1f,0xe1f,0xe7,0xe7,0xe7,0xe7,0xe7,0xe7,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xea,0x13e9, -0xea,0xea,0xea,0xea,0xea,0x13e9,0xea,0xea,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c, -0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe3d,0xe31,0xe31,0xe31,0xed,0xe31,0xe31,0xed, -0xed,0xed,0xed,0xed,0xe31,0xe31,0xe31,0xe31,0xe3d,0xe3d,0xe3d,0xe3d,0xed,0xe3d,0xe3d,0xe3d, -0xed,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d, -0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0x1905,0x1905,0xed,0xed,0xe2e,0xe2e,0xe2e,0xed, -0xed,0xed,0xed,0xe34,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0x1902,0xed,0xed,0xed, -0xed,0xed,0xed,0xed,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe40,0xe40,0xe37,0xed,0xed,0xed, -0xed,0xed,0xed,0xed,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0x1161,0x1161, -0xf0,0xf0,0xf0,0xf0,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4f,0xe4f,0xe4f,0xe4c,0xe4c,0xe4f,0xe4c, -0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, -0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0x115e,0xf0,0xf0,0xf0,0xe46,0xe46, -0xe55,0xe55,0xe55,0xe55,0xf3,0xf3,0xf3,0xf3,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55, -0xe52,0xe55,0xe55,0xe55,0xe55,0xe55,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3, -0x14fd,0x1503,0x1500,0x1845,0x17a0,0x1869,0x1869,0x1869,0x1869,0x1869,0x190b,0x1908,0x190e,0x1908,0x190e,0x19cb, -0x1a67,0x1a67,0x1a67,0x1b2a,0x1b2a,0x1b24,0x1b21,0x1b24,0x1b21,0x1b24,0x1b21,0x1b24,0x1b21,0x1b27,0xf6,0xf6, -0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6, -0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6, -0xe79,0xe79,0xe79,0xe76,0xe76,0xe6d,0xe6d,0xe76,0xe73,0xe73,0xe73,0xe73,0x1a6a,0xf9,0xf9,0xf9, -0x12cc,0x12cc,0x12cc,0x12cf,0x12cf,0x12cf,0x12c6,0x12c6,0x12c9,0x12c6,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d, -0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0x13f5,0x13f5,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xe7f, -0x1335,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0x1332, -0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42, -0xeac,0xe9d,0xe97,0xea9,0xea6,0xea0,0xea0,0xeaf,0xe9a,0xea3,0xff,0xff,0xff,0xff,0xff,0xff, -0xf33,0xf33,0xf1e,0xf33,0xf36,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0x1b2d,0x105,0x105,0x105, -0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf3f,0xf3f,0xf24,0xf2a,0xf3f,0xf3f, -0xf27,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf21,0xf21,0xf21,0xf21,0xf21, -0xf21,0xf21,0xf21,0xf21,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0x1b30,0x1b30,0x105, -0x1b39,0x1b33,0x19d1,0x19ce,0x19d1,0x19d1,0x19d1,0x1a70,0x1a6d,0x1a70,0x1a6d,0x108,0x108,0x108,0x108,0x108, -0x1b39,0x1b33,0x108,0x1b33,0x108,0x1b33,0x1b39,0x1b33,0x1b39,0x1b33,0x108,0x108,0x108,0x108,0x108,0x108, -0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x1b36,0x1b36, -0x1b36,0x1a70,0x1a6d,0x150c,0x13fe,0x13fe,0x1338,0x103b,0x103b,0x103b,0x103b,0x103b,0xf4e,0xf4e,0xf4e,0xf4e, -0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e, -0xf4b,0xf4b,0xf51,0xf51,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0xf5a,0xf5a,0xf5a,0xf5a, -0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a,0xf5a, -0xf5a,0xf5a,0xf54,0xf54,0xf54,0xf54,0x116a,0x116a,0x10e,0x10e,0x10e,0xf57,0x1512,0x1512,0x1512,0x1512, -0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512, -0x1512,0x1512,0x1512,0x1512,0x1512,0x16f2,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111, -0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111, -0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0xf63,0xf63,0xf63,0x1518,0x1518,0x1518,0x1518,0x1518, -0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x114,0xf60,0xf60,0xf60,0xf60,0x1515,0x114,0x114,0x114, -0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66, -0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d, -0x191d,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x1062,0x1062,0x1062,0x1062,0x105f,0x105f,0x105f,0x105f, -0x105f,0x105f,0x105f,0x105f,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x105f,0x105f,0x1056,0x1053, -0x11a,0x11a,0x11a,0x1065,0x1065,0x1059,0x1059,0x1059,0x105c,0x105c,0x105c,0x105c,0x105c,0x105c,0x105c,0x105c, -0x105c,0x105c,0x11a,0x11a,0x11a,0x1062,0x1062,0x1062,0x1068,0x1068,0x1068,0x1068,0x1068,0x1068,0x1068,0x1068, -0x1068,0x1068,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x107d, -0x107d,0x107d,0x1080,0x1080,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d, -0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x10a7,0x10a7,0x10a7,0x10a7,0x10a1,0x17a6,0x120,0x120, -0x120,0x120,0x120,0x120,0x120,0x120,0x10ad,0x10ad,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4, -0x10a4,0x10a4,0x120,0x120,0x120,0x120,0x120,0x120,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10bf, -0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10c5,0x10c8,0x123,0x123,0x123,0x123, -0x123,0x123,0x123,0x123,0x123,0x123,0x123,0x10c2,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da, -0x10da,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10d7,0x10d7,0x10ce,0x10ce,0x10d7,0x10d7,0x10ce,0x10ce,0x126, -0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x10da,0x10da,0x10da,0x10ce,0x10da,0x10da,0x10da,0x10da, -0x10da,0x10da,0x10da,0x10da,0x10ce,0x10d7,0x126,0x126,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4, -0x10d4,0x10d4,0x126,0x126,0x10d1,0x10dd,0x10dd,0x10dd,0x1524,0x129,0x129,0x129,0x129,0x129,0x129,0x129, -0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129, -0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3, -0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3, -0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e6,0x12c,0x12c,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9, -0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9, -0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x12f,0x12f,0x12f,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec, -0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x132,0x132,0x132,0x132,0x132,0x132,0x132, -0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2, -0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2, -0x10f2,0x10f2,0x135,0x135,0x135,0x135,0x135,0x10ef,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5, -0x10f5,0x10f5,0x10f5,0x10f5,0x138,0x138,0x138,0x138,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8, -0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x13b,0x13b,0x13b,0x13b, -0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x1170,0x1170,0x1170,0x1170,0x1179,0x1170,0x1170,0x1170, -0x1179,0x1170,0x1170,0x1170,0x1170,0x116d,0x13e,0x13e,0x1176,0x1176,0x1176,0x1176,0x1176,0x1176,0x1176,0x117c, -0x1176,0x117c,0x1176,0x1176,0x1176,0x117c,0x117c,0x13e,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f, -0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x141,0x141, -0x141,0x141,0x141,0x141,0x141,0x141,0x141,0x141,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a, -0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x1197,0x1182,0x1197, -0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x144,0x118b,0x1194,0x1182,0x1194,0x1194,0x1182,0x1182,0x1182, -0x1182,0x1182,0x1182,0x1182,0x1182,0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x1182,0x1182,0x1188,0x1188,0x1188, -0x1188,0x1188,0x1188,0x1188,0x1188,0x144,0x144,0x1185,0x1191,0x1191,0x1191,0x1191,0x1191,0x1191,0x1191,0x1191, -0x1191,0x1191,0x144,0x144,0x144,0x144,0x144,0x144,0x1191,0x1191,0x1191,0x1191,0x1191,0x1191,0x1191,0x1191, -0x1191,0x1191,0x144,0x144,0x144,0x144,0x144,0x144,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x119d, -0x11a0,0x11a0,0x11a0,0x11a0,0x118e,0x118e,0x144,0x144,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563, -0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1560,0x1a85,0x12e1,0x12ba,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8, -0x12d8,0x12c0,0x12bd,0x12b4,0x12b4,0x12de,0x12b4,0x12b4,0x12b4,0x12b4,0x12c3,0x149d,0x14a3,0x14a0,0x14a0,0x18e4, -0x16b9,0x16b9,0x1a52,0x147,0x147,0x147,0x147,0x147,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5, -0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11ac,0x11ac,0x11af,0x11b8,0x11b2,0x11b2,0x11b2,0x11b8, -0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5, -0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x12a5, -0x12a5,0x12a5,0x12a5,0x12a5,0x12a5,0x150,0x150,0x150,0x11d6,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11cd, -0x11dc,0x11dc,0x11ca,0x11ca,0x11ca,0x11ca,0x153,0x12d5,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0, -0x11d0,0x11d0,0x153,0x153,0x153,0x153,0x11ca,0x11ca,0x11fa,0x11ee,0x11fa,0x156,0x156,0x156,0x156,0x156, -0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156, -0x156,0x156,0x156,0x11f7,0x11f7,0x11fd,0x11f1,0x11f4,0x1212,0x1212,0x1212,0x120c,0x120c,0x1203,0x120c,0x120c, -0x1203,0x120c,0x120c,0x1215,0x120f,0x1206,0x159,0x159,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209, -0x1209,0x1209,0x159,0x159,0x159,0x159,0x159,0x159,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x15c, -0x15c,0x15c,0x15c,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218, -0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218, -0x15c,0x15c,0x15c,0x15c,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224, -0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x15f,0x1221,0x121e,0x121e,0x121e,0x121e, -0x121e,0x121e,0x121e,0x121e,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233, -0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x162,0x162,0x162,0x122d,0x1230,0x1230, -0x1230,0x1230,0x1230,0x1230,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239, -0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x165,0x165,0x1236,0x1236,0x1236,0x1236, -0x1236,0x1236,0x1236,0x1236,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f, -0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x168,0x168,0x168,0x168,0x168,0x123c,0x123c,0x123c,0x123c, -0x123c,0x123c,0x123c,0x123c,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245, -0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245, -0x1245,0x1245,0x1245,0x16e,0x125d,0x125d,0x1b3c,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171, -0x171,0x1926,0x171,0x171,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c, -0x147c,0x147c,0x147c,0x147c,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827, -0x1827,0x1a76,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174, -0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174, -0x174,0x174,0x174,0x174,0x174,0x174,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344, -0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344, -0x12ae,0x13a7,0x13a7,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177, -0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab, -0x12ab,0x12ab,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x182a,0x177,0x177,0x177,0x177, -0x12a8,0x12a8,0x12a8,0x12a8,0x12a8,0x12a8,0x12a8,0x12a8,0x12a8,0x177,0x177,0x177,0x177,0x177,0x177,0x177, -0x13cb,0x13cb,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177, -0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177, -0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177, -0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x177,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d, -0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d, -0x134d,0x1347,0x1347,0x1347,0x17a,0x17a,0x134a,0x17a,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x1350,0x1359, -0x1353,0x1353,0x1359,0x1359,0x1359,0x1353,0x1359,0x1353,0x1353,0x1353,0x135c,0x135c,0x17d,0x17d,0x17d,0x17d, -0x17d,0x17d,0x17d,0x17d,0x1356,0x1356,0x1356,0x1356,0x180,0x1362,0x1362,0x1362,0x1362,0x1362,0x1362,0x180, -0x180,0x1362,0x1362,0x1362,0x1362,0x1362,0x1362,0x180,0x180,0x1362,0x1362,0x1362,0x1362,0x1362,0x1362,0x180, -0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x1362,0x1362,0x1362,0x1362,0x1362,0x1362,0x1362,0x180, -0x1362,0x1362,0x1362,0x1362,0x1362,0x1362,0x1362,0x180,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0, -0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1368,0x137a, -0x137a,0x136e,0x136e,0x136e,0x136e,0x136e,0x183,0x183,0x183,0x183,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b, -0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371, -0x1371,0x1371,0x1371,0x1371,0x1b42,0x1b45,0x1b45,0x1b3f,0x1b3f,0x1b45,0x183,0x183,0x183,0x183,0x183,0x183, -0x183,0x183,0x183,0x1533,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d, -0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x186,0x186,0x186, -0x186,0x186,0x186,0x186,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380, -0x1380,0x1380,0x1380,0x189,0x189,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380, -0x1380,0x1380,0x1380,0x1536,0x189,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380, -0x1380,0x1380,0x1380,0x13b0,0x189,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380, -0x1380,0x1380,0x1380,0x1380,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536, -0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x189,0x189,0x189,0x189,0x189,0x189, -0x189,0x189,0x189,0x189,0x13c5,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542, -0x16b0,0x1542,0x1542,0x1542,0x1782,0x1833,0x1833,0x186c,0x186c,0x1a34,0x1adf,0x1adf,0x18c,0x18c,0x18c,0x18c, -0x1c2c,0x1bae,0x1bae,0x1bae,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x16ad, -0x16ad,0x18c,0x18c,0x18c,0x1542,0x1542,0x1542,0x1542,0x1833,0x1833,0x1833,0x18cf,0x18cf,0x19b0,0x1a34,0x1adf, -0x1adf,0x18c,0x18c,0x18c,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383, -0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1bd2,0x1bd2,0x1bd2,0x18f,0x18f,0x18f,0x18f,0x1bd2, -0x1bd2,0x1bd2,0x1bd2,0x1bd2,0x141f,0x141f,0x141f,0x141f,0x192,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f, -0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f, -0x141f,0x141f,0x141f,0x141f,0x192,0x141f,0x141f,0x192,0x141f,0x192,0x192,0x141f,0x192,0x141f,0x141f,0x141f, -0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x192,0x141f,0x141f,0x141f,0x141f,0x192,0x141f,0x192,0x141f, -0x192,0x192,0x192,0x192,0x192,0x192,0x141f,0x192,0x192,0x192,0x192,0x141f,0x192,0x141f,0x192,0x141f, -0x192,0x141f,0x141f,0x141f,0x192,0x141f,0x141f,0x192,0x141f,0x192,0x192,0x141f,0x192,0x141f,0x192,0x141f, -0x192,0x141f,0x192,0x141f,0x192,0x141f,0x141f,0x192,0x141f,0x192,0x192,0x141f,0x141f,0x141f,0x141f,0x192, -0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x192,0x141f,0x141f,0x141f,0x141f,0x192,0x141f,0x141f,0x141f, -0x141f,0x192,0x141f,0x192,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x192,0x141f, -0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f, -0x192,0x192,0x192,0x192,0x192,0x141f,0x141f,0x141f,0x192,0x141f,0x141f,0x141f,0x141f,0x141f,0x192,0x141f, -0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f, -0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192, -0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192, -0x141c,0x141c,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192, -0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1422,0x1422,0x1422,0x1422,0x1422,0x1431,0x1422,0x1425,0x1425, -0x1422,0x1422,0x1422,0x1428,0x1428,0x195,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e, -0x142b,0x1437,0x1437,0x1437,0x1932,0x192f,0x192f,0x1a7c,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195, -0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2, -0x1443,0x1443,0x1443,0x1443,0x1443,0x1443,0x1443,0x1443,0x1443,0x1443,0x1443,0x1440,0x143a,0x143a,0x1440,0x1440, -0x1449,0x1449,0x1443,0x1446,0x1446,0x1440,0x143d,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198, -0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c, -0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x19b,0x19b,0x19b,0x19b,0x1707,0x1707,0x144c,0x144c, -0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707, -0x19b,0x19b,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707, -0x1458,0x1458,0x1458,0x1458,0x1458,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19e,0x19e,0x19e,0x19e,0x19d7, -0x1458,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455, -0x19da,0x19da,0x19da,0x19da,0x19da,0x19da,0x19da,0x19da,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x1452, -0x1452,0x1452,0x1452,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b, -0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1, -0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1, -0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4, -0x12db,0x12d8,0x12db,0x12b7,0x12d8,0x12de,0x12de,0x12e1,0x12de,0x12e1,0x12e4,0x12d8,0x12e1,0x12e1,0x12d8,0x12d8, -0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1482,0x148b,0x1482,0x148b,0x148b, -0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x148e,0x1485,0x19e0,0x1b51,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7, -0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1aa,0x1aa, -0x1551,0x1551,0x1551,0x1551,0x1551,0x1557,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa, -0x155d,0x155d,0x155d,0x155d,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x155a, -0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x2b2,0x1b9c,0x1b9c,0x1b9c,0x1b9c, -0x16bc,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3, -0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x1b0,0x1b0,0x1b0,0x1b0, -0x1a85,0x1b57,0x1b57,0x1b57,0x1b57,0x1b57,0x1b57,0x1b57,0x1b57,0x1b57,0x1b57,0x1b57,0x1b54,0x1b54,0x1b54,0x1b3, -0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3, -0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3, -0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6, -0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x1b6,0x1b6,0x1b6, -0x1b6,0x1b6,0x1b6,0x1b6,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x1b6,0x1b6, -0x156c,0x1566,0x1569,0x1572,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1b9,0x1b9,0x1b9,0x1b9, -0x1b9,0x1b9,0x1b9,0x1b9,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d, -0x155d,0x155d,0x155d,0x155d,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578, -0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1935,0x1935,0x1935,0x1935,0x1bd5,0x1bc,0x1bc, -0x1bc,0x1bc,0x1bc,0x1bc,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37, -0x1bc,0x1bc,0x1bc,0x1bc,0x1bb1,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc, -0x1bc,0x1bc,0x1bc,0x1bc,0x171c,0x16bf,0x1581,0x16c5,0x1bf,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a, -0x158a,0x1bf,0x1bf,0x158a,0x158a,0x1bf,0x1bf,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a, -0x158a,0x158a,0x158a,0x158a,0x158a,0x1bf,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x1bf,0x158a,0x158a, -0x1bf,0x158a,0x158a,0x158a,0x158a,0x158a,0x1bf,0x19bc,0x16c2,0x158a,0x157b,0x1581,0x157b,0x1581,0x1581,0x1581, -0x1581,0x1bf,0x1bf,0x1581,0x1581,0x1bf,0x1bf,0x1584,0x1584,0x1587,0x1bf,0x1bf,0x171f,0x1bf,0x1bf,0x1bf, -0x1bf,0x1bf,0x1bf,0x157b,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x158d,0x158a,0x158a,0x158a,0x158a,0x1581,0x1581, -0x1bf,0x1bf,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x1bf,0x1bf,0x1bf,0x157e,0x157e,0x157e,0x157e, -0x157e,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x15a2,0x15a2,0x15a2,0x15a2, -0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x1c2,0x15a2, -0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15b4,0x15b4,0x15b4,0x15a8, -0x15a8,0x15a8,0x15a8,0x15a8,0x15a8,0x15ab,0x15ae,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x15b1,0x15b1,0x15b1,0x15b1, -0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1722,0x1722,0x1722,0x1722, -0x15c0,0x15bd,0x19e3,0x19e3,0x1a8b,0x1a8e,0x1a88,0x1a88,0x1c8,0x1c8,0x1c8,0x1c8,0x174f,0x174f,0x174f,0x174f, -0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x15c6,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x15c6,0x15c6,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb, -0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x15d2,0x15d2,0x15d2,0x15d2, -0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15c9, -0x15cc,0x15cf,0x15d2,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x15e1,0x15e1,0x15e1,0x15e1, -0x15e1,0x15d5,0x15d5,0x1d1,0x1d1,0x1d1,0x1d1,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15de,0x15de,0x16c8,0x15de, -0x15de,0x15de,0x15db,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x15ea,0x15ea,0x15ea,0x15ea, -0x15ea,0x1d4,0x1d4,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e4,0x15e4,0x15e4,0x15e4, -0x15e4,0x15e4,0x15e4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x15ed,0x15ff,0x15ff,0x15f3, -0x15fc,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x15f6,0x15f6,0x15f6,0x15f6, -0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1605,0x1605,0x1605,0x1605, -0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605, -0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1da,0x1602,0x1602,0x1602,0x1602, -0x1602,0x1602,0x1602,0x1602,0x1602,0x1602,0x1da,0x1da,0x1da,0x1da,0x1608,0x1608,0x1b8d,0x1b8d,0x1b8d,0x1b8d, -0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1611,0x1611,0x1611,0x1611, -0x1611,0x160b,0x1614,0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x160e,0x160e,0x160e,0x160e, -0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x1611,0x1611,0x1611,0x1611,0x1611,0x1dd,0x161a,0x161a,0x161a,0x161a, -0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a, -0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x1e0,0x1626,0x1626,0x1626,0x1626, -0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626, -0x1626,0x1626,0x1623,0x1623,0x1623,0x1623,0x1623,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x163e,0x163e,0x1641,0x1641, -0x1644,0x1635,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x163b,0x163b,0x163b,0x163b, -0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x1e6,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1e6,0x163e, -0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e, -0x163e,0x163e,0x163e,0x163e,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x163e,0x163e,0x163e,0x164d,0x164d,0x164d,0x164d, -0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, -0x164d,0x164d,0x164d,0x164d,0x164d,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1656,0x1656,0x1656,0x1656, -0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1ec,0x1ec, -0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1653,0x1653,0x1653,0x1653,0x1ec,0x1ec,0x1ec,0x1671,0x1671,0x1671,0x1671, -0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1659,0x166b,0x166b,0x1659,0x1659, -0x1659,0x1659,0x1f2,0x1f2,0x166b,0x166b,0x166e,0x166e,0x1659,0x1659,0x166b,0x165f,0x165c,0x1662,0x1674,0x1674, -0x1665,0x1665,0x1668,0x1668,0x1668,0x1674,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b, -0x172b,0x172b,0x172b,0x172b,0x1728,0x1728,0x1728,0x1728,0x1725,0x1725,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2, -0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2, -0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f5,0x1677,0x1677,0x1677, -0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677, -0x1677,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x167a,0x167a,0x167a,0x167a, -0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x1f8,0x1f8,0x1f8,0x1f8,0x167a,0x167a,0x167a,0x167a, -0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x1f8,0x1f8,0x1f8,0x1f8, -0x1f8,0x1f8,0x1f8,0x1f8,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x1f8,0x1f8, -0x1f8,0x1f8,0x1f8,0x1f8,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x1f8,0x1f8,0x1f8,0x1f8, -0x1f8,0x1f8,0x1f8,0x1f8,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a, -0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x1f8,0x1f8,0x1a91,0x1a91,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8, -0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8, -0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x167d,0x168c,0x1683,0x1680, -0x1692,0x1692,0x1686,0x1692,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1689,0x1689,0x1689,0x1689, -0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1698,0x1698,0x1698,0x1698, -0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1fe, -0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x169e,0x1740,0x1740,0x1740,0x1740, -0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1740, -0x1740,0x1740,0x1740,0x1740,0x1740,0x1740,0x1938,0x201,0x201,0x172e,0x172e,0x172e,0x173a,0x173a,0x172e,0x172e, -0x172e,0x172e,0x173d,0x172e,0x172e,0x172e,0x172e,0x1731,0x201,0x201,0x201,0x201,0x1737,0x1737,0x1737,0x1737, -0x1737,0x1737,0x1737,0x1737,0x1737,0x1737,0x1734,0x1734,0x1743,0x1743,0x1743,0x1734,0x1746,0x1746,0x1746,0x1746, -0x1746,0x1746,0x1746,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204, -0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204, -0x204,0x204,0x204,0x204,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758, -0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x20a,0x1758,0x1758,0x20a,0x20a,0x20a,0x20a,0x20a,0x1755, -0x1755,0x1755,0x1755,0x1755,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x20d,0x175b,0x20d,0x175b,0x175b, -0x175b,0x175b,0x20d,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b, -0x175b,0x175b,0x20d,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175b,0x175e,0x20d,0x20d, -0x20d,0x20d,0x20d,0x20d,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7, -0x15b7,0x15b7,0x15b7,0x15b7,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767, -0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210, -0x210,0x210,0x210,0x210,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764, -0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x1761,0x1761, -0x1761,0x1761,0x1761,0x1761,0x176d,0x176d,0x176d,0x176d,0x176a,0x176d,0x176d,0x1770,0x1773,0x1770,0x1770,0x176d, -0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x176a, -0x176a,0x176a,0x176a,0x176a,0x17ca,0x17ca,0x17ca,0x17ca,0x17c1,0x17c1,0x17c1,0x17bb,0x17be,0x17be,0x17be,0x19e6, -0x216,0x216,0x216,0x216,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x216,0x216, -0x216,0x216,0x17c4,0x17c4,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x219,0x17e5,0x17e5, -0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5, -0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e2,0x17d0,0x17d0,0x17d0,0x17d0,0x17d0,0x17d0,0x17d0,0x219, -0x17d0,0x17d0,0x17d0,0x17d0,0x17d0,0x17d0,0x17e2,0x17d3,0x17e5,0x17e8,0x17e8,0x17dc,0x17d9,0x17d9,0x219,0x219, -0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df, -0x17df,0x17df,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6, -0x17d6,0x219,0x219,0x219,0x17f4,0x17f7,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd, -0x17fd,0x17fd,0x17fd,0x17fd,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x21c,0x21c,0x21c, -0x21c,0x21c,0x21c,0x21c,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956, -0x1956,0x1956,0x1956,0x1956,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x21f,0x17ee,0x17ee,0x17ee,0x17ee, -0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x21f,0x21f,0x17ee, -0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x21f,0x17ee,0x17ee,0x21f,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x21f, -0x21f,0x21f,0x21f,0x21f,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc, -0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x183c,0x18d8,0x1a40,0x1a43,0x1aeb,0x222,0x222,0x222,0x222,0x222,0x222,0x222, -0x222,0x222,0x222,0x222,0x1ae8,0x1ae8,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222, -0x222,0x222,0x222,0x222,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd, -0x17fd,0x17fd,0x17fd,0x17fd,0x225,0x225,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1, -0x17f1,0x17f1,0x17f1,0x17f1,0x225,0x17fa,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17fa,0x17f1,0x17f1, -0x17fa,0x17f1,0x17f1,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x1800,0x1800,0x1800,0x1800, -0x1800,0x1800,0x1800,0x1800,0x1800,0x1800,0x1800,0x1800,0x1800,0x228,0x228,0x228,0x228,0x228,0x228,0x228, -0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x1818,0x1818,0x1809,0x1803, -0x1803,0x1818,0x1806,0x181b,0x181b,0x181b,0x181b,0x181e,0x181e,0x1812,0x180f,0x180c,0x1815,0x1815,0x1815,0x1815, -0x1815,0x1815,0x1815,0x1815,0x1815,0x1815,0x1a94,0x1812,0x22b,0x180c,0x193b,0x19e9,0x1a97,0x1a97,0x22b,0x22b, -0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b, -0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x1824,0x1824,0x1824,0x1824, -0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824, -0x22e,0x22e,0x22e,0x22e,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821, -0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821, -0x22e,0x22e,0x22e,0x22e,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f, -0x183f,0x19b9,0x19b9,0x19b9,0x19b9,0x19b9,0x1a46,0x1a46,0x1a46,0x1a46,0x1a46,0x1a46,0x231,0x231,0x231,0x231, -0x231,0x231,0x231,0x231,0x1bba,0x1bba,0x1bba,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234, -0x234,0x234,0x234,0x234,0x276,0x276,0x1c2f,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276, -0x276,0x276,0x276,0x276,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x237,0x187e,0x187e,0x237,0x187e, -0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e, -0x187e,0x187e,0x187e,0x187e,0x187e,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x237,0x237,0x237,0x1872,0x237, -0x1872,0x1872,0x237,0x1872,0x1872,0x1872,0x1875,0x1872,0x1878,0x1878,0x1881,0x1872,0x237,0x237,0x237,0x237, -0x237,0x237,0x237,0x237,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x237,0x237, -0x237,0x237,0x237,0x237,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1, -0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1, -0x23a,0x23a,0x23a,0x23a,0x1890,0x1893,0x1893,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d, -0x23d,0x23d,0x23d,0x23d,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96, -0x1b96,0x1b96,0x1b96,0x1b96,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x240, -0x240,0x240,0x240,0x240,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63, -0x1b63,0x1b63,0x1b63,0x1b63,0x18ae,0x18b1,0x18c0,0x18c0,0x18b1,0x18b4,0x18ae,0x18ab,0x243,0x243,0x243,0x243, -0x243,0x243,0x243,0x243,0x1899,0x1884,0x1884,0x1884,0x1884,0x1884,0x1884,0x1896,0x1896,0x1884,0x1884,0x1884, -0x1899,0x1899,0x1899,0x1899,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef, -0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246, -0x246,0x246,0x246,0x246,0x1941,0x1941,0x1941,0x1941,0x1941,0x1941,0x1941,0x1941,0x1941,0x1941,0x1941,0x1941, -0x1941,0x1941,0x246,0x246,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1af1,0x1c35,0x1c35,0x1c35,0x1a4f,0x1a4f,0x1a4f,0x1bbd, -0x1bbd,0x279,0x279,0x279,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953, -0x1950,0x1950,0x1950,0x1944,0x1944,0x1944,0x1944,0x1944,0x1944,0x1944,0x1944,0x1944,0x1950,0x194a,0x1947,0x194d, -0x249,0x249,0x249,0x249,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956, -0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x24c, -0x24c,0x1956,0x1956,0x1956,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x24f,0x1965,0x1965,0x24f,0x1965,0x1965, -0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965, -0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1962,0x1962,0x1962,0x1962,0x1962,0x24f,0x1959,0x1959,0x24f,0x1962, -0x1962,0x1959,0x1962,0x195c,0x1965,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x196e,0x196e,0x1971,0x1971, -0x1968,0x1968,0x1968,0x1968,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x196b,0x196b,0x196b,0x196b, -0x196b,0x196b,0x196b,0x196b,0x196b,0x196b,0x252,0x252,0x252,0x252,0x252,0x252,0x1974,0x1974,0x1974,0x1974, -0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1977,0x1974,0x1974,0x1974,0x1977,0x1974,0x1974,0x1974, -0x1974,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x1980,0x1980,0x1980,0x1980, -0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x197a, -0x197a,0x197d,0x197d,0x1983,0x1983,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x1986,0x1986,0x1986,0x1986, -0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986,0x1986, -0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x1989,0x1989,0x1989,0x1989, -0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989,0x1989, -0x1989,0x1989,0x1989,0x198c,0x1995,0x1989,0x1989,0x25e,0x25e,0x25e,0x25e,0x25e,0x1998,0x1998,0x1998,0x1998, -0x1998,0x1998,0x1998,0x199b,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x19a4,0x19a4,0x19a4,0x19a4, -0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x199e,0x199e, -0x199e,0x199e,0x199e,0x199e,0x199e,0x199e,0x199e,0x199e,0x199e,0x19a1,0x19a1,0x19a1,0x19a1,0x19a7,0x19a7,0x19a7, -0x19a7,0x19a7,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264, -0x264,0x264,0x264,0x264,0x1b84,0x1b84,0x1b84,0x1b84,0x1b84,0x1b84,0x1b84,0x1b84,0x1b84,0x1b84,0x1b84,0x1b84, -0x1b84,0x1b84,0x1b84,0x1b84,0x1be7,0x1bed,0x1bed,0x1bed,0x1bed,0x1bed,0x1bed,0x1bea,0x1bea,0x1bea,0x1bea,0x1bea, -0x1bea,0x1bea,0x1bea,0x1bea,0x1bea,0x1bea,0x1bea,0x1bea,0x1bea,0x1bea,0x267,0x267,0x267,0x267,0x267,0x267, -0x267,0x267,0x267,0x267,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb, -0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x26a,0x26a,0x26a,0x26a,0x26a, -0x26a,0x26a,0x26a,0x26a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x26d,0x26d,0x1a0a,0x1a0a, -0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a, -0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a07,0x1a07,0x1a07,0x19fe,0x19fe,0x19fe,0x19fe,0x26d,0x26d,0x19fe,0x19fe, -0x1a07,0x1a07,0x1a07,0x1a07,0x1a01,0x1a0a,0x1a04,0x1a0a,0x1a07,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d, -0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d, -0x26d,0x26d,0x26d,0x26d,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16, -0x1a16,0x270,0x270,0x270,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16, -0x1a19,0x1a19,0x270,0x270,0x273,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c, -0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c, -0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x273,0x273,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276, -0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x1a49,0x1a49,0x1a49,0x276,0x276,0x1c32,0x276,0x276, -0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x1a4c,0x1a4c,0x1a4c,0x1a4c,0x276,0x276,0x276,0x276, -0x276,0x276,0x276,0x276,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1, -0x18e1,0x18e1,0x18e1,0x18e1,0x1a4f,0x1a4f,0x1a4f,0x1af1,0x1af1,0x1af1,0x1af1,0x1c35,0x1c35,0x279,0x279,0x279, -0x279,0x279,0x279,0x279,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1af1,0x1af1,0x1af1,0x1af1,0x1af1,0x1af1, -0x1af1,0x1af1,0x1af1,0x1af1,0x1af1,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1c35,0x1c35,0x1c35,0x1af1,0x1af1,0x1af1,0x1af1, -0x1af1,0x1af1,0x1af1,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1c35,0x1c35,0x1c35,0x279,0x1c35,0x1af1,0x1af1,0x1af1,0x1bc0, -0x1bc0,0x1bc0,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x1c35,0x1c35,0x1af1,0x1af1,0x1af1,0x1af1, -0x1af1,0x1af1,0x1af1,0x1bbd,0x1bbd,0x1bbd,0x1c35,0x1c35,0x279,0x279,0x279,0x279,0x1bbd,0x1bbd,0x1bbd,0x1bbd, -0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1c35,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x1bc0,0x1bc0,0x1bc0,0x1bc0, -0x1bc0,0x1bc0,0x1bc0,0x1c38,0x1c38,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x1a25,0x1a1f,0x1a1f,0x1a1f, -0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x27c,0x27c, -0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x1a22,0x1a31,0x1a31,0x1a31,0x1a31, -0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a28,0x1a28,0x1a28,0x1a28,0x1a2e,0x1a2e,0x1a2e,0x1a2e, -0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x27f,0x27f,0x27f,0x27f,0x27f,0x1a2b,0x1a9d,0x1a9d,0x1a9d,0x1a9d, -0x1a9d,0x1a9a,0x1a9a,0x1a9a,0x1a9a,0x1a9a,0x1a9a,0x1a9a,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282, -0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x1ab8,0x1ab8,0x1ab8,0x1ab8, -0x1ab8,0x1ab8,0x1ab8,0x285,0x285,0x1ab8,0x285,0x285,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8, -0x285,0x1ab8,0x1ab8,0x285,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1ab8, -0x1ab8,0x1ab8,0x1ab8,0x1ab8,0x1aa0,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x285,0x1aaf,0x1ab2,0x285,0x285,0x1aa0, -0x1aa0,0x1ab5,0x1aa6,0x1abb,0x1aaf,0x1abb,0x1aaf,0x1aa3,0x1abe,0x1aa9,0x1abe,0x285,0x285,0x285,0x285,0x285, -0x285,0x285,0x285,0x285,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x285,0x285, -0x285,0x285,0x285,0x285,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7, -0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x288,0x288,0x288,0x288,0x288,0x288, -0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288, -0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x1ac4,0x1ac4,0x1ac4,0x1ac4, -0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x28e, -0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x28e, -0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e, -0x28e,0x28e,0x28e,0x28e,0x1ac7,0x1ac7,0x1ac7,0x1ac7,0x1ac7,0x1ac7,0x1ac7,0x1ac7,0x1ac7,0x1ac7,0x28e,0x28e, -0x28e,0x28e,0x28e,0x28e,0x1afa,0x1afa,0x1afa,0x1afa,0x1afa,0x1afa,0x1afa,0x1afa,0x1afa,0x291,0x291,0x291, -0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291, -0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291, -0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x294,0x1aca,0x1aca,0x1acd,0x294,0x294, -0x1ad0,0x1ad0,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294, -0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b60,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x1b63,0x297, -0x1b66,0x1b66,0x297,0x297,0x297,0x297,0x297,0x297,0x1b5d,0x1b5d,0x1b5d,0x1b5d,0x1b5d,0x1b5d,0x1b5d,0x1b5d, -0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c, -0x1b6c,0x1b69,0x1b69,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a, -0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x29d,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x29d,0x1b6f,0x1b6f,0x29d, -0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x29d, -0x1b72,0x1b78,0x1b78,0x1b75,0x1b75,0x1b75,0x2a3,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75, -0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75, -0x1b75,0x2a3,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, -0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7e,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b, -0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x1b7b,0x2a6, -0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x1bf0,0x1bf0,0x1bf0,0x1bf0,0x1bf0,0x1bf0,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6, -0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6, -0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x1b84,0x1b84,0x1b81,0x1b81, -0x1b81,0x1b81,0x1b87,0x1b87,0x1b87,0x1b87,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9, -0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x1a9d,0x1a9d,0x1a9d,0x1a9d, -0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1a9d,0x1b8d,0x1b8d,0x1b8d,0x1b8d, -0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d, -0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x2ac,0x1b8a,0x1b8a,0x1b8a,0x1b8a, -0x1b8a,0x1b8a,0x1b8a,0x1b8a,0x1b8a,0x1b8a,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x1554,0x1554,0x1554,0x1554, -0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1b93,0x1b93,0x1b93,0x1b93, -0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b93,0x1b90,0x2af,0x2af,0x2af,0x2af,0x2af, -0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x1b9c,0x1b9c,0x1b9c,0x1b9c, -0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x2b2,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x2b2, -0x1b9c,0x1b9c,0x2b2,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x2b2,0x1b99, -0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x2b2,0x1b99, -0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x1b99,0x2b2,0x1b99,0x1b99,0x2b2,0x2b2,0x2b2,0x1ba2,0x1ba2,0x1ba2,0x1ba2, -0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x2b5,0x2b5,0x1ba2,0x1ba2,0x1ba2,0x1ba2, -0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x2b5, -0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f, -0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8, -0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8, -0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x1bf3,0x1bf3,0x1bf3,0x1bf9,0x1bf9,0x1bf9,0x1bf9, -0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bfc,0x1bfc,0x1bfc,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, -0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x1bff,0x1bff,0x1bff,0x1bff, -0x1bff,0x1bff,0x1bff,0x1bff,0x1bff,0x1bff,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be, -0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be, -0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02, -0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x2c1,0x2c1,0x2c1,0x2c1, -0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x1c05,0x1c05,0x1c1a,0x1c11,0x1c17,0x1c17,0x1c17,0x1c17, -0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x2c4,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17, -0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17, -0x1c11,0x1c11,0x1c05,0x1c05,0x1c05,0x1c05,0x1c05,0x2c4,0x2c4,0x2c4,0x1c11,0x1c11,0x1c05,0x1c14,0x1c08,0x1c1d, -0x1c1d,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0e,0x1c0e,0x1c0e,0x1c0e, -0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x1c26,0x1c26,0x1c26,0x1c26, -0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c26,0x1c20,0x1c20,0x1c20,0x1c20,0x1c23,0x1c23,0x1c23,0x1c23, -0x1c23,0x1c23,0x1c23,0x1c23,0x1c23,0x1c23,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2ca,0x2ca,0x2ca,0x2ca, -0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca, -0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x18db,0x2dc,0x2dc,0x2dc, -0x2dc,0x2dc,0x2dc,0x2dc,0x2dc,0x2dc,0x2dc,0x2dc,0x2dc,0x2dc,0x2dc,0x2dc,0x2ca,0x2ca,0x2ca,0x2ca, -0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca, -0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x95d,0x95d,0x1c3b,0x1c3b,0x1c3b,0x1c3b, -0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x2ca,0x2ca,0x2ca,0x2ca, -0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0xc6c,0xc6c,0xc6c,0xc6c, -0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0x129f,0x129f,0x129f,0x2cd,0x2cd,0xe94,0xe94,0xe94,0xe94, -0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94, -0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd, -0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd, -0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0x2cd,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, -0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, -0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0x2d0,0x2d0,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1, -0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x1bab,0x1bab,0x1bab, -0x1bab,0x1c29,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8, -0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8, -0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x2d6,0x2d6,0x1785,0x1785,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9, -0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db, -0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4, -0x1af4,0x1af4,0x1af4,0x2df,0x2df,0x2df,0x2df,0x2df,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, -0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x3e7,0x3db,0x3db,0x3db,0x3db,0x3db,0x3db,0x3db, -0x3db,0x3e7,0x3e7,0x3e7,0x3e7,0x3e1,0x111f,0x12f6,0x3ea,0x927,0x92a,0x3d8,0x3d8,0x111c,0x12f3,0x12f3, -0x3ed,0x3ed,0x3ed,0x3ed,0x3ed,0x3ed,0x3ed,0x3ed,0x111c,0x3db,0x3db,0x3e7,0xcae,0x3ea,0x3ea,0x3ea, -0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea, -0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3db,0x3db,0x8af,0x8b2,0x945,0x945, -0x945,0x945,0x945,0x945,0x945,0x945,0x945,0x945,0x3e4,0xf7e,0xf7b,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9, -0x14b8,0x1122,0x1122,0xed0,0xed0,0xda1,0xed0,0xed0,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea, -0x3ea,0x3ed,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ed,0x3ea,0x3ea,0x3ed,0x3ea,0x3ea,0x3ea, -0x3ea,0x3ea,0x12f3,0x12f6,0x3de,0x3ea,0x3e7,0x3e7,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489, -0x489,0x12ff,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489, -0x489,0x489,0x12ff,0x1857,0x1857,0xf9c,0x47a,0x483,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5, -0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0xba3, -0xba3,0xdb0,0xdb0,0x8b5,0xdad,0x13da,0x13da,0x13da,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8, -0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8, -0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4ce,0x4ce,0x4ce,0x1137,0x1137,0x1137,0x1137,0x1137, -0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, -0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, -0x4cb,0x4cb,0x1134,0x1134,0x1134,0x1134,0x1134,0x1134,0x4d1,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce, -0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce, -0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4da,0x4d4,0x4da,0x4d4, -0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4, -0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4d4,0x4d4, -0x4d4,0x4d4,0x4d7,0x9a2,0xfc9,0xfc9,0xfcc,0xfc9,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4, -0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4, -0x4da,0x4d4,0xfcc,0xfc9,0xfcc,0xfc9,0xfcc,0xfc9,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6, -0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x6a5,0x6a5,0x6a8,0x504,0x6b4,0x6b1,0x6b1,0x6ae, -0x52e,0x52e,0x4ec,0x4ec,0x4ec,0x4ec,0x4ec,0xb34,0x6b7,0x510,0x6cf,0x6d2,0x525,0x6b7,0x513,0x513, -0x504,0x51f,0x51f,0x6a5,0x52b,0x528,0x6ab,0x4fe,0x4f5,0x4f5,0x4f8,0x4f8,0x4f8,0x4f8,0x4f8,0x4fb, -0x4f8,0x4f8,0x4f8,0x4ef,0x537,0x534,0x531,0x531,0x6c3,0x519,0x516,0x6c0,0x6bd,0x6ba,0x6cc,0x507, -0x6c9,0x6c9,0x51c,0x51f,0x6c6,0x6c6,0x51c,0x51f,0x501,0x504,0x504,0x504,0x522,0x50d,0x50a,0xbb8, -0xad7,0xad7,0xad4,0xad4,0xad4,0xad4,0xbaf,0xbaf,0xbaf,0xbaf,0xbb5,0xcdb,0xcd8,0xdbc,0xdbf,0xbb2, -0xdbf,0xdbf,0xdbf,0xdbf,0xdbc,0xdbf,0xdbf,0xbac,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x558, -0x55e,0x73e,0x55b,0x9a5,0x9c6,0xada,0xada,0xada,0xbbe,0xbbe,0xdc5,0xdc5,0xdc5,0xdc5,0x1140,0x1143, -0x1143,0x1314,0x14a6,0x14d0,0x14d3,0x14d3,0x16da,0x185a,0x56a,0x56a,0x582,0x6e4,0x567,0x6de,0x56a,0x57f, -0x567,0x6e4,0x579,0x582,0x582,0x582,0x579,0x579,0x582,0x582,0x582,0x6ea,0x567,0x582,0x6e7,0x567, -0x576,0x582,0x582,0x582,0x582,0x582,0x567,0x567,0x56d,0x6de,0x6e1,0x567,0x582,0x567,0x6ed,0x567, -0x582,0x570,0x588,0x6f0,0x582,0x582,0x573,0x579,0x582,0x582,0x585,0x582,0x579,0x57c,0x57c,0x57c, -0x57c,0xae3,0xae0,0xcde,0xdce,0xbd3,0xbd6,0xbd6,0xbd0,0xbcd,0xbcd,0xbcd,0xbcd,0xbd6,0xbd3,0xbd3, -0xbd3,0xbd3,0xbca,0xbcd,0xdcb,0xedc,0xedf,0xfd2,0x1146,0x1146,0x1146,0x6f6,0x6f3,0x58b,0x58e,0x58e, -0x58e,0x58e,0x58e,0x6f3,0x6f6,0x6f6,0x6f3,0x58e,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc, -0x6fc,0x6fc,0x6fc,0x6fc,0x597,0x597,0x597,0x597,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9, -0x6f9,0x6f9,0x591,0x591,0x591,0x591,0x591,0x591,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d, -0x59a,0x59d,0x59d,0x59d,0x59d,0x59d,0x5a0,0x59a,0x59d,0x59d,0x59a,0x59a,0x59a,0x59a,0x59d,0x59d, -0x6ff,0x6ff,0x59a,0x59a,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d, -0x59d,0x5a0,0x5a0,0x5a0,0x59d,0x59d,0x702,0x59d,0x702,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d, -0x59a,0x59d,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59d,0x59d,0x59a,0x6ff,0x59a,0x59a,0x59a,0xae9, -0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9, -0xbd9,0xbd9,0xbd9,0xbd9,0x705,0x5a3,0x705,0x705,0x5a6,0x5a3,0x5a3,0x705,0x705,0x5a6,0x5a3,0x705, -0x5a6,0x5a3,0x5a3,0x705,0x5a3,0x705,0x5b2,0x5af,0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x5a3, -0x5a3,0x705,0x705,0x705,0x705,0x5a3,0x5a3,0x705,0x5a6,0x705,0x5a6,0x705,0x705,0x705,0x705,0x705, -0x70b,0x5a9,0x705,0x5a9,0x5a9,0x5a3,0x5a3,0x5a3,0x705,0x705,0x705,0x705,0x5a3,0x5a3,0x5a3,0x5a3, -0x705,0x705,0x5a3,0x5a3,0x5a3,0x5a6,0x5a3,0x5a3,0x5a6,0x5a3,0x5a3,0x5a6,0x705,0x5a6,0x5a3,0x5a3, -0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3, -0x5a3,0x5a3,0x5a3,0x5a3,0x708,0x705,0x5a6,0x5a3,0x705,0x705,0x705,0x705,0x5a3,0x5a3,0x705,0x705, -0x5a3,0x5a6,0x708,0x708,0x5a6,0x5a6,0x5a3,0x5a3,0x5a6,0x5a6,0x5a3,0x5a3,0x5a6,0x5a6,0x5a3,0x5a3, -0x5a3,0x5a3,0x5a3,0x5a3,0x5a6,0x5a6,0x705,0x705,0x5a6,0x5a6,0x705,0x705,0x5a6,0x5a6,0x5a3,0x5a3, -0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x705,0x5a3,0x5a3, -0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a6,0x5a6,0x5a6,0x5a6, -0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x705, -0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3, -0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3, -0x5a6,0x5a6,0x5a6,0x5a6,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a6,0x5a6,0x5a6,0x5a6,0x5a3,0x5ac, -0x5a3,0x5a3,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc, -0x5b5,0xaec,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5be,0x5bb,0x5be,0x5bb,0x5b5,0x5b5,0x5b5,0x5b5, -0x5b5,0x5b5,0x70e,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x7fe,0x7fe,0x5b5,0x5b5,0x5b5,0x5b5, -0x5b8,0x5b8,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x804,0x801,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5, -0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5, -0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0xaec, -0xbe2,0xaec,0xaec,0xaec,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1, -0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1, -0x5c1,0x5c1,0x5c1,0x5c1,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x5c7,0xc3f, -0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f, -0xc3f,0xc3f,0xc3f,0xd4d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, -0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x5ca,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd, -0x5cd,0x5cd,0x5cd,0x5cd,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, -0x5cd,0x5cd,0x5cd,0x5cd,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, -0x71d,0x71d,0x71d,0x71d,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720, -0x720,0x720,0x720,0x720,0x5d0,0x5d0,0x720,0x720,0x720,0x720,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5, -0xbe5,0xbe5,0xbe5,0xbe5,0x726,0x726,0x5d3,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x5d3,0x5d3, -0x5d3,0x5d3,0x5d6,0x5d6,0x5d6,0x5d6,0x726,0x726,0x5d6,0x5d6,0x726,0x726,0x5d3,0x5d3,0x5d3,0x5d3, -0x726,0x726,0x5d6,0x5d6,0x726,0x726,0x5d3,0x5d3,0x5d3,0x5d3,0x726,0x726,0x723,0x5d3,0x5d6,0x726, -0x5d3,0x5d3,0x723,0x726,0x726,0x726,0x5d6,0x5d6,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3, -0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x726,0x723,0x726,0x723,0x5d3,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6, -0x5d6,0x5d3,0x5d3,0x723,0xaf2,0xaf2,0xaf2,0xaf2,0xaf2,0xaf2,0xaf2,0xaf2,0xbe8,0xbe8,0xbe8,0xbe8, -0xbe8,0xc57,0xc57,0xbe8,0x5dc,0x5dc,0x5dc,0x5dc,0x5d9,0x72f,0x72f,0x5d9,0x5d9,0x729,0x5d9,0x5d9, -0x5d9,0x5d9,0x729,0x729,0x5d9,0x5d9,0x5d9,0x5d9,0xd56,0xd56,0xbeb,0xbeb,0xdd7,0xaf5,0x5dc,0x5dc, -0x72c,0x5df,0x72c,0x5dc,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9, -0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5dc,0x5dc,0x5dc, -0x5d9,0x5d9,0x5d9,0x5d9,0x72f,0x5d9,0x72f,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x807,0x807,0x807,0x807, -0x807,0x807,0x807,0x807,0x807,0x807,0x807,0x807,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9, -0x5d9,0x5d9,0x5d9,0x5d9,0x72f,0x72f,0x5e2,0x72f,0x729,0x729,0x5d9,0x729,0x72c,0x729,0x729,0x5d9, -0x729,0x72f,0x5e2,0x72f,0xaf5,0xaf5,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee, -0xbee,0xbee,0xdd4,0xe8b,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5, -0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e8,0x139b,0x139b,0x139b,0x5e8,0x5e8,0x5e8,0x5e8, -0x5e8,0x5e8,0x5e8,0x5e8,0x14d9,0x5ee,0x5ee,0x5ee,0x5ee,0x139b,0x5e8,0x5e8,0x5ee,0x5ee,0x139e,0x139e, -0x5f4,0x5f4,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8, -0x5e8,0x5e8,0x5e8,0x5e8,0x139b,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8, -0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x735,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8, -0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x139b,0x5e8,0x139b,0x5e8,0x5e8,0x5e8,0x5e8,0x139b, -0x139b,0x139b,0x5e8,0x1299,0x5e8,0x5e8,0x5e8,0x5f1,0x5f1,0x5f1,0x5f1,0x1320,0x1320,0x5e8,0x5eb,0x5eb, -0x5ee,0x5e8,0x5e8,0x5e8,0xbf4,0xbf1,0xbf4,0xbf1,0xbf4,0xbf1,0xbf4,0xbf1,0xbf4,0xbf1,0xbf4,0xbf1, -0xbf4,0xbf1,0x732,0x732,0x732,0x732,0x732,0x732,0x732,0x732,0x732,0x732,0x5e8,0x5e8,0x5e8,0x5e8, -0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x139b,0x5e8,0x5e8,0x5e8, -0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x139b,0x615,0x615,0x615,0x615, -0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, -0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x60c,0x60c,0x60c,0x60c,0x60c,0x60c, -0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, -0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0xb46,0xb46,0xb46,0xb46, -0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0x615,0x615,0x95a,0x615, -0x615,0x615,0x615,0x615,0x615,0x615,0x60c,0x60c,0xbf7,0xd7a,0x1b0f,0x1b0f,0x612,0x618,0x615,0x60f, -0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f, -0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x615,0x60f,0x615,0x60f, -0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f, -0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x615,0x60f,0x612,0x618,0x615,0x60f,0x615,0x60f, -0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x615,0x60f,0x1323,0x1323,0x1323,0x1323,0x1323,0x1323, -0x1323,0x1323,0x1323,0x1323,0x1323,0x1323,0x1323,0x1323,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x612,0x618, -0x612,0x618,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f, -0x612,0x615,0x60f,0x612,0x615,0x60f,0x612,0x618,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, -0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x612, -0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, -0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, -0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x612,0x612,0x612,0x612,0x612,0x612,0x612, -0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x60f,0x615,0x918,0x91b,0x1b0f,0x1b0f,0x1b0f,0x1b0f, -0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x1b0f,0x612,0x60f,0x612,0x612, -0x612,0x612,0x612,0x612,0x60f,0x612,0x60f,0x60f,0x612,0x612,0x60f,0x60f,0x612,0x612,0x60f,0x612, -0x60f,0x612,0x60f,0x60f,0x612,0x60f,0x60f,0x612,0x60f,0x612,0x60f,0x60f,0x612,0x60f,0x612,0x612, -0x60f,0x60f,0x60f,0x612,0x60f,0x60f,0x60f,0x60f,0x60f,0x612,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, -0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, -0x612,0x612,0x60f,0x60f,0x612,0x60f,0x612,0x60f,0x60f,0x60f,0x60f,0x60f,0x612,0x612,0x612,0x612, -0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, -0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x618, -0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, -0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, -0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, -0x618,0x618,0x618,0x618,0x618,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, -0x61b,0x61b,0x61b,0x61b,0xfde,0xfde,0xfde,0x14dc,0x14dc,0x14dc,0x14dc,0x14dc,0x14dc,0x14dc,0x16e0,0x16e0, -0x864,0x86a,0x86a,0x876,0x876,0x867,0x85e,0x867,0x85e,0x867,0x85e,0x867,0x85e,0x867,0x85e,0x867, -0x62a,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x62a, -0x624,0x627,0x62d,0x62a,0x624,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627, -0x62d,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627, -0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627, -0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x714,0x714,0x714,0x714, -0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714, -0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711, -0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711, -0x711,0x711,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a, -0x71a,0x71a,0x71a,0x71a,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717, -0x717,0x717,0x717,0x717,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, -0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, -0x71d,0x71d,0x71d,0x71d,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738, -0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738, -0x738,0x738,0x738,0x738,0xc45,0x8c7,0x8c1,0x8be,0x8c4,0x8bb,0x74d,0x750,0x750,0x750,0x750,0x750, -0x750,0x750,0x750,0x750,0x8cd,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d, -0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d, -0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x8ca,0x8ca,0x753,0x8dc,0x8df,0x8e5,0x80a,0x816,0x8fa,0x813, -0x8d3,0x8d0,0x8d3,0x8d0,0x8d9,0x8d6,0x8d9,0x8d6,0x8d3,0x8d0,0x810,0x8e5,0x8d3,0x8d0,0x8d3,0x8d0, -0x8d3,0x8d0,0x8d3,0x8d0,0x8eb,0x8f1,0x8ee,0x8ee,0x759,0x795,0x795,0x795,0x795,0x795,0x795,0x78f, -0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f, -0x78f,0x78f,0x78f,0x75c,0x777,0x756,0x77d,0x780,0x77a,0x792,0x792,0x792,0x792,0x792,0x792,0x78c, -0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c, -0x78c,0x78c,0x78c,0x75c,0x777,0x756,0x777,0xc48,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8, -0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8, -0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x1293,0x1293,0x1293,0x1293,0x1293,0x7fb, -0x810,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x933,0x933,0x933,0x933,0x819,0x819, -0x8e8,0x8f7,0x8f7,0x8f7,0x8f7,0x8f4,0x80d,0x8e2,0xb19,0xb19,0xb19,0xc5a,0xc78,0xc75,0xb37,0x8b8, -0x81f,0x81c,0x81f,0x822,0x81c,0x81f,0x81c,0x81f,0x81c,0x81f,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c, -0x81f,0x81f,0x81c,0x81f,0x81f,0x81c,0x81f,0x81f,0x81c,0x81f,0x81f,0x81c,0x81f,0x81f,0x81c,0x81c, -0xc7b,0x831,0x82b,0x831,0x82b,0x831,0x82b,0x831,0x82b,0x831,0x82b,0x82b,0x82e,0x82b,0x82e,0x82b, -0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b, -0x82e,0x82b,0x82e,0x831,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82b,0x82b,0x82b,0x82b,0x82b, -0x82e,0x82e,0x82b,0x82e,0x82e,0x82b,0x82e,0x82e,0x82b,0x82e,0x82e,0x82b,0x82e,0x82e,0x82b,0x82b, -0x82b,0x82b,0x82b,0x831,0x82b,0x831,0x82b,0x831,0x82b,0x82b,0x82b,0x82b,0x82b,0x82b,0x831,0x82b, -0x82b,0x82b,0x82b,0x82b,0x82e,0x831,0x831,0x82e,0x82e,0x82e,0x82e,0x900,0x903,0x834,0x837,0xc63, -0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, -0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, -0x840,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, -0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x849,0x849,0x849,0x849, -0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849, -0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0xd5f,0xd5f,0xe8e,0x843,0x90c,0x90c,0x90c,0x90c, -0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xd59,0xd59,0xd59,0xd59,0x84c,0x84c,0x84c,0x84c, -0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c, -0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x1a58,0x912,0x912,0x912,0x912, -0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x84f,0x84f,0x84f, -0x84f,0x84f,0x84f,0xd62,0xd62,0xd62,0xd62,0x915,0x915,0x915,0x915,0x915,0x84f,0x84f,0x84f,0x84f, -0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f, -0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0xd62,0xd62, -0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852, -0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852, -0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855, -0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855, -0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91, -0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91, -0x1104,0x1104,0x1104,0x1104,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858, -0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858, -0x858,0x858,0x858,0x858,0x858,0x858,0x85b,0x85b,0x858,0x85b,0x858,0x85b,0x85b,0x858,0x858,0x858, -0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x85b,0x858,0x85b,0x858,0x85b,0x85b,0x858,0x858,0x85b, -0x85b,0x85b,0x858,0x858,0x858,0x858,0x1497,0x1497,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c, -0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, -0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, -0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x12d2,0x12d2,0x12d2,0x12d2,0x127b,0x127b,0x127b,0x127b, -0x127b,0x127b,0x127b,0x127b,0xd59,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66, -0xc66,0xc66,0xc66,0xc66,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f, -0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f, -0x90f,0x90f,0x90f,0x90f,0x90f,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xc66, -0xc66,0xc66,0xc66,0xc66,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, -0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, -0x912,0x912,0x912,0xd62,0x99c,0x97e,0x97e,0x97e,0x97e,0x978,0x97e,0x97e,0x990,0x97e,0x97e,0x97b, -0x987,0x98d,0x98d,0x98d,0x98d,0x98d,0x990,0x978,0x984,0x978,0x978,0x978,0x96c,0x96c,0x978,0x978, -0x978,0x978,0x978,0x978,0x993,0x993,0x993,0x993,0x993,0x993,0x993,0x993,0x993,0x993,0x978,0x978, -0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x97b,0x96c,0x978,0x96c,0x978,0x96c,0x98a,0x981, -0x98a,0x981,0x999,0x999,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8, -0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8, -0x9a8,0x9a8,0x9a8,0x9a8,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab, -0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab, -0x9ab,0x9ab,0x9ab,0x9ab,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae, -0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae, -0x9ae,0x9ae,0x9ae,0x9ae,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7, -0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7, -0x9b7,0x9b7,0x9b1,0x9b1,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba, -0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba, -0x9ba,0x9ba,0x9b4,0x9b4,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7, -0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7, -0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba, -0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba, -0x9ba,0x9ba,0x9ba,0x9ba,0x9bd,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0, -0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0, -0x9bd,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0, -0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0xa4d,0xa4d,0xfc3,0xa4d, -0xa4d,0xa4d,0xa50,0xa4d,0xfc3,0xa4d,0xa4d,0xfba,0xa47,0xa3b,0xa3b,0xa3b,0xa3b,0xa4a,0xa3b,0xfab, -0xfab,0xfab,0xa3b,0xa3e,0xa47,0xa41,0xfb1,0xfbd,0xfbd,0xfab,0xfab,0xfc3,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xa53,0xa53,0xa44,0xa44,0xa44,0xa44,0xa4d,0xa4d,0xa4d,0xa4d, -0xa4d,0xa4d,0xa4a,0xa4a,0xa3b,0xa3b,0xfc3,0xfc3,0xfc3,0xfc3,0xfab,0xfab,0xa4d,0xa4d,0xa4d,0xa4d, -0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d, -0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xdb9,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdb9,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, -0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, -0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e, -0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6b,0xa71,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0x113d, -0x113d,0x113d,0x113d,0x113d,0x113d,0x113d,0x113d,0x113d,0x113a,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e, -0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e, -0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa83,0xa83,0xa83,0xa83, -0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, -0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xaa7,0xaa7,0xaa7,0xaaa, -0xaaa,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7, -0xa8f,0xa8f,0xaa4,0xa86,0xa86,0xa86,0xa86,0xa86,0xa86,0xa86,0xaa4,0xaa4,0xaa7,0xaa7,0xaa7,0xaa7, -0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7, -0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xac8,0xac8,0xac8,0xac8, -0xac8,0xab3,0xab3,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8, -0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8, -0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xacb,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8, -0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8,0xac8, -0xac8,0xac8,0xac8,0xac8,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec, -0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xbe2, -0xbe2,0xbe2,0xbe2,0xbe2,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8, -0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8, -0xaf8,0xaf8,0xaf8,0xaf8,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0xb10,0xb10,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c, -0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0x13a1,0x13a1,0x13a1,0x1ad3, -0x1ad3,0x1ad3,0x1ad3,0x1ad3,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f, -0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f, -0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6, -0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, -0xb22,0xb22,0xb22,0xb22,0xb22,0xb25,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, -0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, -0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb28,0xb28,0xc69,0xc69,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xc69,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, -0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, -0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0x14df,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xced,0xced, -0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52, -0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xcea,0xcea,0xd38,0xd38,0xd38,0xd38, -0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xb55,0xb55,0xb55,0xb55, -0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55, -0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb58,0xb58,0xb58,0xb58, -0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58, -0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb67,0xb67,0xb67,0xb67, -0xb67,0xb5e,0xb6a,0xb70,0xb70,0xb70,0xb64,0xb64,0xb64,0xb6d,0xb61,0xb61,0xb61,0xb61,0xb61,0xb5b, -0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb70,0xb70,0xb70,0xb70,0xb70,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb67,0xb67, -0xb70,0xb70,0xb70,0xb64,0xb64,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb70,0xb70,0xb70,0xb70,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb64,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0x16e3,0x16e3,0xb7c,0xb73,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb73,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb73,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb73,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb73,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, +0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x93,0x93,0x93,0x747,0x747,0x747,0x747,0x747,0x747, +0x93,0x93,0x747,0x747,0x747,0x747,0x747,0x747,0x93,0x93,0x747,0x747,0x747,0x747,0x747,0x747, +0x93,0x93,0x747,0x747,0x747,0x93,0x93,0x93,0xb4c,0xb4c,0xb4c,0xb4c,0x96,0x96,0x96,0x96, +0x96,0x96,0x96,0x96,0x96,0x186f,0x186f,0x186f,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52, +0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0x99,0x99,0x99,0x99,0x99, +0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, +0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b, +0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c, +0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0x9f,0x9f,0xfe4,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67, +0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67, +0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,0x1b24,0x1b24,0x9f,0x9f,0x9f,0x9f,0x9f, +0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xa2,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7f,0xb7f,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7f,0xa2,0xb7f,0xb7f, +0xa2,0xa2,0xb7f,0xa2,0xa2,0xb7f,0xb7f,0xa2,0xa2,0xb7f,0xb7f,0xb7f,0xb7f,0xa2,0xb7f,0xb7f, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7c,0xb7c,0xb7c,0xb7c,0xa2,0xb7c,0xa2,0xb7c,0xb7c,0xb7c, +0xb7c,0xcf3,0xb7c,0xb7c,0xa2,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, 0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, -0xb7f,0xb7f,0xb7f,0xb7f,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85, -0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85, -0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0x1adc,0x1adc,0x1adc,0x1adc,0x1adc,0x1adc,0x1adc,0x1ba8,0x1ba8, -0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, -0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, -0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2, -0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbdf,0xbe2,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf, -0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xce1,0xce4,0xdd1,0xdd1,0xdd1,0xdd1,0xdd1,0xdd1,0xdd1, -0xdd1,0xdd1,0xdd1,0xdd1,0xee8,0xee8,0xee8,0xee8,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee, -0xbee,0xbee,0xce7,0xce7,0xce7,0xce7,0xce7,0xce7,0xce7,0xce7,0xdd4,0xe88,0xdd4,0xdd4,0xdd4,0xdd4, -0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xfd8,0x1272,0x1272,0xddd,0xddd,0xddd,0xddd,0xddd,0xde3,0xde0,0xefa, -0xefa,0xefa,0xefa,0x13e0,0xfea,0x13e0,0x132c,0x132c,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21, -0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc4e,0xc4b,0xc4e,0xc4b,0xc4e,0xc4b, -0x10fe,0x10fb,0xff0,0xfed,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24, -0xc24,0xc24,0xc24,0xc24,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27, -0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27, -0xc27,0xc27,0xc27,0xc27,0xc2a,0xc2a,0xc2a,0xc30,0xc2d,0xc54,0xc51,0xc30,0xc2d,0xc30,0xc2d,0xc30, -0xc2d,0xc30,0xc2d,0xc30,0xc2d,0xc30,0xc2d,0xc30,0xc2d,0xc30,0xc2d,0xc30,0xc2d,0xc2a,0xc2a,0xc2a, -0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a, -0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a, -0xc30,0xc2d,0xc30,0xc2d,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a, -0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a, -0xc30,0xc2d,0xc2a,0xc2a,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc39,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc39,0xc39,0xc39,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc36,0xc33,0xc33,0xc33,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c, -0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c, -0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xc6c,0xce7,0xd53,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4, -0xdd4,0xdd4,0xe88,0xe88,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xeeb,0xfd8,0xfd8,0xfd8,0xfd8,0xfd8, -0xfd8,0xfd8,0xfd8,0xfd8,0xfd8,0x1296,0x1296,0x1275,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b, -0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b, -0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd11,0xd11, -0xd11,0xd11,0xd11,0xd0e,0xd23,0xd23,0xd23,0xd1d,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23, -0xd23,0xd23,0xd23,0xd1d,0xd23,0xd23,0xd23,0xd23,0xd17,0xd17,0xd20,0xd20,0xd20,0xd20,0xd14,0xd14, -0xd14,0xd14,0xd14,0xd1a,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9, -0xde6,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9,0xde9,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23, -0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd1d,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23, -0xd23,0xd23,0xd23,0xd23,0xd23,0xd17,0xd17,0xd17,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a, -0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a, -0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26, -0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xefd,0xefd,0xefd,0xefd, -0xefd,0xefd,0xefd,0x1107,0x1107,0xff3,0xff3,0xff3,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29, -0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29, -0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, -0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, -0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7f,0xb7f,0xa2,0xb7f,0xb7f,0xb7f,0xb7f,0xa2,0xa2,0xb7f,0xb7f,0xb7f, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xa2,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xa2,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7f,0xb7f,0xa2,0xb7f,0xb7f,0xb7f,0xb7f,0xa2, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xa2,0xb7f,0xa2,0xa2,0xa2,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, +0xb7f,0xa2,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xddd,0xddd,0xa2,0xa2,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7c,0xb7c,0xb7c,0xb76, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xef4,0xef1,0xa2,0xa2,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, +0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xa5,0xb85,0xa5,0xa5, +0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5, +0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xc0c,0xc0c,0xc0c,0xc0c, +0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0x1b2a,0xc0c,0xc0c,0xc0c,0xc0c,0xc06,0xc06, +0xc09,0x1b27,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0x1b2a,0xc15,0xc15,0xc15,0xc15, +0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc15,0xc0f,0xc0f, +0xc12,0xc75,0xc75,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xc1b,0xc1b,0xc1b,0xc1b, +0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc1b,0xc18,0xc18, +0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xc21,0xc21,0xc21,0xc21, +0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xc21,0xb1,0xc21,0xc21,0xc21,0xb1,0xc1e,0xc1e, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd05,0xd05,0xd05,0xd05, +0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05, +0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0xd05,0x14f1,0x14f1,0xb4,0xcf6,0xcf6,0xcf6,0xd02, +0xd02,0xd02,0xd02,0xcf6,0xcf6,0xd02,0xd02,0xd02,0xb4,0xb4,0xb4,0xb4,0xd02,0xd02,0xcf6,0xd02, +0xd02,0xd02,0xd02,0xd02,0xd02,0xcf9,0xcf9,0xcf9,0xb4,0xb4,0xb4,0xb4,0xcfc,0xb4,0xb4,0xb4, +0xd08,0xd08,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xd0b,0xd0b,0xd0b,0xd0b, +0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xb7,0xb7, +0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7, +0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4, +0x14f4,0x14f4,0x14f4,0x14f4,0xba,0xba,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4, +0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0xba,0x1a76, +0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, +0xd2f,0xd2f,0xd2f,0xd2f,0xbd,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, +0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xbd,0xd2f,0xd2f,0xd2f,0xd2f, +0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xbd, +0xd2f,0xd2f,0xbd,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, +0xd2f,0xd2f,0xbd,0xbd,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, +0xd2f,0xd2f,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd, +0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd, +0xbd,0xbd,0xbd,0xbd,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32, +0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xc0, +0xc0,0xc0,0xc0,0xc0,0xd71,0xd71,0xd77,0xc3,0xc3,0xc3,0xc3,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e, +0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e, +0xd6e,0xd6e,0xd6e,0xd6e,0xc3,0xc3,0xc3,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74, 0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38, -0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, +0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xc6,0xd35, +0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41, +0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xc9,0xc9, +0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9, +0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833, +0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xcc,0xcc,0xd44,0xcc,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, 0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, -0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50, -0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50, -0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2, -0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2, -0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8, -0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf5,0xdf5,0xdf5, -0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8, -0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8, -0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xeb8,0xeb8,0xe0a,0xe0a,0xf00,0xf00,0xf00,0xf00, -0xf00,0xf00,0xf00,0xfff,0xfff,0x1002,0xfff,0xfff,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc, -0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xe19,0xe16,0xe19,0xe16,0xe19,0xe16,0xe19,0xe16, -0xe19,0xe16,0xe19,0xe16,0xe19,0xe16,0xe19,0xe16,0xe19,0xe16,0xe19,0xe16,0xe19,0xe16,0xe19,0xe16, -0xe19,0xe16,0xe19,0xe16,0xe19,0xe16,0xe19,0xe16,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25, -0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25, -0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b, -0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0x1b1e,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, -0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0x1b1b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b, -0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b, -0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xeb2,0xeb2,0xeb2,0xeb2,0xeb2,0xeb2,0xeb2,0xeb2, -0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xf03, -0xf03,0xf03,0xf03,0x1005,0x1005,0x1005,0x1005,0x1005,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c, -0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c, -0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55, -0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55, -0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe55,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e, -0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e, -0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe58,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b, -0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b, -0xe5b,0xe5b,0xe5b,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67, -0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe61,0xe6a, -0x1011,0x100b,0x101a,0x1008,0xe67,0xe67,0x1008,0x1008,0xe79,0xe79,0xe6d,0xe79,0xe79,0xe79,0xe70,0xe79, -0xe79,0xe79,0xe79,0xe6d,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79, -0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c, -0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c, -0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94, -0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94, -0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5, -0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110, -0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0xee8,0xee8,0xee8,0xee5,0xee5,0xee5,0xee5,0xee5, -0x1149,0x1392,0x1392,0x1392,0x1392,0x1317,0x1317,0x1317,0x1395,0x131a,0x131a,0x1395,0x14d6,0x14d6,0x14d6,0x14d6, -0x14d6,0x14d6,0x14d6,0x1797,0x1797,0x1797,0x1797,0x185d,0xefd,0xefd,0xefd,0xefd,0xff3,0xff3,0xff3,0xff3, -0xff3,0xff3,0xff3,0xff3,0xff3,0xff3,0xff3,0xff3,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6, -0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0x14f1, -0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f4,0x1866,0x1866, -0x18e7,0x1866,0x1bc6,0x179d,0x132f,0x1152,0xf00,0xf00,0xf1e,0xf1e,0xf1e,0xf1e,0xf30,0xf39,0xf3c,0xf39, -0xf3c,0xf39,0xf3c,0xf39,0xf3c,0xf39,0xf3c,0xf39,0xf39,0xf39,0xf3c,0xf39,0xf39,0xf39,0xf39,0xf39, -0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39, -0xf21,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf33,0xf1e,0xf33,0xf30,0xf30,0xf45,0xf42,0xf45,0xf45, -0xf45,0xf42,0xf42,0xf45,0xf42,0xf45,0xf42,0xf45,0xf42,0x102c,0x102c,0x102c,0x1167,0x1023,0x102c,0x1023, -0xf42,0xf45,0xf42,0xf42,0x1023,0x1023,0x1023,0x1023,0x1026,0x1029,0x1167,0x1167,0xf48,0xf48,0x103e,0x1035, -0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x1035,0x1035,0x103e,0x1035, -0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0xf4e,0xf4e,0xf4e,0xf4e, -0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e, -0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf5d,0xf5d,0xf5d,0xf5d, +0xd44,0xd44,0xcc,0xd44,0xd44,0xcc,0xcc,0xcc,0xd44,0xcc,0xcc,0xd44,0xd47,0xd47,0xd47,0xd47, +0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47, +0xd47,0xd47,0xd47,0xcf,0xcf,0xcf,0xcf,0xcf,0xcf,0xcf,0xcf,0xcf,0xdf8,0xdf8,0xdf8,0xdf8, +0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0x14f7,0x14f7,0x17a9,0x17a9,0xd5,0x10e6,0x10e6,0x10e6,0x10e6, +0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x1a85,0x126,0x126,0x126,0xe0a,0xe0a,0xe0a,0xe0a, +0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a, +0xe0a,0xe0a,0xe0a,0xe01,0xe01,0xe07,0xe07,0xe01,0xd8,0xd8,0xe04,0xe04,0x1113,0x1113,0x1113,0x1113, +0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0x1c56,0xc72,0xc72,0xc72,0xc72, +0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xe1c,0xe19,0xe1c,0xe19, +0xe19,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0x1161,0x115e,0x1161,0x115e,0x115b,0x115b,0x115b,0x13ef,0x13ec, +0xde,0xde,0xde,0xde,0xde,0xe16,0xe13,0xe13,0xe13,0xe10,0xe16,0xe13,0xe1f,0xe1f,0xe1f,0xe1f, +0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f, +0xe1f,0xe1f,0xe1f,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1f,0xe1f,0xe1f,0xe1f, +0xe1f,0xe1f,0xe1f,0xe1,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1,0xe1f,0xe1f,0xe1f,0xe1f, +0xe1f,0xe1f,0xe1f,0xe1,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1,0xe25,0xe25,0xe25,0xe25, +0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe22,0xe22,0xe22,0xe22, +0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe28,0xe28,0xe28,0xe28, +0xe28,0xe28,0xe7,0x13f2,0xe7,0xe7,0xe7,0xe7,0xe7,0x13f2,0xe7,0xe7,0xe7f,0xe7f,0xe7f,0xe7f, +0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe40,0xe34,0xe34,0xe34, +0xea,0xe34,0xe34,0xea,0xea,0xea,0xea,0xea,0xe34,0xe34,0xe34,0xe34,0xe40,0xe40,0xe40,0xe40, +0xea,0xe40,0xe40,0xe40,0xea,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40, +0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0x1914,0x1914,0xea,0xea, +0xe31,0xe31,0xe31,0xea,0xea,0xea,0xea,0xe37,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a, +0x1911,0xea,0xea,0xea,0xea,0xea,0xea,0xea,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe43,0xe43, +0xe3a,0xea,0xea,0xea,0xea,0xea,0xea,0xea,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0x1167,0x1167,0xed,0xed,0xed,0xed,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe52,0xe52,0xe52, +0xe4f,0xe4f,0xe52,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xed,0xed, +0xed,0xed,0xed,0xed,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0x1164,0xed, +0xed,0xed,0xe49,0xe49,0xe58,0xe58,0xe58,0xe58,0xf0,0xf0,0xf0,0xf0,0xe58,0xe58,0xe58,0xe58, +0xe58,0xe58,0xe58,0xe58,0xe55,0xe58,0xe58,0xe58,0xe58,0xe58,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, +0xf0,0xf0,0xf0,0xf0,0x1506,0x150c,0x1509,0x1854,0x17af,0x1878,0x1878,0x1878,0x1878,0x1878,0x191a,0x1917, +0x191d,0x1917,0x191d,0x19dd,0x1a79,0x1a79,0x1a79,0x1b3c,0x1b3c,0x1b36,0x1b33,0x1b36,0x1b33,0x1b36,0x1b33,0x1b36, +0x1b33,0x1b39,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3, +0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3, +0xf3,0xf3,0xf3,0xf3,0xe7c,0xe7c,0xe7c,0xe79,0xe79,0xe70,0xe70,0xe79,0xe76,0xe76,0xe76,0xe76, +0x1a7c,0xf6,0xf6,0xf6,0x12cc,0x12cc,0x12cc,0x12cf,0x12cf,0x12cf,0x12d2,0x12d2,0x12d5,0x12d2,0x14a,0x14a, +0x14a,0x14a,0x14a,0x14a,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0x13fe,0x13fe,0xf9,0xf9,0xf9,0xf9, +0xf9,0xf9,0xf9,0xe82,0x133b,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9, +0xf9,0xf9,0xf9,0x1338,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45, +0xc45,0xc45,0xc45,0xc45,0xeaf,0xea0,0xe9a,0xeac,0xea9,0xea3,0xea3,0xeb2,0xe9d,0xea6,0xfc,0xfc, +0xfc,0xfc,0xfc,0xfc,0xf36,0xf36,0xf21,0xf36,0xf39,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c, +0x1b3f,0x102,0x102,0x102,0xf30,0xf30,0xf30,0xf30,0xf30,0xf30,0xf30,0xf30,0xf30,0xf30,0xf42,0xf42, +0xf2a,0xf2d,0xf42,0xf42,0xf27,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf24, +0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a, +0xf2a,0x1b42,0x1b42,0x102,0x1b4b,0x1b45,0x19e3,0x19e0,0x19e3,0x19e3,0x19e3,0x1a82,0x1a7f,0x1a82,0x1a7f,0x105, +0x105,0x105,0x105,0x105,0x1b4b,0x1b45,0x105,0x1b45,0x105,0x1b45,0x1b4b,0x1b45,0x1b4b,0x1b45,0x105,0x105, +0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105, +0x105,0x105,0x1b48,0x1b48,0x1b48,0x1a82,0x1a7f,0x1515,0x1407,0x1407,0x133e,0x103e,0x103e,0x103e,0x103e,0x103e, +0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51, +0xf51,0xf51,0xf51,0xf51,0xf4e,0xf4e,0xf54,0xf54,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108, 0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d, -0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0x1512, -0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512,0x1512, -0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63, -0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63, -0xfab,0xfc3,0xfba,0xfc0,0xfc0,0xfc3,0xfc3,0xfba,0xfba,0xfc0,0xfc0,0xfc0,0xfc0,0xfc0,0xfc3,0xfc3, -0xfc3,0xfab,0xfab,0xfab,0xfab,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3, -0xfc3,0xfc3,0xfab,0xfba,0xfbd,0xfab,0xfab,0xfc0,0xfc0,0xfc0,0xfc0,0xfc0,0xfc0,0xfae,0xfc3,0xfc0, -0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0x1131,0x1131,0x112e,0x112b,0xfb4,0xfb4, -0xfdb,0xfdb,0xfdb,0xfdb,0x1296,0x1296,0x1275,0x1275,0x1275,0x1272,0x1272,0x1272,0x1272,0x1275,0x1398,0x1275, -0x1275,0x1275,0x1272,0x1275,0x1296,0x1272,0x1272,0x1272,0x1275,0x1275,0x1272,0x1272,0x1275,0x1272,0x1272,0x1275, -0xff6,0xff6,0xff6,0xff6,0xff6,0xff3,0xff3,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0x14eb,0x14eb,0x14eb, -0x1107,0xff3,0xff3,0xff3,0xff3,0x12a2,0x127e,0x127e,0x127e,0x127e,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb, -0x1017,0x1017,0x1014,0x100e,0x1014,0x100e,0x1014,0x100e,0x1014,0x100e,0x100b,0x100b,0x100b,0x100b,0x1020,0x101d, -0x100b,0x1164,0x13ec,0x13ef,0x13ef,0x13ec,0x13ec,0x13ec,0x13ec,0x13ec,0x13f2,0x13f2,0x1506,0x14fa,0x14fa,0x14f7, -0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x1032,0x102f,0x102f,0x103e,0x1035,0x133b,0x1338,0x16ec, -0x133b,0x1338,0x13fb,0x13f8,0x1509,0x1509,0x150f,0x1509,0x150f,0x1509,0x150f,0x1509,0x150f,0x1509,0x150f,0x1509, -0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035, -0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x1035, -0x1038,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x103e,0x1035,0x103e,0x1035,0x103e,0x103e,0x1035, -0x1041,0x1041,0x1047,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d, -0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d, -0x104d,0x1047,0x1041,0x1041,0x1041,0x1041,0x1047,0x1047,0x1041,0x1041,0x104a,0x1404,0x1401,0x1401,0x104d,0x104d, -0x1044,0x1044,0x1044,0x1044,0x1044,0x1044,0x1044,0x1044,0x1044,0x1044,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407, -0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062, -0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062, -0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b, -0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106e,0x106e,0x106e,0x1071,0x106e,0x106e,0x1074,0x1074, -0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077, -0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077, -0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1083,0x107a,0x1089,0x1086, -0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080, -0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080, -0x1341,0x133e,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x1098,0x1119, -0x108c,0x108c,0x108c,0x1092,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x108f,0x108f,0x1092,0x109e, -0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095, -0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095,0x109b,0x1095, -0x151e,0x151b,0x151e,0x151b,0x1521,0x1521,0x16f5,0x140a,0x10a7,0x10a7,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa, -0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa, -0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7, -0x10a7,0x10a7,0x10a7,0x10a7,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b3, -0x10b3,0x10b3,0x110d,0x10bc,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb, -0x10cb,0x10cb,0x10cb,0x10cb,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b9,0x10b9, -0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9, -0x10b9,0x10b9,0x10b9,0x10b9,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da, -0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da,0x10da, -0x10da,0x10da,0x10da,0x10da,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec, -0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec,0x10ec, -0x10ec,0x10ec,0x10ec,0x10ec,0x10f5,0x10f5,0x10f5,0x10f5,0x110a,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5, -0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5, -0x10f5,0x10f5,0x10f5,0x10f5,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8, +0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,0xf57,0xf57,0xf57,0xf57,0x1170,0x1170,0x10b,0x10b,0x10b,0xf5a, +0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b, +0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x1701,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e, +0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e, +0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0xf66,0xf66,0xf66,0x1521, +0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x111,0xf63,0xf63,0xf63,0xf63, +0x151e,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0xf69,0xf69,0xf69,0xf69, +0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0x192c,0x192c, +0x192c,0x192c,0x192c,0x192c,0x192c,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x1065,0x1065,0x1065,0x1065, +0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1053,0x1053,0x1053,0x1053,0x1053,0x1053,0x1053,0x1053, +0x1062,0x1062,0x1059,0x1056,0x117,0x117,0x117,0x1068,0x1068,0x105c,0x105c,0x105c,0x105f,0x105f,0x105f,0x105f, +0x105f,0x105f,0x105f,0x105f,0x105f,0x105f,0x117,0x117,0x117,0x1065,0x1065,0x1065,0x106b,0x106b,0x106b,0x106b, +0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x1080,0x1080,0x1080,0x1080, +0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1083,0x1083,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a, +0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x10aa,0x10aa,0x10aa,0x10aa, +0x10a4,0x17b5,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x10b0,0x10b0,0x10a7,0x10a7,0x10a7,0x10a7, +0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x10ce,0x10ce,0x10ce,0x10ce, +0x10ce,0x10ce,0x10ce,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c8,0x10cb, +0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x10c5,0x10e0,0x10e0,0x10e0,0x10e0, +0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10da,0x10da,0x10d1,0x10d1,0x10da, +0x10da,0x10d1,0x10d1,0x123,0x123,0x123,0x123,0x123,0x123,0x123,0x123,0x123,0x10dd,0x10dd,0x10dd,0x10d1, +0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10d1,0x10da,0x123,0x123,0x10d7,0x10d7,0x10d7,0x10d7, +0x10d7,0x10d7,0x10d7,0x10d7,0x10d7,0x10d7,0x123,0x123,0x10d4,0x10e3,0x10e3,0x10e3,0x152d,0x126,0x126,0x126, +0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126, +0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x10e9,0x10e9,0x10e9,0x10e9, +0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9, +0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10ec,0x129,0x129,0x10ef,0x10ef,0x10ef,0x10ef, +0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef, +0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x12c,0x12c,0x12c,0x10f2,0x10f2,0x10f2,0x10f2, +0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x12f,0x12f,0x12f, +0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x10f8,0x10f8,0x10f8,0x10f8, 0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8, -0x10f8,0x10f8,0x10f8,0x10f8,0x1104,0x1104,0x1104,0x1104,0x129c,0x129c,0x129c,0x129c,0x129c,0x129c,0x129c,0x129c, -0x1494,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6, -0x18c6,0x18c6,0x18c6,0x18c6,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179, -0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1179,0x1170,0x1170,0x1173,0x1173,0x1179,0x1170, -0x1170,0x1170,0x1170,0x1170,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f, -0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f, -0x117f,0x117f,0x117f,0x117f,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a, -0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a,0x119a, -0x119a,0x119a,0x119a,0x119a,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6, -0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6, -0x11a6,0x11a6,0x11a3,0x11a9,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5, -0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5, -0x11b5,0x11b5,0x11b5,0x11b5,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb, -0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x12e7,0x11c1,0x12ea,0x11c1,0x11c1,0x11c1,0x11c1,0x11be,0x11be,0x11be,0x11c1, -0x16f8,0x16fb,0x1923,0x1920,0x11c4,0x11c4,0x11c4,0x11d3,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9, -0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9, -0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11c7,0x11d3,0x11d3,0x11c4,0x11c4,0x11c4,0x11c4,0x11d3,0x11d3, -0x11c4,0x11c4,0x11d3,0x11d3,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5, -0x11e5,0x11e5,0x11e5,0x11e5,0x11e8,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11df,0x11df,0x11df,0x11e5,0x11e2, -0x1527,0x152a,0x152d,0x152d,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7, -0x11f7,0x11f7,0x11f7,0x11f7,0x11eb,0x11f7,0x11eb,0x11eb,0x11eb,0x1200,0x1200,0x11eb,0x11eb,0x1200,0x11f7,0x1200, -0x1200,0x11f7,0x11eb,0x11ee,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7, -0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7, -0x11f7,0x11f7,0x11f7,0x11f7,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212, -0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212, -0x1212,0x1212,0x1212,0x1212,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a, -0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a, -0x122a,0x1227,0x1227,0x1227,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233, -0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233, -0x1233,0x1233,0x1233,0x1233,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242, -0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242, -0x1242,0x1242,0x1242,0x1242,0x1248,0x1248,0x1254,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257, -0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x125a,0x1257, -0x125a,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x125a, -0x1257,0x1257,0x1257,0x1257,0x1254,0x1254,0x1254,0x1248,0x1248,0x1248,0x1248,0x1254,0x1254,0x124e,0x124b,0x1251, -0x1251,0x1260,0x125d,0x125d,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263, -0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263, -0x1263,0x1263,0x1263,0x1263,0x1269,0x1269,0x1269,0x1266,0x1266,0x1266,0x1263,0x1263,0x1263,0x1263,0x1266,0x1263, -0x1263,0x1263,0x1269,0x1266,0x1269,0x1266,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263, -0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263, -0x1263,0x1269,0x1266,0x1266,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263, -0x1263,0x1263,0x1263,0x1bcf,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f8,0x19f2,0x1be1,0x1be1,0x1be1, -0x1be4,0x1bde,0x1be4,0x1bde,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1287, -0x1287,0x1287,0x126c,0x1929,0x138f,0x1290,0x138f,0x138f,0x138f,0x138f,0x138f,0x138f,0x138f,0x138f,0x138f,0x138f, -0x138f,0x1290,0x138f,0x1290,0x1275,0x1275,0x131d,0x1272,0x131d,0x131d,0x131d,0x131d,0x1272,0x1272,0x1296,0x1272, -0x1272,0x1272,0x1272,0x1272,0x1272,0x1275,0x1296,0x1296,0x1275,0x1296,0x1272,0x1275,0x1275,0x1278,0x1296,0x1272, -0x1272,0x1296,0x1275,0x1275,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1281,0x1281, -0x1281,0x1281,0x13a4,0x1386,0x128a,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x1827, -0x1827,0x1827,0x1827,0x1827,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1530, -0x1530,0x1a76,0x1a76,0x1a76,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284, -0x1284,0x1284,0x1284,0x1284,0x138f,0x138f,0x1290,0x138f,0x138f,0x138f,0x1290,0x138f,0x138f,0x138f,0x128a,0x128a, -0x128a,0x128a,0x128a,0x1389,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x128d,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x128d,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x140d,0x140d, -0x19d4,0x1a76,0x1a76,0x1a76,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x128d,0x138c,0x128d, -0x128d,0x138c,0x138c,0x128d,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1, -0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1, -0x12b1,0x12b1,0x12b1,0x12b1,0x133b,0x1338,0x133b,0x1338,0x133b,0x1338,0x133b,0x1338,0x133b,0x1338,0x13fb,0x150f, -0x150f,0x150f,0x17a3,0x1917,0x150f,0x150f,0x16ef,0x16ef,0x16ef,0x16e9,0x16ef,0x16e9,0x191a,0x1917,0x19d1,0x19ce, -0x19d1,0x19ce,0x19d1,0x19ce,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f, -0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f, -0x135f,0x135f,0x135f,0x135f,0x1374,0x1365,0x1374,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377, -0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377, -0x1377,0x1377,0x1377,0x1377,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x137d,0x137d,0x137d,0x137d, -0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d, -0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x1383,0x1383,0x1383,0x1383, -0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383, -0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x13ad,0x13aa,0x18cc,0x18cc, -0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc, -0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b3,0x13b3,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b3,0x13b6,0x13b6,0x13b6,0x13b3,0x13b6,0x13b3,0x13b6, -0x13b3,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b9,0x13b6,0x13b6,0x13b6,0x13b6,0x13b3,0x13b6,0x13b3,0x13b3,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b3,0x13b3,0x13b3,0x13b3, -0x13b3,0x13b3,0x13b3,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3, -0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x1539,0x1539,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x153c,0x153c,0x153c,0x153c,0x153c,0x1779,0x1779,0x1779,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x153c,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x153c,0x1779,0x1779, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b9,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x1539,0x1539,0x153c,0x153c, -0x13b6,0x13b6,0x13b9,0x13b9,0x13b9,0x16a4,0x13b6,0x13b9,0x13b6,0x13b6,0x13b9,0x153f,0x153f,0x153c,0x153c,0x1779, -0x1779,0x1779,0x1779,0x1779,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x153c,0x153c,0x153c,0x16a4,0x153c,0x153c,0x153c,0x1779,0x1779,0x1779,0x177c,0x177c,0x177c,0x177c,0x177c, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x153c, -0x13b6,0x153c,0x13b9,0x13b9,0x13b6,0x13b6,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9, -0x13b9,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9, -0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b6,0x13b6,0x13b6,0x13b9,0x13b6,0x13b6,0x13b6,0x13b6,0x13b9,0x13b9,0x13b9, -0x13b6,0x13b9,0x13b9,0x13b9,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b9,0x13b6,0x13b9,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x16a4,0x13b6,0x13b6,0x13b6,0x13b6,0x153c,0x153c,0x1779, -0x1410,0x1410,0x1410,0x1410,0x1539,0x1539,0x1539,0x1539,0x1539,0x1539,0x153c,0x1779,0x1779,0x1779,0x1779,0x16fe, -0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6, -0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153f,0x153f,0x153c,0x153c, -0x153c,0x153c,0x1830,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x1539,0x1539,0x1539,0x1539,0x1539,0x1539,0x153c,0x13b6, -0x13b6,0x13b6,0x13b6,0x13b6,0x149a,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc, -0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x149a,0x13bc,0x13bc,0x13bc,0x149a,0x13bc,0x149a,0x13bc,0x149a,0x13bc,0x149a, -0x13bc,0x13bc,0x13bc,0x149a,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x149a,0x149a,0x13bc,0x13bc,0x13bc,0x13bc, -0x149a,0x13bc,0x149a,0x149a,0x13bc,0x13bc,0x13bc,0x13bc,0x149a,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc, -0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x16aa,0x16aa,0x177f,0x177f,0x13bf,0x13bf,0x13bf,0x13bc,0x13bc,0x13bc,0x13bf, -0x13bf,0x13bf,0x13bf,0x13bf,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629, -0x1629,0x1629,0x1629,0x1629,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2, -0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2, -0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c5,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2, -0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c5,0x13c5,0x13c5,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2, -0x13c2,0x13c2,0x13c2,0x13c2,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8, -0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8, -0x13c8,0x13c8,0x13c8,0x13c8,0x17ac,0x17ac,0x17a9,0x1701,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1413,0x1413, -0x1413,0x1413,0x1413,0x1413,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416, -0x1416,0x1416,0x1416,0x1545,0x1419,0x1548,0x1419,0x1419,0x1419,0x1419,0x1419,0x1419,0x1419,0x1419,0x1419,0x1419, -0x1419,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1704,0x1704,0x1b4b,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2, -0x17b2,0x17b2,0x1a79,0x1a79,0x1422,0x1422,0x1422,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434, -0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434, -0x1434,0x1434,0x1434,0x1434,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f, -0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f, -0x144f,0x144f,0x144f,0x144f,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455, -0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455, -0x1455,0x1455,0x1455,0x19da,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458, -0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458, -0x1458,0x1458,0x1458,0x1458,0x145e,0x145e,0x146a,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470, -0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470, -0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x146a,0x146a,0x146a,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e, -0x145e,0x145e,0x145e,0x146a,0x146d,0x1470,0x1473,0x1473,0x1470,0x1476,0x1476,0x1461,0x1464,0x170a,0x170d,0x170d, -0x170d,0x154e,0x1a82,0x1a7f,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x154b,0x1713, -0x1716,0x1710,0x1719,0x1719,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491, -0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491, -0x1491,0x1491,0x1491,0x1491,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb, -0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x18ff,0x18ff, -0x18ff,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x19c8,0x14eb,0x14eb, -0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x14eb,0x1863,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff, -0x18ff,0x18ff,0x18ff,0x18ff,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x1539,0x1539,0x1539,0x1539,0x1539,0x1539,0x1539,0x1539, -0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x153f,0x153c,0x153c,0x153c,0x153c,0x16a7,0x16a7,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x182d,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x155d,0x155d,0x155d,0x155d, -0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d, -0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x156f,0x156f,0x156f,0x156f, -0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f, -0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x1575,0x1575,0x1575,0x1575, -0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575, -0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1578,0x1578,0x1578,0x1578, -0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578, -0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x15a2,0x15a2,0x15a2,0x15a2, -0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x15a2,0x159c,0x159c,0x159c,0x1590,0x1590,0x1590,0x159c,0x159c, -0x1590,0x159f,0x1593,0x1590,0x15a5,0x15a5,0x1599,0x15a5,0x15a5,0x1596,0x17b5,0x1bdb,0x15b7,0x15b7,0x15b7,0x15b7, -0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7, -0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15a8,0x15c0,0x15c0,0x15c0,0x15c0, -0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0, -0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15ba,0x15c3,0x15c3,0x15c3,0x15c3,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15e1,0x15e1,0x15e1,0x15e1, -0x15e1,0x15e1,0x15e1,0x15e1,0x15d8,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1, -0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15ea,0x15ea,0x15ea,0x15ea, -0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea, -0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15fc,0x15fc,0x15fc,0x15fc, -0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15f9,0x15f9,0x15f9,0x15ed, -0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15f9,0x15f9,0x15ed,0x15f9,0x15f0,0x15fc,0x15fc,0x15fc,0x15fc, -0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc, -0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x1620,0x1620,0x1620,0x1620, -0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620, -0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x161d,0x161d,0x161d,0x1629,0x1629,0x1629,0x1629, +0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x132,0x132,0x132,0x132,0x132,0x10f5,0x10fb,0x10fb,0x10fb,0x10fb, +0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x135,0x135,0x135,0x135,0x10fe,0x10fe,0x10fe,0x10fe, +0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe, +0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x1176,0x1176,0x1176,0x1176, +0x117f,0x1176,0x1176,0x1176,0x117f,0x1176,0x1176,0x1176,0x1176,0x1173,0x13b,0x13b,0x117c,0x117c,0x117c,0x117c, +0x117c,0x117c,0x117c,0x1182,0x117c,0x1182,0x117c,0x117c,0x117c,0x1182,0x1182,0x13b,0x1185,0x1185,0x1185,0x1185, +0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185, +0x1185,0x1185,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x11a0,0x11a0,0x11a0,0x11a0, +0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0, +0x11a0,0x119d,0x1188,0x119d,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x141,0x1191,0x119a,0x1188,0x119a, +0x119a,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x1188, +0x1188,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x141,0x141,0x118b,0x1197,0x1197,0x1197,0x1197, +0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x141,0x141,0x141,0x141,0x141,0x141,0x1197,0x1197,0x1197,0x1197, +0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x141,0x141,0x141,0x141,0x141,0x141,0x1194,0x1194,0x1194,0x1194, +0x1194,0x1194,0x1194,0x11a3,0x11a6,0x11a6,0x11a6,0x11a6,0x1194,0x1194,0x141,0x141,0x156c,0x156c,0x156c,0x156c, +0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x1569,0x1a97,0x12e7,0x12c0,0x12de,0x12de, +0x12de,0x12de,0x12de,0x12de,0x12de,0x12c6,0x12c3,0x12ba,0x12ba,0x12e4,0x12ba,0x12ba,0x12ba,0x12ba,0x12c9,0x14a6, +0x14ac,0x14a9,0x14a9,0x18f3,0x16c8,0x16c8,0x1a64,0x144,0x144,0x144,0x144,0x144,0x11bb,0x11bb,0x11bb,0x11bb, +0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11b2,0x11b2,0x11b5,0x11be, +0x11b8,0x11b8,0x11b8,0x11be,0x147,0x147,0x147,0x147,0x147,0x147,0x147,0x147,0x12ab,0x12ab,0x12ab,0x12ab, +0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab, +0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x14d,0x14d,0x14d,0x11dc,0x11d0,0x11d0,0x11d0, +0x11d0,0x11d0,0x11d0,0x11d3,0x11e2,0x11e2,0x11d0,0x11d0,0x11d0,0x11d0,0x150,0x12db,0x11d6,0x11d6,0x11d6,0x11d6, +0x11d6,0x11d6,0x11d6,0x11d6,0x11d6,0x11d6,0x150,0x150,0x150,0x150,0x11d0,0x11d0,0x1200,0x11f4,0x1200,0x153, +0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153, +0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x11fd,0x11fd,0x1203,0x11f7,0x11fa,0x1218,0x1218,0x1218,0x1212, +0x1212,0x1209,0x1212,0x1212,0x1209,0x1212,0x1212,0x121b,0x1215,0x120c,0x156,0x156,0x120f,0x120f,0x120f,0x120f, +0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x156,0x156,0x156,0x156,0x156,0x156,0x1221,0x1221,0x1221,0x1221, +0x1221,0x1221,0x1221,0x159,0x159,0x159,0x159,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e, +0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e, +0x121e,0x121e,0x121e,0x121e,0x159,0x159,0x159,0x159,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a, +0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x15c,0x1227, +0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1224,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239, +0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x15f,0x15f, +0x15f,0x1233,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f, +0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x162,0x162, +0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245, +0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x165,0x165,0x165,0x165,0x165, +0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b, +0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b, +0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x16b,0x1263,0x1263,0x1b4e,0x16e,0x16e,0x16e,0x16e,0x16e, +0x16e,0x16e,0x16e,0x16e,0x16e,0x1935,0x16e,0x16e,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485, +0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836, +0x1836,0x1836,0x1836,0x1836,0x1836,0x1a88,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171, +0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171, +0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a, +0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a, +0x134a,0x134a,0x134a,0x134a,0x12b4,0x13b0,0x13b0,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174, +0x174,0x174,0x174,0x174,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1, +0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x1839, +0x174,0x174,0x174,0x174,0x12ae,0x12ae,0x12ae,0x12ae,0x12ae,0x12ae,0x12ae,0x12ae,0x12ae,0x174,0x174,0x174, +0x174,0x174,0x174,0x174,0x13d4,0x13d4,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174, +0x174,0x174,0x174,0x174,0x18d8,0x18d8,0x18d8,0x18d8,0x18d8,0x18d8,0x174,0x174,0x174,0x174,0x174,0x174, +0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174, +0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x174,0x1353,0x1353,0x1353,0x1353, +0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353, +0x1353,0x1353,0x1353,0x1353,0x1353,0x134d,0x134d,0x134d,0x177,0x177,0x1350,0x177,0x1365,0x1365,0x1365,0x1365, +0x1365,0x1365,0x1356,0x135f,0x1359,0x1359,0x135f,0x135f,0x135f,0x1359,0x135f,0x1359,0x1359,0x1359,0x1362,0x1362, +0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x135c,0x135c,0x135c,0x135c,0x17d,0x1368,0x1368,0x1368, +0x1368,0x1368,0x1368,0x17d,0x17d,0x1368,0x1368,0x1368,0x1368,0x1368,0x1368,0x17d,0x17d,0x1368,0x1368,0x1368, +0x1368,0x1368,0x1368,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x1368,0x1368,0x1368,0x1368, +0x1368,0x1368,0x1368,0x17d,0x1368,0x1368,0x1368,0x1368,0x1368,0x1368,0x1368,0x17d,0x15cf,0x15cf,0x15cf,0x15cf, +0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x136b,0x136b,0x136b,0x136b, +0x136b,0x136b,0x136e,0x1383,0x1383,0x1374,0x1374,0x1374,0x1374,0x1374,0x180,0x180,0x180,0x180,0x1371,0x1371, +0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1371,0x1377,0x1377, +0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1b54,0x1b57,0x1b57,0x1b51,0x1b51,0x1b57,0x180,0x180, +0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x153c,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386, +0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386, +0x1386,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389, +0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x186,0x186,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389, +0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x153f,0x186,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389, +0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x13b9,0x186,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389, +0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f, +0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x153f,0x186,0x186, +0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x13ce,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x154b,0x154b, +0x154b,0x154b,0x154b,0x154b,0x16bf,0x154b,0x154b,0x154b,0x1791,0x1842,0x1842,0x187b,0x187b,0x1a46,0x1af1,0x1af1, +0x189,0x189,0x189,0x189,0x1c3e,0x1bc0,0x1bc0,0x1bc0,0x154b,0x154b,0x154b,0x154b,0x154b,0x154b,0x154b,0x154b, +0x154b,0x154b,0x154b,0x16bc,0x16bc,0x189,0x189,0x189,0x154b,0x154b,0x154b,0x154b,0x1842,0x1842,0x1842,0x18de, +0x18de,0x19c2,0x1a46,0x1af1,0x1af1,0x189,0x189,0x189,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1be4,0x1be4,0x1be4,0x18c, +0x18c,0x18c,0x18c,0x1be4,0x1be4,0x1be4,0x1be4,0x1be4,0x1428,0x1428,0x1428,0x1428,0x18f,0x1428,0x1428,0x1428, +0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428, +0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x18f,0x1428,0x1428,0x18f,0x1428,0x18f,0x18f,0x1428, +0x18f,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x18f,0x1428,0x1428,0x1428,0x1428, +0x18f,0x1428,0x18f,0x1428,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x1428,0x18f,0x18f,0x18f,0x18f,0x1428, +0x18f,0x1428,0x18f,0x1428,0x18f,0x1428,0x1428,0x1428,0x18f,0x1428,0x1428,0x18f,0x1428,0x18f,0x18f,0x1428, +0x18f,0x1428,0x18f,0x1428,0x18f,0x1428,0x18f,0x1428,0x18f,0x1428,0x1428,0x18f,0x1428,0x18f,0x18f,0x1428, +0x1428,0x1428,0x1428,0x18f,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x18f,0x1428,0x1428,0x1428,0x1428, +0x18f,0x1428,0x1428,0x1428,0x1428,0x18f,0x1428,0x18f,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428, +0x1428,0x1428,0x18f,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428, +0x1428,0x1428,0x1428,0x1428,0x18f,0x18f,0x18f,0x18f,0x18f,0x1428,0x1428,0x1428,0x18f,0x1428,0x1428,0x1428, +0x1428,0x1428,0x18f,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428,0x1428, +0x1428,0x1428,0x1428,0x1428,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f, +0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f, +0x18f,0x18f,0x18f,0x18f,0x1425,0x1425,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f, +0x18f,0x18f,0x18f,0x18f,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x142b,0x142b,0x142b,0x142b,0x142b, +0x143a,0x142b,0x142e,0x142e,0x142b,0x142b,0x142b,0x1431,0x1431,0x192,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437, +0x1437,0x1437,0x1437,0x1437,0x1434,0x1440,0x1440,0x1440,0x1941,0x193e,0x193e,0x1a8e,0x192,0x192,0x192,0x192, +0x192,0x192,0x192,0x192,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1, +0x15e1,0x15e1,0x15e1,0x15e1,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x1449, +0x1443,0x1443,0x1449,0x1449,0x1452,0x1452,0x144c,0x144f,0x144f,0x1449,0x1446,0x195,0x195,0x195,0x195,0x195, +0x195,0x195,0x195,0x195,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455, +0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x198,0x198,0x198,0x198, +0x1716,0x1716,0x1455,0x1455,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716, +0x1716,0x1716,0x1716,0x1716,0x198,0x198,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716, +0x1716,0x1716,0x1716,0x1716,0x1461,0x1461,0x1461,0x1461,0x1461,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19b, +0x19b,0x19b,0x19b,0x19e9,0x1461,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e, +0x145e,0x145e,0x145e,0x145e,0x19ec,0x19ec,0x19ec,0x19ec,0x19ec,0x19ec,0x19ec,0x19ec,0x19b,0x19b,0x19b,0x19b, +0x19b,0x19b,0x19b,0x145b,0x145b,0x145b,0x145b,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464, +0x1464,0x1464,0x1464,0x1464,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x19e,0x19e,0x19e, +0x19e,0x19e,0x19e,0x19e,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x19e,0x19e, +0x19e,0x19e,0x19e,0x19e,0x1488,0x1488,0x1488,0x1488,0x1488,0x1488,0x1488,0x1488,0x1a1,0x1a1,0x1a1,0x1a1, +0x1a1,0x1a1,0x1a1,0x1a1,0x12e1,0x12de,0x12e1,0x12bd,0x12de,0x12e4,0x12e4,0x12e7,0x12e4,0x12e7,0x12ea,0x12de, +0x12e7,0x12e7,0x12de,0x12de,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x148b, +0x1494,0x148b,0x1494,0x1494,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x1497,0x148e,0x19f2,0x1b63,0x1a4,0x1a4, +0x1a4,0x1a4,0x1a4,0x1a4,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d, +0x155d,0x155d,0x1a7,0x1a7,0x155a,0x155a,0x155a,0x155a,0x155a,0x1560,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7, +0x1a7,0x1a7,0x1a7,0x1a7,0x1566,0x1566,0x1566,0x1566,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa, +0x1aa,0x1aa,0x1aa,0x1563,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x2af, +0x1bae,0x1bae,0x1bae,0x1bae,0x16cb,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2, +0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2, +0x1ad,0x1ad,0x1ad,0x1ad,0x1a97,0x1b69,0x1b69,0x1b69,0x1b69,0x1b69,0x1b69,0x1b69,0x1b69,0x1b69,0x1b69,0x1b69, +0x1b66,0x1b66,0x1b66,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0, +0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0, +0x1b0,0x1b0,0x1b0,0x1b0,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1b3, +0x1b3,0x1b3,0x1b3,0x1b3,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578, +0x1578,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578, +0x1578,0x1578,0x1b3,0x1b3,0x1575,0x156f,0x1572,0x157b,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e, +0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566, +0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581, +0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1944,0x1944,0x1944, +0x1944,0x1be7,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49, +0x1a49,0x1a49,0x1a49,0x1a49,0x1b9,0x1b9,0x1b9,0x1b9,0x1bc3,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9, +0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x172b,0x16ce,0x158a,0x16d4,0x1bc,0x1596,0x1596,0x1596, +0x1596,0x1596,0x1596,0x1596,0x1596,0x1bc,0x1bc,0x1596,0x1596,0x1bc,0x1bc,0x1596,0x1596,0x1596,0x1596,0x1596, +0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1bc,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596, +0x1596,0x1bc,0x1596,0x1596,0x1bc,0x1596,0x1596,0x1596,0x1596,0x1596,0x1bc,0x19ce,0x16d1,0x1593,0x1584,0x158a, +0x1584,0x158a,0x158a,0x158a,0x158a,0x1bc,0x1bc,0x158a,0x158a,0x1bc,0x1bc,0x158d,0x158d,0x1590,0x1bc,0x1bc, +0x172e,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1584,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x159c,0x1599,0x1599, +0x1596,0x1596,0x158a,0x158a,0x1bc,0x1bc,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1bc,0x1bc,0x1bc, +0x1587,0x1587,0x1587,0x1587,0x1587,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc, +0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1, +0x15b1,0x15b1,0x1bf,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1, +0x15c3,0x15c3,0x15c3,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15ba,0x15bd,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2, +0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2, +0x1731,0x1731,0x1731,0x1731,0x15cf,0x15cc,0x19f5,0x19f5,0x1a9d,0x1aa0,0x1a9a,0x1a9a,0x1c5,0x1c5,0x1c5,0x1c5, +0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e, +0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5, +0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, +0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5, +0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, +0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, +0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, +0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1, +0x15e1,0x15e1,0x15e1,0x15d8,0x15db,0x15de,0x15e1,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb, +0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15e4,0x15e4,0x1ce,0x1ce,0x1ce,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7, +0x15ed,0x15ed,0x16d7,0x15ed,0x15ed,0x15ed,0x15ea,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce, +0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x1d1,0x1d1,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6, +0x15f3,0x15f3,0x15f3,0x15f3,0x15f3,0x15f3,0x15f3,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1, +0x15fc,0x160e,0x160e,0x1602,0x160b,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4, +0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4, +0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614, +0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1d7, +0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x1611,0x1d7,0x1d7,0x1d7,0x1d7,0x1617,0x1617, +0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f, +0x1620,0x1620,0x1620,0x1620,0x1620,0x161a,0x1623,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620, +0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x1620,0x1620,0x1620,0x1620,0x1620,0x1da, 0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629, -0x1629,0x1629,0x162f,0x162f,0x162f,0x162c,0x162c,0x162c,0x1629,0x1629,0x1629,0x1629,0x163e,0x163e,0x163e,0x163e, -0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x1632,0x1632,0x1632,0x1632, -0x1632,0x1632,0x1632,0x1644,0x1644,0x1638,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x163e,0x163e,0x163e,0x163e, -0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e, -0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x164a,0x164a,0x164a,0x164a, -0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a, -0x164a,0x164a,0x164a,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x164d,0x164d,0x164d,0x164d, -0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, -0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x1671,0x1671,0x1671,0x1671, -0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671, -0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x167a,0x167a,0x167a,0x167a, -0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a, -0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x1692,0x1692,0x1692,0x1692, -0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x167d,0x168c,0x168c,0x167d, -0x167d,0x167d,0x167d,0x167d,0x167d,0x168c,0x167d,0x168f,0x168f,0x167d,0x168f,0x167d,0x1692,0x1692,0x1692,0x1692, -0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692, -0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x169b,0x169b,0x169b,0x169b, -0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b, -0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x16a1,0x16a1,0x16a1,0x16a1, -0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1, -0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x18ff,0x18ff,0x18ff,0x18ff, -0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x16e6,0x16e6,0x16e6,0x16e6,0x18ff,0x18ff,0x18ff,0x18ff, -0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x19c8,0x1707,0x1707,0x1707,0x1707, -0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707, -0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x1746,0x1746,0x1746,0x1746, -0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746, -0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x174c,0x1749, -0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746, +0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1dd, +0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1635, +0x1635,0x1635,0x1635,0x1635,0x1635,0x1635,0x1632,0x1632,0x1632,0x1632,0x1632,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0, +0x164d,0x164d,0x1650,0x1650,0x1653,0x1644,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3, +0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x1e3,0x1644,0x1644,0x1644,0x1644,0x1644, +0x1644,0x1644,0x1e3,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, +0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x164d,0x164d,0x164d, +0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c, +0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6, +0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665, +0x1665,0x1665,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1662,0x1662,0x1662,0x1662,0x1e9,0x1e9,0x1e9, +0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1668, +0x167a,0x167a,0x1668,0x1668,0x1668,0x1668,0x1ef,0x1ef,0x167a,0x167a,0x167d,0x167d,0x1668,0x1668,0x167a,0x166e, +0x166b,0x1671,0x1683,0x1683,0x1674,0x1674,0x1677,0x1677,0x1677,0x1683,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a, +0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x1737,0x1737,0x1737,0x1737,0x1734,0x1734,0x1ef,0x1ef, +0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef, +0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef, +0x1f2,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, +0x1686,0x1686,0x1686,0x1686,0x1686,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2, +0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1f5,0x1f5,0x1f5,0x1f5, +0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689, +0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689, +0x1689,0x1689,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689, +0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689, +0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1f5,0x1f5,0x1aa3,0x1aa3,0x1f5,0x1f5, +0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5, +0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5, +0x168c,0x169b,0x1692,0x168f,0x16a1,0x16a1,0x1695,0x16a1,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8, +0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8, +0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a4,0x16a4,0x16a4,0x16a4,0x16a4,0x16a4, +0x16a4,0x16a4,0x16a4,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x16ad, 0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f, -0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f, -0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752, -0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752, -0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764, -0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764,0x1764, -0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767, -0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767, -0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a, -0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a, -0x176a,0x176a,0x176a,0x176d,0x176d,0x176d,0x176d,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a, -0x176a,0x176a,0x176a,0x176a,0x176a,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176a,0x176d,0x176d, -0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d, -0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d, -0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785, -0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785, -0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x1ae5,0x1a3a,0x1a3a,0x1a3d, -0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x178b,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x18d5, -0x1788,0x1788,0x1788,0x1788,0x1788,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836, -0x1836,0x1836,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3, -0x1788,0x19b3,0x19b3,0x1a3a,0x1a3a,0x1a3a,0x1a3a,0x1a3a,0x1a3a,0x1a3a,0x1a3a,0x1ae2,0x1bb4,0x1a3d,0x1a3d,0x1a3d, -0x18d2,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d2,0x18d2, -0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1b4b,0x1b4e,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48, -0x1b48,0x1b48,0x1b48,0x192c,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af, -0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1839,0x1836,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2, -0x1839,0x18d5,0x18d5,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1836,0x17b8,0x1839,0x1839,0x1839,0x1a3a, -0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x17b8,0x1836,0x1836,0x1836,0x1836,0x1836,0x18d2,0x19b3,0x19b3,0x19b3, -0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x18d2, -0x17cd,0x17cd,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca, -0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca,0x17ca, -0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd, -0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd, -0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b, -0x181b,0x181b,0x181b,0x181b,0x181b,0x1818,0x1818,0x1818,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803, -0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b, -0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b, -0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f, -0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f, -0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842, -0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842, -0x1842,0x1842,0x1842,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee, -0x1899,0x1899,0x1899,0x1899,0x19ec,0x19ec,0x189c,0x189c,0x189c,0x189c,0x1884,0x1884,0x1884,0x1884,0x1884,0x1884, -0x1884,0x1884,0x1884,0x1884,0x1884,0x1884,0x1884,0x1896,0x1887,0x188a,0x188d,0x189f,0x189f,0x193e,0x1890,0x1890, -0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899, -0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899, -0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba, -0x18ba,0x18ba,0x18ba,0x18a5,0x18ab,0x18a8,0x18a8,0x18a8,0x18a8,0x18b7,0x18bd,0x18a8,0x18a8,0x18a8,0x18a8,0x18b4, -0x18ba,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba, -0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba,0x18ba, -0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad, -0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ba5,0x1ba5,0x1ba5, -0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc, -0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc,0x18cc, -0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x19b3,0x19b3,0x19b3,0x19b3, -0x19b3,0x1a3a,0x1ae2,0x19b3,0x19b3,0x19b3,0x19b3,0x1ae5,0x1ae2,0x1bb4,0x19b3,0x1a3a,0x19b3,0x19b3,0x19b3,0x19b3, -0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3, -0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3, -0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db, -0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db, -0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de, -0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x18de,0x1bb7, -0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1, -0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1, -0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953, -0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953, -0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e, -0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e, -0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974, -0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974, -0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f, -0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f,0x198f, -0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992, +0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x1947,0x1fe,0x1fe,0x173d,0x173d,0x173d, +0x1749,0x1749,0x173d,0x173d,0x173d,0x173d,0x174c,0x173d,0x173d,0x173d,0x173d,0x1740,0x1fe,0x1fe,0x1fe,0x1fe, +0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1743,0x1743,0x1752,0x1752,0x1752,0x1743, +0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201, +0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201, +0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767, +0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x1767,0x207,0x1767,0x1767,0x207,0x207, +0x207,0x207,0x207,0x1764,0x1764,0x1764,0x1764,0x1764,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x20a, +0x176a,0x20a,0x176a,0x176a,0x176a,0x176a,0x20a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a, +0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x20a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a, +0x176a,0x176d,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, +0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776, +0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x20d,0x20d,0x20d,0x20d,0x20d, +0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773, +0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x20d,0x20d,0x20d,0x20d,0x20d, +0x20d,0x20d,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x177c,0x177c,0x177c,0x177c,0x1779,0x177c,0x177c,0x177f, +0x1782,0x177f,0x177f,0x177c,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210, +0x210,0x210,0x210,0x1779,0x1779,0x1779,0x1779,0x1779,0x17d9,0x17d9,0x17d9,0x17d9,0x17d0,0x17d0,0x17d0,0x17ca, +0x17cd,0x17cd,0x17cd,0x19f8,0x213,0x213,0x213,0x213,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6, +0x17d6,0x17d6,0x213,0x213,0x213,0x213,0x17d3,0x17d3,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4, +0x17f4,0x216,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4, +0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f1,0x17df,0x17df,0x17df,0x17df, +0x17df,0x17df,0x17df,0x216,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17f1,0x17e2,0x17f4,0x17f7,0x17f7,0x17eb, +0x17e8,0x17e8,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x17ee,0x17ee,0x17ee,0x17ee, +0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5, +0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x216,0x216,0x216,0x1803,0x1806,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c, +0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa, +0x17fa,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965, +0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x21c, +0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd, +0x17fd,0x21c,0x21c,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x21c,0x17fd,0x17fd,0x21c,0x17fd,0x17fd, +0x17fd,0x17fd,0x17fd,0x21c,0x21c,0x21c,0x21c,0x21c,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e, +0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x184b,0x18e7,0x1a52,0x1a55,0x1afd,0x21f,0x21f,0x21f, +0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x1afa,0x1afa,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f, +0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c, +0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x222,0x222,0x1800,0x1800,0x1800,0x1800,0x1800,0x1800, +0x1800,0x1800,0x1800,0x1800,0x1800,0x1800,0x1800,0x1800,0x222,0x1809,0x1800,0x1800,0x1800,0x1800,0x1800,0x1800, +0x1800,0x1809,0x1800,0x1800,0x1809,0x1800,0x1800,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222, +0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x225,0x225,0x225, +0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225, +0x1827,0x1827,0x1818,0x1812,0x1812,0x1827,0x1815,0x182a,0x182a,0x182a,0x182a,0x182d,0x182d,0x1821,0x181e,0x181b, +0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1aa6,0x1821,0x228,0x181b,0x194a,0x19fb, +0x1aa9,0x1aa9,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228, +0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228, +0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833,0x1833, +0x1833,0x1833,0x1833,0x1833,0x22b,0x22b,0x22b,0x22b,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830, +0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830, +0x1830,0x1830,0x1830,0x1830,0x22b,0x22b,0x22b,0x22b,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e, +0x184e,0x184e,0x184e,0x184e,0x184e,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x1a58,0x1a58,0x1a58,0x1a58,0x1a58,0x1a58, +0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x1bcc,0x1bcc,0x1bcc,0x231,0x231,0x231,0x231,0x231, +0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x273,0x273,0x1c41,0x273,0x273,0x273,0x273,0x273, +0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x234, +0x188d,0x188d,0x234,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d, +0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x234, +0x234,0x234,0x1881,0x234,0x1881,0x1881,0x234,0x1881,0x1881,0x1881,0x1884,0x1881,0x1887,0x1887,0x1890,0x1881, +0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x188a,0x188a,0x188a,0x188a,0x188a,0x188a,0x188a,0x188a, +0x188a,0x188a,0x234,0x234,0x234,0x234,0x234,0x234,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0, +0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0, +0x18f0,0x18f0,0x18f0,0x18f0,0x237,0x237,0x237,0x237,0x189f,0x18a2,0x18a2,0x23a,0x23a,0x23a,0x23a,0x23a, +0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x1ba8, +0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x1ba8,0x18b1,0x18b1,0x18b1,0x18b1,0x18b1,0x18b1,0x18b1,0x18b1, +0x18b1,0x18b1,0x18b1,0x23d,0x23d,0x23d,0x23d,0x23d,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75, +0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x18bd,0x18c0,0x18cf,0x18cf,0x18c0,0x18c3,0x18bd,0x18ba, +0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x18a8,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x18a5, +0x18a5,0x1893,0x1893,0x1893,0x18a8,0x18a8,0x18a8,0x18a8,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01, +0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x243,0x243,0x243,0x243, +0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x1950,0x1950,0x1950,0x1950,0x1950,0x1950,0x1950,0x1950, +0x1950,0x1950,0x1950,0x1950,0x1950,0x1950,0x243,0x243,0x1a61,0x1a61,0x1a61,0x1a61,0x1b03,0x1c47,0x1c47,0x1c47, +0x1a61,0x1a61,0x1a61,0x1bcf,0x1bcf,0x276,0x276,0x276,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962, +0x1962,0x1962,0x1962,0x1962,0x195f,0x195f,0x195f,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953,0x1953, +0x195f,0x1959,0x1956,0x195c,0x246,0x246,0x246,0x246,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965, +0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965,0x1965, +0x1965,0x1965,0x1965,0x249,0x249,0x1965,0x1965,0x1965,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x24c,0x1974, +0x1974,0x24c,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974, +0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,0x1971,0x1971,0x1971,0x1971,0x1971,0x24c, +0x1968,0x1968,0x24c,0x1971,0x1971,0x1968,0x1971,0x196b,0x1974,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c, +0x197d,0x197d,0x1980,0x1980,0x1977,0x1977,0x1977,0x1977,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f, +0x197a,0x197a,0x197a,0x197a,0x197a,0x197a,0x197a,0x197a,0x197a,0x197a,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f, +0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1986,0x1983,0x1983,0x1983, +0x1986,0x1983,0x1983,0x1983,0x1983,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252, 0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992, +0x1992,0x1992,0x198f,0x1989,0x1989,0x198c,0x198c,0x1995,0x1995,0x255,0x255,0x255,0x255,0x255,0x255,0x255, +0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998, +0x1998,0x1998,0x1998,0x1998,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258, 0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b, -0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x1998,0x1998,0x1998, -0x19b3,0x19b3,0x19b3,0x1ae2,0x1ae2,0x1a3a,0x1a3a,0x1a3a,0x1a3a,0x1a3a,0x1a3a,0x1ae2,0x1ae2,0x1ae2,0x1a3a,0x1a3a, -0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b6,0x19b6,0x19b3,0x19b6,0x19b6,0x1a3a,0x1a3d,0x1a3a,0x1a3a,0x1a3a,0x1a3a, -0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef, -0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef, -0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16, -0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16,0x1a16, -0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f, -0x1a55,0x1a55,0x1a1f,0x1a55,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a25,0x1a25,0x1a25, -0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31, -0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31, -0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4, -0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4,0x1ac4, -0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0, -0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0,0x1ad0, -0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4, -0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4, -0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7, -0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7, -0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c, -0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c,0x1b6c, -0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d, +0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199e,0x19a7,0x199b,0x199b,0x25b,0x25b,0x25b,0x25b,0x25b, +0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19ad,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e, +0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6, +0x19b6,0x19b6,0x19b0,0x19b0,0x19b0,0x19b0,0x19b0,0x19b0,0x19b0,0x19b0,0x19b0,0x19b0,0x19b0,0x19b3,0x19b3,0x19b3, +0x19b3,0x19b9,0x19b9,0x19b9,0x19b9,0x19b9,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261, +0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96, +0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1b96,0x1bf9,0x1bff,0x1bff,0x1bff,0x1bff,0x1bff,0x1bff,0x1bfc, +0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x264,0x264, +0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d, +0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x267, +0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c, +0x26a,0x26a,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c, +0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a1c,0x1a19,0x1a19,0x1a19,0x1a10,0x1a10,0x1a10,0x1a10, +0x26a,0x26a,0x1a10,0x1a10,0x1a19,0x1a19,0x1a19,0x1a19,0x1a13,0x1a1c,0x1a16,0x1a1c,0x1a19,0x26a,0x26a,0x26a, +0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a, +0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28, +0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x26d,0x26d,0x26d,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a28, +0x1a28,0x1a28,0x1a28,0x1a28,0x1a2b,0x1a2b,0x26d,0x26d,0x270,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e, +0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e, +0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x1a2e,0x270,0x270,0x273,0x273,0x273,0x273, +0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x1a5b,0x1a5b,0x1a5b,0x273, +0x273,0x1c44,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x1a5e,0x1a5e,0x1a5e,0x1a5e, +0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0, +0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x1a61,0x1a61,0x1a61,0x1b03,0x1b03,0x1b03,0x1b03,0x1c47, +0x1c47,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x1a61,0x1a61,0x1a61,0x1a61,0x1a61,0x1a61,0x1b03,0x1b03, +0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1bcf,0x1bcf,0x1bcf,0x1bcf,0x1c47,0x1c47,0x1c47, +0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1bcf,0x1bcf,0x1bcf,0x1bcf,0x1c47,0x1c47,0x1c47,0x276,0x1c47, +0x1b03,0x1b03,0x1b03,0x1bd2,0x1bd2,0x1bd2,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x1c47,0x1c47, +0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1b03,0x1bcf,0x1bcf,0x1bcf,0x1c47,0x1c47,0x276,0x276,0x276,0x276, +0x1bcf,0x1bcf,0x1bcf,0x1bcf,0x1bcf,0x1bcf,0x1bcf,0x1bcf,0x1c47,0x276,0x276,0x276,0x276,0x276,0x276,0x276, +0x1bd2,0x1bd2,0x1bd2,0x1bd2,0x1bd2,0x1bd2,0x1bd2,0x1c4a,0x1c4a,0x276,0x276,0x276,0x276,0x276,0x276,0x276, +0x1a37,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31, +0x1a31,0x1a31,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x1a34, +0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a3a,0x1a3a,0x1a3a,0x1a3a, +0x1a40,0x1a40,0x1a40,0x1a40,0x1a40,0x1a40,0x1a40,0x1a40,0x1a40,0x1a40,0x27c,0x27c,0x27c,0x27c,0x27c,0x1a3d, +0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x1aac,0x27f,0x27f,0x27f,0x27f, +0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f, +0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x282,0x282,0x1aca,0x282,0x282,0x1aca,0x1aca,0x1aca,0x1aca, +0x1aca,0x1aca,0x1aca,0x1aca,0x282,0x1aca,0x1aca,0x282,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca, +0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1aca,0x1ab2,0x1ac1,0x1ac1,0x1ac1,0x1ac1,0x1ac1,0x282,0x1ac1, +0x1ac4,0x282,0x282,0x1ab2,0x1ab2,0x1ac7,0x1ab8,0x1acd,0x1ac1,0x1acd,0x1ac1,0x1ab5,0x1ad0,0x1abb,0x1ad0,0x282, +0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x1abe,0x1abe,0x1abe,0x1abe,0x1abe,0x1abe,0x1abe,0x1abe, +0x1abe,0x1abe,0x282,0x282,0x282,0x282,0x282,0x282,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09, +0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x285,0x285, +0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285, +0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285, +0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6, +0x1ad6,0x1ad6,0x1ad6,0x28b,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6, +0x1ad6,0x1ad6,0x1ad6,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b, +0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9,0x1ad9, +0x1ad9,0x1ad9,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x1b0c,0x1b0c,0x1b0c,0x1b0c,0x1b0c,0x1b0c,0x1b0c,0x1b0c, +0x1b0c,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e, +0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e, +0x28e,0x28e,0x28e,0x28e,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x291,0x1adc, +0x1adc,0x1adf,0x291,0x291,0x1ae2,0x1ae2,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291, +0x291,0x291,0x291,0x291,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b75,0x1b72,0x1b75,0x1b75,0x1b75, +0x1b75,0x1b75,0x1b75,0x294,0x1b78,0x1b78,0x294,0x294,0x294,0x294,0x294,0x294,0x1b6f,0x1b6f,0x1b6f,0x1b6f, +0x1b6f,0x1b6f,0x1b6f,0x1b6f,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e, +0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7b,0x1b7b,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297, +0x297,0x297,0x297,0x297,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x29a,0x1b81,0x1b81,0x1b81,0x1b81, +0x29a,0x1b81,0x1b81,0x29a,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81,0x1b81, +0x1b81,0x1b81,0x1b81,0x29a,0x1b84,0x1b8a,0x1b8a,0x1b87,0x1b87,0x1b87,0x2a0,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87, +0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87, +0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x2a0,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x1b87,0x2a0, +0x2a0,0x2a0,0x2a0,0x2a0,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b90,0x1b8d, 0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d,0x1b8d, +0x1b8d,0x1b8d,0x1b8d,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x1c02,0x2a3, +0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0x1b96,0x1b96,0x1b93,0x1b93,0x1b93,0x1b93,0x1b99,0x1b99,0x1b99,0x1b99,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6, +0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6, +0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf,0x1aaf, 0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f, +0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x2a9, +0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x1b9c,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9, +0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d,0x155d, +0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba5,0x1ba2,0x2ac, +0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac, +0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x1bae,0x2af,0x1bae,0x1bae,0x1bae,0x1bae, +0x1bae,0x1bae,0x1bae,0x2af,0x1bae,0x1bae,0x2af,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab, +0x1bab,0x1bab,0x2af,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab, +0x1bab,0x1bab,0x2af,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x1bab,0x2af,0x1bab,0x1bab,0x2af,0x2af,0x2af, +0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x2b2,0x2b2, +0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4, +0x1bb4,0x1bb4,0x1bb4,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x1bb1,0x1bb1,0x1bb1,0x1bb1, +0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x2b5,0x2b5,0x2b5,0x2b5, +0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5, +0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x1c05,0x1c05,0x1c05, +0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0e,0x1c0e,0x1c0e,0x2b8,0x2b8, +0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8, +0x1c11,0x1c11,0x1c11,0x1c11,0x1c11,0x1c11,0x1c11,0x1c11,0x1c11,0x1c11,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, +0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, +0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x1c14,0x1c14,0x1c14,0x1c14, +0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14,0x1c14, +0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x1c17,0x1c17,0x1c2c,0x1c23, +0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x2c1,0x1c29,0x1c29, +0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29,0x1c29, +0x1c29,0x1c29,0x1c29,0x1c29,0x1c23,0x1c23,0x1c17,0x1c17,0x1c17,0x1c17,0x1c17,0x2c1,0x2c1,0x2c1,0x1c23,0x1c23, +0x1c17,0x1c26,0x1c1a,0x1c2f,0x1c2f,0x1c1d,0x1c1d,0x1c1d,0x1c1d,0x1c1d,0x1c1d,0x1c1d,0x1c1d,0x1c1d,0x1c1d,0x1c1d, +0x1c20,0x1c20,0x1c20,0x1c20,0x1c20,0x1c20,0x1c20,0x1c20,0x1c20,0x1c20,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1, +0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c38,0x1c32,0x1c32,0x1c32,0x1c32, +0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x1c35,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4, +0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7, +0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7, +0x2c7,0x2c7,0x95d,0x95d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d, +0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7, +0x2c7,0x2c7,0x2c7,0x2c7,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0x12a5, +0x12a5,0x12a5,0x2ca,0x2ca,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97, +0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0x2ca,0x2ca, +0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca, +0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca,0x2ca, +0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b, +0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0x2cd,0x2cd, +0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7, +0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x1bbd,0x1bbd,0x1bbd,0x1bbd,0x1c3b,0x2d0,0x2d0,0x2d0,0x2d0,0x2d0,0x2d0, +0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1, +0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x2d3,0x2d3, +0x1794,0x1794,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6, +0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea, +0x18ea,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9,0x2d9, +0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59, +0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x2dc,0x2dc,0x2dc,0x2dc,0x2dc, +0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d, +0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59, +0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x2df,0x2df, +0x3e7,0x3db,0x3db,0x3db,0x3db,0x3db,0x3db,0x3db,0x3db,0x3e7,0x3e7,0x3e7,0x3e7,0x3e1,0x1125,0x12fc, +0x3ea,0x927,0x92a,0x3d8,0x3d8,0x1122,0x12f9,0x12f9,0x3ed,0x3ed,0x3ed,0x3ed,0x3ed,0x3ed,0x3ed,0x3ed, +0x1122,0x3db,0x3db,0x3e7,0xcb1,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea, +0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea, +0x3ea,0x3ea,0x3db,0x3db,0x8af,0x8b2,0x945,0x945,0x945,0x945,0x945,0x945,0x945,0x945,0x945,0x945, +0x3e4,0xf81,0xf7e,0x12ff,0x12ff,0x12ff,0x12ff,0x12ff,0x14c1,0x1128,0x1128,0xed3,0xed3,0xda4,0xed3,0xed3, +0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ed,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea, +0x3ea,0x3ed,0x3ea,0x3ea,0x3ed,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x12f9,0x12fc,0x3de,0x3ea,0x3e7,0x3e7, +0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x1305,0x489,0x489,0x489,0x489,0x489,0x489, +0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x1305,0x1866,0x1866,0xf9f,0x47a,0x483, +0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5, +0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0x4c5,0xba6,0xba6,0xdb3,0xdb3,0x8b5,0xdb0,0x13e3,0x13e3,0x13e3, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8, +0x4ce,0x4ce,0x4ce,0x113d,0x113d,0x113d,0x113d,0x113d,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x113a,0x113a,0x113a,0x113a,0x113a,0x113a, +0x4d1,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce, +0x4ce,0x4ce,0x4ce,0x4ce,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4, +0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4, +0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4d4,0x4d4,0x4d4,0x4d4,0x4d7,0x9a2,0xfcc,0xfcc,0xfcf,0xfcc, +0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4, +0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0x4da,0x4d4,0xfcf,0xfcc,0xfcf,0xfcc,0xfcf,0xfcc, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9, +0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e6,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9,0x4e9, +0x6a5,0x6a5,0x6a8,0x504,0x6b4,0x6b1,0x6b1,0x6ae,0x52e,0x52e,0x4ec,0x4ec,0x4ec,0x4ec,0x4ec,0xb37, +0x6b7,0x510,0x6cf,0x6d2,0x525,0x6b7,0x513,0x513,0x504,0x51f,0x51f,0x6a5,0x52b,0x528,0x6ab,0x4fe, +0x4f5,0x4f5,0x4f8,0x4f8,0x4f8,0x4f8,0x4f8,0x4fb,0x4f8,0x4f8,0x4f8,0x4ef,0x537,0x534,0x531,0x531, +0x6c3,0x519,0x516,0x6c0,0x6bd,0x6ba,0x6cc,0x507,0x6c9,0x6c9,0x51c,0x51f,0x6c6,0x6c6,0x51c,0x51f, +0x501,0x504,0x504,0x504,0x522,0x50d,0x50a,0xbbb,0xada,0xada,0xad7,0xad7,0xad7,0xad7,0xbb2,0xbb2, +0xbb2,0xbb2,0xbb8,0xcde,0xcdb,0xdbf,0xdc2,0xbb5,0xdc2,0xdc2,0xdc2,0xdc2,0xdbf,0xdc2,0xdc2,0xbaf, +0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x558,0x55e,0x73e,0x55b,0x9a5,0x9c6,0xadd,0xadd,0xadd, +0xbc1,0xbc1,0xdc8,0xdc8,0xdc8,0xdc8,0x1146,0x1149,0x1149,0x131a,0x14af,0x14d9,0x14dc,0x14dc,0x16e9,0x1869, +0x56a,0x56a,0x582,0x6e4,0x567,0x6de,0x56a,0x57f,0x567,0x6e4,0x579,0x582,0x582,0x582,0x579,0x579, +0x582,0x582,0x582,0x6ea,0x567,0x582,0x6e7,0x567,0x576,0x582,0x582,0x582,0x582,0x582,0x567,0x567, +0x56d,0x6de,0x6e1,0x567,0x582,0x567,0x6ed,0x567,0x582,0x570,0x588,0x6f0,0x582,0x582,0x573,0x579, +0x582,0x582,0x585,0x582,0x579,0x57c,0x57c,0x57c,0x57c,0xae6,0xae3,0xce1,0xdd1,0xbd6,0xbd9,0xbd9, +0xbd3,0xbd0,0xbd0,0xbd0,0xbd0,0xbd9,0xbd6,0xbd6,0xbd6,0xbd6,0xbcd,0xbd0,0xdce,0xedf,0xee2,0xfd5, +0x114c,0x114c,0x114c,0x6f6,0x6f3,0x58b,0x58e,0x58e,0x58e,0x58e,0x58e,0x6f3,0x6f6,0x6f6,0x6f3,0x58e, +0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x597,0x597,0x597,0x597, +0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x591,0x591,0x591,0x591,0x591,0x591, +0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59a,0x59d,0x59d,0x59d,0x59d,0x59d,0x5a0,0x59a, +0x59d,0x59d,0x59a,0x59a,0x59a,0x59a,0x59d,0x59d,0x6ff,0x6ff,0x59a,0x59a,0x59d,0x59d,0x59d,0x59d, +0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x5a0,0x5a0,0x5a0,0x59d,0x59d,0x702,0x59d, +0x702,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59a,0x59d,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a, +0x59d,0x59d,0x59a,0x6ff,0x59a,0x59a,0x59a,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec,0xaec, +0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0x705,0x5a3,0x705,0x705, +0x5a6,0x5a3,0x5a3,0x705,0x705,0x5a6,0x5a3,0x705,0x5a6,0x5a3,0x5a3,0x705,0x5a3,0x705,0x5b2,0x5af, +0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x5a3,0x5a3,0x705,0x705,0x705,0x705,0x5a3,0x5a3,0x705, +0x5a6,0x705,0x5a6,0x705,0x705,0x705,0x705,0x705,0x70b,0x5a9,0x705,0x5a9,0x5a9,0x5a3,0x5a3,0x5a3, +0x705,0x705,0x705,0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x705,0x5a3,0x5a3,0x5a3,0x5a6,0x5a3,0x5a3, +0x5a6,0x5a3,0x5a3,0x5a6,0x705,0x5a6,0x5a3,0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x5a3, +0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x708,0x705,0x5a6,0x5a3, +0x705,0x705,0x705,0x705,0x5a3,0x5a3,0x705,0x705,0x5a3,0x5a6,0x708,0x708,0x5a6,0x5a6,0x5a3,0x5a3, +0x5a6,0x5a6,0x5a3,0x5a3,0x5a6,0x5a6,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a6,0x5a6,0x705,0x705, +0x5a6,0x5a6,0x705,0x705,0x5a6,0x5a6,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3, +0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x5a3,0x5a3, +0x5a3,0x5a3,0x5a3,0x5a3,0x5a6,0x5a6,0x5a6,0x5a6,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3, +0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x705,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3, +0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3, +0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a6,0x5a6,0x5a6,0x5a6,0x5a3,0x5a3,0x5a3,0x5a3, +0x5a3,0x5a3,0x5a6,0x5a6,0x5a6,0x5a6,0x5a3,0x5ac,0x5a3,0x5a3,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf, +0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0x5b5,0xaef,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5, +0x5be,0x5bb,0x5be,0x5bb,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x70e,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5, +0x5b5,0x5b5,0x7fe,0x7fe,0x5b5,0x5b5,0x5b5,0x5b5,0x5b8,0x5b8,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5, +0x5b5,0x804,0x801,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5, +0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5, +0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0xaef,0xbe5,0xaef,0xaef,0xaef,0x5c1,0x5c1,0x5c1,0x5c1, +0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1, +0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x717,0x717,0x717,0x717, +0x717,0x717,0x717,0x717,0x717,0x717,0x5c7,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42, +0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xd50,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, +0x5ca,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x5cd,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x5cd,0x5cd,0x5cd,0x5cd,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x720,0x720,0x720,0x720, +0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x5d0,0x5d0,0x720,0x720, +0x720,0x720,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0x726,0x726,0x5d3,0x723, +0x723,0x723,0x723,0x723,0x723,0x723,0x5d3,0x5d3,0x5d3,0x5d3,0x5d6,0x5d6,0x5d6,0x5d6,0x726,0x726, +0x5d6,0x5d6,0x726,0x726,0x5d3,0x5d3,0x5d3,0x5d3,0x726,0x726,0x5d6,0x5d6,0x726,0x726,0x5d3,0x5d3, +0x5d3,0x5d3,0x726,0x726,0x723,0x5d3,0x5d6,0x726,0x5d3,0x5d3,0x723,0x726,0x726,0x726,0x5d6,0x5d6, +0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x726,0x723, +0x726,0x723,0x5d3,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d3,0x5d3,0x723,0xaf5,0xaf5,0xaf5,0xaf5, +0xaf5,0xaf5,0xaf5,0xaf5,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xc5a,0xc5a,0xbeb,0x5dc,0x5dc,0x5dc,0x5dc, +0x5d9,0x72f,0x72f,0x5d9,0x5d9,0x729,0x5d9,0x5d9,0x5d9,0x5d9,0x729,0x729,0x5d9,0x5d9,0x5d9,0x5d9, +0xd59,0xd59,0xbee,0xbee,0xdda,0xaf8,0x5dc,0x5dc,0x72c,0x5df,0x72c,0x5dc,0x5d9,0x5d9,0x5d9,0x5d9, +0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9, +0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5dc,0x5dc,0x5dc,0x5d9,0x5d9,0x5d9,0x5d9,0x72f,0x5d9,0x72f,0x5d9, +0x5d9,0x5d9,0x5d9,0x5d9,0x807,0x807,0x807,0x807,0x807,0x807,0x807,0x807,0x807,0x807,0x807,0x807, +0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x72f,0x72f,0x5e2,0x72f, +0x729,0x729,0x5d9,0x729,0x72c,0x729,0x729,0x5d9,0x729,0x72f,0x5e2,0x72f,0xaf8,0xaf8,0xbf1,0xbf1, +0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xdd7,0xe8e,0x5e5,0x5e5,0x5e5,0x5e5, +0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5, +0x5e8,0x13a4,0x13a4,0x13a4,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x14e2,0x5ee,0x5ee,0x5ee, +0x5ee,0x13a4,0x5e8,0x5e8,0x5ee,0x5ee,0x13a7,0x13a7,0x5f4,0x5f4,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8, +0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x13a4,0x5e8,0x5e8,0x5e8, +0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8, +0x5e8,0x735,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8, +0x13a4,0x5e8,0x13a4,0x5e8,0x5e8,0x5e8,0x5e8,0x13a4,0x13a4,0x13a4,0x5e8,0x129f,0x5e8,0x5e8,0x5e8,0x5f1, +0x5f1,0x5f1,0x5f1,0x1326,0x1326,0x5e8,0x5eb,0x5eb,0x5ee,0x5e8,0x5e8,0x5e8,0xbf7,0xbf4,0xbf7,0xbf4, +0xbf7,0xbf4,0xbf7,0xbf4,0xbf7,0xbf4,0xbf7,0xbf4,0xbf7,0xbf4,0x732,0x732,0x732,0x732,0x732,0x732, +0x732,0x732,0x732,0x732,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8, +0x5e8,0x5e8,0x5e8,0x5e8,0x13a4,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8,0x5e8, +0x5e8,0x5e8,0x5e8,0x13a4,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, +0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, +0x615,0x615,0x60c,0x60c,0x60c,0x60c,0x60c,0x60c,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, +0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, +0x60f,0x60f,0x60f,0x60f,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49, +0xb49,0xb49,0xb49,0xb49,0x615,0x615,0x95a,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x60c,0x60c, +0xbfa,0xd7d,0x1b21,0x1b21,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f, +0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f, +0x612,0x618,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f, +0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f, +0x615,0x60f,0x612,0x618,0x615,0x60f,0x615,0x60f,0x612,0x618,0x615,0x60f,0x612,0x618,0x615,0x60f, +0x615,0x60f,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329,0x1329, +0x615,0x60f,0x615,0x60f,0x615,0x60f,0x612,0x618,0x612,0x618,0x615,0x60f,0x615,0x60f,0x615,0x60f, +0x615,0x60f,0x615,0x60f,0x615,0x60f,0x615,0x60f,0x612,0x615,0x60f,0x612,0x615,0x60f,0x612,0x618, +0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, +0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, +0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, +0x615,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, +0x60f,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x60f,0x615,0x918,0x91b,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21, +0x1b21,0x1b21,0x1b21,0x1b21,0x612,0x60f,0x612,0x612,0x612,0x612,0x612,0x612,0x60f,0x612,0x60f,0x60f, +0x612,0x612,0x60f,0x60f,0x612,0x612,0x60f,0x612,0x60f,0x612,0x60f,0x60f,0x612,0x60f,0x60f,0x612, +0x60f,0x612,0x60f,0x60f,0x612,0x60f,0x612,0x612,0x60f,0x60f,0x60f,0x612,0x60f,0x60f,0x60f,0x60f, +0x60f,0x612,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f, +0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x612,0x612,0x60f,0x60f,0x612,0x60f,0x612,0x60f, +0x60f,0x60f,0x60f,0x60f,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, +0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, +0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x618,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, +0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, +0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x615,0x615,0x615, +0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x61b,0x61b,0x61b,0x61b,0xfe1,0xfe1,0xfe1,0x14e5, +0x14e5,0x14e5,0x14e5,0x14e5,0x14e5,0x14e5,0x16ef,0x16ef,0x864,0x86a,0x86a,0x876,0x876,0x867,0x85e,0x867, +0x85e,0x867,0x85e,0x867,0x85e,0x867,0x85e,0x867,0x62a,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x62a, +0x624,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x62a,0x624,0x627, +0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x62a,0x624,0x62a,0x624,0x62a, +0x624,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627, +0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627,0x62d,0x62a,0x624,0x627, +0x62d,0x62a,0x624,0x627,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714, +0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711, +0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711, +0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a, +0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x717,0x717,0x717,0x717, +0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x738,0x738,0x738,0x738, +0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738, +0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0x738,0xc48,0x8c7,0x8be,0x8bb, +0x8c1,0x8c4,0x74d,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x8cd,0x74d,0x74d,0x74d, +0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d, +0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x74d,0x8ca,0x8ca, +0x753,0x8dc,0x8df,0x8e5,0x80a,0x816,0x8fa,0x813,0x8d3,0x8d0,0x8d3,0x8d0,0x8d9,0x8d6,0x8d9,0x8d6, +0x8d3,0x8d0,0x810,0x8e5,0x8d3,0x8d0,0x8d3,0x8d0,0x8d3,0x8d0,0x8d3,0x8d0,0x8eb,0x8f1,0x8ee,0x8ee, +0x759,0x795,0x795,0x795,0x795,0x795,0x795,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f, +0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x78f,0x75c,0x777,0x756,0x77d,0x780, +0x77a,0x792,0x792,0x792,0x792,0x792,0x792,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c, +0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x75c,0x777,0x756,0x777,0xc4b, +0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8, +0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8, +0x7f8,0x7f8,0x1299,0x1299,0x1299,0x1299,0x1299,0x7fb,0x810,0x813,0x813,0x813,0x813,0x813,0x813,0x813, +0x813,0x813,0x933,0x933,0x933,0x933,0x819,0x819,0x8e8,0x8f7,0x8f7,0x8f7,0x8f7,0x8f4,0x80d,0x8e2, +0xb1c,0xb1c,0xb1c,0xc5d,0xc7b,0xc78,0xb3a,0x8b8,0x81f,0x81c,0x81f,0x822,0x81c,0x81f,0x81c,0x81f, +0x81c,0x81f,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81f,0x81f,0x81c,0x81f,0x81f,0x81c,0x81f,0x81f, +0x81c,0x81f,0x81f,0x81c,0x81f,0x81f,0x81c,0x81c,0xc7e,0x831,0x82b,0x831,0x82b,0x831,0x82b,0x831, +0x82b,0x831,0x82b,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b, +0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x82b,0x82e,0x831,0x82b,0x82e,0x82b,0x82e, +0x82b,0x82e,0x82b,0x82b,0x82b,0x82b,0x82b,0x82b,0x82e,0x82e,0x82b,0x82e,0x82e,0x82b,0x82e,0x82e, +0x82b,0x82e,0x82e,0x82b,0x82e,0x82e,0x82b,0x82b,0x82b,0x82b,0x82b,0x831,0x82b,0x831,0x82b,0x831, +0x82b,0x82b,0x82b,0x82b,0x82b,0x82b,0x831,0x82b,0x82b,0x82b,0x82b,0x82b,0x82e,0x831,0x831,0x82e, +0x82e,0x82e,0x82e,0x900,0x903,0x834,0x837,0xc66,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, +0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, +0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x840,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, +0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, +0x83d,0x83d,0x83d,0x83d,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849, +0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849, +0xd62,0xd62,0xe91,0x843,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, +0xd5c,0xd5c,0xd5c,0xd5c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c, +0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c, +0x84c,0x84c,0x84c,0x1a6a,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, +0x912,0x912,0x912,0x912,0x912,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0xd65,0xd65,0xd65,0xd65,0x915, +0x915,0x915,0x915,0x915,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f, +0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f, +0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0xd65,0xd65,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852, +0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852, +0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, +0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855, +0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855, +0x855,0x855,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94, +0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0x110a,0x110a,0x110a,0x110a,0x858,0x858,0x858,0x858, +0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858, +0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x85b,0x85b, +0x858,0x85b,0x858,0x85b,0x85b,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x85b, +0x858,0x85b,0x858,0x85b,0x85b,0x858,0x858,0x85b,0x85b,0x85b,0x858,0x858,0x858,0x858,0x14a0,0x14a0, +0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f, +0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, +0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, +0x12d8,0x12d8,0x12d8,0x12d8,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0xd5c,0xc69,0xc69,0xc69, +0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0x90f,0x90f,0x90f,0x90f, +0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f, +0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0xc69,0xc69,0xc69, +0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0xc69,0x912,0x912,0x912,0x912, +0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, +0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0xd65,0x99c,0x97e,0x97e,0x97e, +0x97e,0x978,0x97e,0x97e,0x990,0x97e,0x97e,0x97b,0x987,0x98d,0x98d,0x98d,0x98d,0x98d,0x990,0x978, +0x984,0x978,0x978,0x978,0x96c,0x96c,0x978,0x978,0x978,0x978,0x978,0x978,0x993,0x993,0x993,0x993, +0x993,0x993,0x993,0x993,0x993,0x993,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978, +0x97b,0x96c,0x978,0x96c,0x978,0x96c,0x98a,0x981,0x98a,0x981,0x999,0x999,0x9a8,0x9a8,0x9a8,0x9a8, +0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8, +0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9ab,0x9ab,0x9ab,0x9ab, +0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab, +0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ae,0x9ae,0x9ae,0x9ae, +0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae, +0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9b7,0x9b7,0x9b7,0x9b7, +0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7, +0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b1,0x9b1,0x9ba,0x9ba,0x9ba,0x9ba, +0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba, +0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9b4,0x9b4,0x9b7,0x9b7,0x9b7,0x9b7, +0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7, +0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9ba,0x9ba,0x9ba, +0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba, +0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9ba,0x9bd,0x9c0,0x9c0,0x9c0, +0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0, +0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9bd,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0, +0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0, +0x9c0,0x9c0,0x9c0,0x9c0,0xa4d,0xa4d,0xfc6,0xa4d,0xa4d,0xa4d,0xa50,0xa4d,0xfc6,0xa4d,0xa4d,0xfbd, +0xa47,0xa3b,0xa3b,0xa3b,0xa3b,0xa4a,0xa3b,0xfae,0xfae,0xfae,0xa3b,0xa3e,0xa47,0xa41,0xfb4,0xfc0, +0xfc0,0xfae,0xfae,0xfc6,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xa53,0xa53, +0xa44,0xa44,0xa44,0xa44,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4a,0xa4a,0xa3b,0xa3b,0xfc6,0xfc6, +0xfc6,0xfc6,0xfae,0xfae,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d, +0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d, +0xa4d,0xa4d,0xa4d,0xa4d,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdbc,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xdbc, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, +0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, +0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, +0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6b,0xa71,0xa6e, +0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0x1143,0x1143,0x1143,0x1143,0x1143,0x1143,0x1143,0x1143,0x1143, +0x1140,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e, +0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e,0xa6e, +0xa6e,0xa6e,0xa6e,0xa6e,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, +0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, +0xa83,0xa83,0xa83,0xa83,0xaa7,0xaa7,0xaa7,0xaaa,0xaaa,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7, +0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xa8f,0xa8f,0xaa4,0xa86,0xa86,0xa86,0xa86,0xa86, +0xa86,0xa86,0xaa4,0xaa4,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7, +0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7,0xaa7, +0xaa7,0xaa7,0xaa7,0xaa7,0xacb,0xacb,0xacb,0xacb,0xacb,0xab6,0xab6,0xacb,0xacb,0xacb,0xacb,0xacb, +0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb, +0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xace, +0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb, +0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb,0xaef,0xaef,0xaef,0xaef, +0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef, +0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xafb,0xafb,0xafb,0xafb, +0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb, +0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xafb,0xb0d,0xb0d,0xb0d,0xb0d, +0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d, +0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb13,0xb13,0xb13,0xb13, +0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13, +0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb1f,0xb1f,0xb1f,0xb1f, +0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f, +0xb1f,0xb1f,0xb1f,0xb1f,0x13aa,0x13aa,0x13aa,0x1ae5,0x1ae5,0x1ae5,0x1ae5,0x1ae5,0xb22,0xb22,0xb22,0xb22, +0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, +0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0x1ae8,0x1ae8, +0x1ae8,0x1ae8,0x1ae8,0x1ae8,0x1ae8,0x1ae8,0x1ae8,0x1ae8,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25, +0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb28,0xb25,0xb25, +0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25, +0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25, +0xb2b,0xb2b,0xc6c,0xc6c,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b, +0xb2b,0xb2b,0xb2b,0xb2b,0xc6c,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b, +0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f, +0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0x14e8, +0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xcf0,0xcf0,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55, +0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55, +0xb55,0xb55,0xced,0xced,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b, +0xd3b,0xd3b,0xd3b,0xd3b,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58, +0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58, +0xb58,0xb58,0xb58,0xb58,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b, +0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b,0xb5b, +0xb5b,0xb5b,0xb5b,0xb5b,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb61,0xb6d,0xb73,0xb73,0xb73,0xb67,0xb67, +0xb67,0xb70,0xb64,0xb64,0xb64,0xb64,0xb64,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb73, +0xb73,0xb73,0xb73,0xb73,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67, +0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67, +0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb6a,0xb6a,0xb73,0xb73,0xb73,0xb67,0xb67,0xb73,0xb73,0xb73, +0xb73,0xb73,0xb73,0xb73,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67, +0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb73,0xb73,0xb73,0xb73,0xb67,0xb67, +0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a, +0xb6a,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67, +0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0x16f2,0x16f2, +0xb7f,0xb76,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb76,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb76,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb76,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb76,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, +0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, +0xb79,0xb79,0xb79,0xb79,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, +0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7f,0xb7f,0xb7f,0xb7f,0xb82,0xb82,0xb82,0xb82, +0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82, +0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb88,0xb88,0xb88,0xb88, +0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, +0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0x1aee, +0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1aee,0x1bba,0x1bba,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b, +0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b, +0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5, +0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe5,0xbe2,0xbe5, +0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xce4, +0xce7,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xdd4,0xeeb,0xeeb,0xeeb,0xeeb, +0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea, +0xcea,0xcea,0xdd7,0xe8b,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xfdb,0x1278,0x1278, +0xde0,0xde0,0xde0,0xde0,0xde0,0xde6,0xde3,0xefd,0xefd,0xefd,0xefd,0x13e9,0xfed,0x13e9,0x1332,0x1332, +0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24, +0xc24,0xc24,0xc51,0xc4e,0xc51,0xc4e,0xc51,0xc4e,0x1104,0x1101,0xff3,0xff0,0xc27,0xc27,0xc27,0xc27, +0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc2a,0xc2a,0xc2a,0xc2a, +0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a, +0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2d,0xc2d,0xc2d,0xc33, +0xc30,0xc57,0xc54,0xc33,0xc30,0xc33,0xc30,0xc33,0xc30,0xc33,0xc30,0xc33,0xc30,0xc33,0xc30,0xc33, +0xc30,0xc33,0xc30,0xc33,0xc30,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d, +0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d, +0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc33,0xc30,0xc33,0xc30,0xc2d,0xc2d,0xc2d,0xc2d, +0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d, +0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc33,0xc30,0xc2d,0xc2d,0xc36,0xc36,0xc36,0xc36, +0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc3c,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36, +0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36, +0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc3c,0xc3c,0xc3c,0xc36, +0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36, +0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc39,0xc36,0xc36,0xc36, +0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f, +0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f, +0xcea,0xd56,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xe8b,0xe8b,0xdd7,0xdd7,0xdd7,0xdd7, +0xdd7,0xdd7,0xeee,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0x129c,0x129c,0x127b, +0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e, +0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e, +0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd14,0xd14,0xd14,0xd14,0xd14,0xd11,0xd26,0xd26,0xd26,0xd20, +0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd20,0xd26,0xd26,0xd26,0xd26, +0xd1a,0xd1a,0xd23,0xd23,0xd23,0xd23,0xd17,0xd17,0xd17,0xd17,0xd17,0xd1d,0xdec,0xdec,0xdec,0xdec, +0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xde9,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec, +0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd20,0xd26, +0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd1a,0xd1a,0xd1a, +0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d, +0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d, +0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xd29,0xdef,0xdef, +0xdef,0xdef,0xdef,0xdef,0xf00,0xf00,0xf00,0xf00,0xf00,0xf00,0xf00,0x110d,0x110d,0xff6,0xff6,0xff6, +0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c, +0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c, +0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32, +0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32,0xd32, +0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b, +0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b, +0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47, +0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47, +0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, +0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, +0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5, +0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5, +0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb, +0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8, +0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb, +0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb, +0xebb,0xebb,0xe0d,0xe0d,0xf03,0xf03,0xf03,0xf03,0xf03,0xf03,0xf03,0x1002,0x1002,0x1005,0x1002,0x1002, +0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff, +0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19, +0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19,0xe1c,0xe19, +0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, +0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28, +0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0x1b30, +0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b, +0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0x1b2d, +0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e, +0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e, +0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xe46,0xe46,0xe46,0xe46,0xe46,0xe46,0xe46,0xe46, +0xe46,0xe46,0xe46,0xe46,0xe46,0xe46,0xe46,0xf06,0xf06,0xf06,0xf06,0x1008,0x1008,0x1008,0x1008,0x1008, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58, +0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58, +0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61, +0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe5b, +0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e, +0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe61,0xe61,0xe61,0xe61,0xe61, +0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe67,0xe67, +0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe64,0xe6d,0x1014,0x100e,0x101d,0x100b,0xe6a,0xe6a,0x100b,0x100b, +0xe7c,0xe7c,0xe70,0xe7c,0xe7c,0xe7c,0xe73,0xe7c,0xe7c,0xe7c,0xe7c,0xe70,0xe7c,0xe7c,0xe7c,0xe7c, +0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c, +0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f, +0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f, +0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97, +0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97, +0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8, +0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113, +0xeeb,0xeeb,0xeeb,0xee8,0xee8,0xee8,0xee8,0xee8,0x114f,0x139b,0x139b,0x139b,0x139b,0x131d,0x131d,0x131d, +0x139e,0x1320,0x1320,0x139e,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x17a6,0x17a6,0x17a6,0x17a6,0x186c, +0xf00,0xf00,0xf00,0xf00,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6,0xff6, +0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9,0xff9, +0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0x14fa,0x14fa,0x14fa,0x14fa,0x14fa,0x14fa,0x14fa,0x14fa,0x14fa, +0x14fa,0x14fa,0x14fa,0x14fa,0x14fa,0x14fd,0x1875,0x1875,0x18f6,0x1875,0x1bd8,0x17ac,0x1335,0x1158,0xf03,0xf03, +0xf21,0xf21,0xf21,0xf21,0xf33,0xf3c,0xf3f,0xf3c,0xf3f,0xf3c,0xf3f,0xf3c,0xf3f,0xf3c,0xf3f,0xf3c, +0xf3c,0xf3c,0xf3f,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c, +0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf24,0xf21,0xf21,0xf21,0xf21,0xf21,0xf21,0xf36, +0xf21,0xf36,0xf33,0xf33,0xf48,0xf45,0xf48,0xf48,0xf48,0xf45,0xf45,0xf48,0xf45,0xf48,0xf45,0xf48, +0xf45,0x102f,0x102f,0x102f,0x116d,0x1026,0x102f,0x1026,0xf45,0xf48,0xf45,0xf45,0x1026,0x1026,0x1026,0x1026, +0x1029,0x102c,0x116d,0x116d,0xf4b,0xf4b,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038, +0x1041,0x1038,0x1041,0x1038,0x1038,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038, +0x1041,0x1038,0x1041,0x1038,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51, +0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51,0xf51, +0xf51,0xf51,0xf51,0xf51,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60, +0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60, +0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b, +0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66, +0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66, +0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0xfae,0xfc6,0xfbd,0xfc3,0xfc3,0xfc6,0xfc6,0xfbd, +0xfbd,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfc6,0xfc6,0xfc6,0xfae,0xfae,0xfae,0xfae,0xfc6,0xfc6,0xfc6, +0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfae,0xfbd,0xfc0,0xfae,0xfae,0xfc3, +0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfb1,0xfc6,0xfc3,0xfba,0xfba,0xfba,0xfba,0xfba,0xfba,0xfba,0xfba, +0xfba,0xfba,0x1137,0x1137,0x1134,0x1131,0xfb7,0xfb7,0xfde,0xfde,0xfde,0xfde,0x129c,0x129c,0x127b,0x127b, +0x127b,0x1278,0x1278,0x1278,0x1278,0x127b,0x13a1,0x127b,0x127b,0x127b,0x1278,0x127b,0x129c,0x1278,0x1278,0x1278, +0x127b,0x127b,0x1278,0x1278,0x127b,0x1278,0x1278,0x127b,0xff9,0xff9,0xff9,0xff9,0xff9,0xff6,0xff6,0xff9, +0xff9,0xff9,0xff9,0xff9,0xff9,0x14f4,0x14f4,0x14f4,0x110d,0xff6,0xff6,0xff6,0xff6,0x12a8,0x1284,0x1284, +0x1284,0x1284,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x101a,0x101a,0x1017,0x1011,0x1017,0x1011,0x1017,0x1011, +0x1017,0x1011,0x100e,0x100e,0x100e,0x100e,0x1023,0x1020,0x100e,0x116a,0x13f5,0x13f8,0x13f8,0x13f5,0x13f5,0x13f5, +0x13f5,0x13f5,0x13fb,0x13fb,0x150f,0x1503,0x1503,0x1500,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038, +0x1035,0x1032,0x1032,0x1041,0x1038,0x1341,0x133e,0x16fb,0x1341,0x133e,0x1404,0x1401,0x1512,0x1512,0x1518,0x1512, +0x1518,0x1512,0x1518,0x1512,0x1518,0x1512,0x1518,0x1512,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038, +0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038, +0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1038,0x103b,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038, +0x1038,0x1041,0x1038,0x1041,0x1038,0x1041,0x1041,0x1038,0x1044,0x1044,0x104a,0x1050,0x1050,0x1050,0x1050,0x1050, +0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050, +0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x104a,0x1044,0x1044,0x1044,0x1044,0x104a,0x104a, +0x1044,0x1044,0x104d,0x140d,0x140a,0x140a,0x1050,0x1050,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047, +0x1047,0x1047,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065, +0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065, +0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x1065,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e, +0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e,0x106e, +0x1071,0x1071,0x1071,0x1074,0x1071,0x1071,0x1077,0x1077,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a, +0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a, +0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083, +0x1083,0x1083,0x1083,0x1083,0x1086,0x107d,0x108c,0x1089,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083, +0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083, +0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x1347,0x1344,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098, +0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x109b,0x111f,0x108f,0x108f,0x108f,0x1095,0x1413,0x1413,0x1413,0x1413, +0x1413,0x1413,0x1413,0x1413,0x1092,0x1092,0x1095,0x10a1,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098, +0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098, +0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x109e,0x1098,0x1527,0x1524,0x1527,0x1524,0x152a,0x152a,0x1704,0x1413, +0x10aa,0x10aa,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad, +0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad,0x10ad, +0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10bc,0x10bc,0x10bc,0x10bc, +0x10bc,0x10bc,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b6,0x10b6,0x10b6,0x1116,0x10bf,0x10ce,0x10ce,0x10ce,0x10ce, +0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10b9,0x10b9,0x10b9,0x10b9, +0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc, +0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10e0,0x10e0,0x10e0,0x10e0, +0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0, +0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10f2,0x10f2,0x10f2,0x10f2, +0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2, +0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10fb,0x10fb,0x10fb,0x10fb, +0x1110,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb, +0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fe,0x10fe,0x10fe,0x10fe, +0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe, +0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x110a,0x110a,0x110a,0x110a, +0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x149d,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785, +0x1785,0x1785,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x117f,0x117f,0x117f,0x117f, +0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f,0x117f, +0x117f,0x117f,0x1176,0x1176,0x1179,0x1179,0x117f,0x1176,0x1176,0x1176,0x1176,0x1176,0x1185,0x1185,0x1185,0x1185, +0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185, +0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x1185,0x11a0,0x11a0,0x11a0,0x11a0, +0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0, +0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11ac,0x11ac,0x11ac,0x11ac, +0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac, +0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11a9,0x11af,0x11bb,0x11bb,0x11bb,0x11bb, +0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb, +0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11c1,0x11c1,0x11c1,0x11c1, +0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x12ed,0x11c7,0x12f0, +0x11c7,0x11c7,0x11c7,0x11c7,0x11c4,0x11c4,0x11c4,0x11c7,0x1707,0x170a,0x1932,0x192f,0x11ca,0x11ca,0x11ca,0x11d9, +0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df, +0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11cd, +0x11d9,0x11d9,0x11ca,0x11ca,0x11ca,0x11ca,0x11d9,0x11d9,0x11ca,0x11ca,0x11d9,0x11d9,0x11eb,0x11eb,0x11eb,0x11eb, +0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11ee,0x11eb,0x11eb,0x11eb, +0x11eb,0x11eb,0x11eb,0x11e5,0x11e5,0x11e5,0x11eb,0x11e8,0x1530,0x1533,0x1536,0x1536,0x11fd,0x11fd,0x11fd,0x11fd, +0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11f1,0x11fd,0x11f1,0x11f1, +0x11f1,0x1206,0x1206,0x11f1,0x11f1,0x1206,0x11fd,0x1206,0x1206,0x11fd,0x11f1,0x11f4,0x11fd,0x11fd,0x11fd,0x11fd, +0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd, +0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x1218,0x1218,0x1218,0x1218, +0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218, +0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1218,0x1230,0x1230,0x1230,0x1230, +0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230, +0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x122d,0x122d,0x122d,0x1239,0x1239,0x1239,0x1239, +0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239, +0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1248,0x1248,0x1248,0x1248, +0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248, +0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x124e,0x124e,0x125a,0x125d, +0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d, +0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x1260,0x125d,0x1260,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d, +0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x1260,0x125d,0x125d,0x125d,0x125d,0x125a,0x125a,0x125a,0x124e, +0x124e,0x124e,0x124e,0x125a,0x125a,0x1254,0x1251,0x1257,0x1257,0x1266,0x1263,0x1263,0x1269,0x1269,0x1269,0x1269, +0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269, +0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x126f,0x126f,0x126f,0x126c, +0x126c,0x126c,0x1269,0x1269,0x1269,0x1269,0x126c,0x1269,0x1269,0x1269,0x126f,0x126c,0x126f,0x126c,0x1269,0x1269, +0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269, +0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x126f,0x126c,0x126c,0x1269,0x1269,0x1269,0x1269, +0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1be1,0x1a07,0x1a07,0x1a07,0x1a07, +0x1a07,0x1a07,0x1a07,0x1a0a,0x1a04,0x1bf3,0x1bf3,0x1bf3,0x1bf6,0x1bf0,0x1bf6,0x1bf0,0x128a,0x128a,0x128a,0x128a, +0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128d,0x128d,0x128d,0x1272,0x1938,0x1398,0x1296,0x1398,0x1398, +0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1296,0x1398,0x1296,0x127b,0x127b,0x1323,0x1278, +0x1323,0x1323,0x1323,0x1323,0x1278,0x1278,0x129c,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x127b,0x129c,0x129c, +0x127b,0x129c,0x1278,0x127b,0x127b,0x127e,0x129c,0x1278,0x1278,0x129c,0x127b,0x127b,0x1395,0x1395,0x1395,0x1395, +0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1287,0x1287,0x1287,0x1287,0x13ad,0x138f,0x1290,0x13ad,0x13ad,0x13ad, +0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x1836,0x1836,0x1836,0x1836,0x1836,0x128a,0x128a,0x128a,0x128a, +0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x1539,0x1539,0x1a88,0x1a88,0x1a88,0x128a,0x128a,0x128a,0x128a, +0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x1398,0x1398,0x1296,0x1398, +0x1398,0x1398,0x1296,0x1398,0x1398,0x1398,0x1290,0x1290,0x1290,0x1290,0x1290,0x1392,0x1395,0x1395,0x1395,0x1395, +0x1395,0x1395,0x1395,0x1293,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1293,0x1395,0x1395,0x1395,0x1395, +0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1416,0x1416,0x19e6,0x1a88,0x1a88,0x1a88,0x1395,0x1395,0x1395,0x1395, +0x1395,0x1395,0x1395,0x1395,0x1395,0x1293,0x1395,0x1293,0x1293,0x1395,0x1395,0x1293,0x12b7,0x12b7,0x12b7,0x12b7, +0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7, +0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x1341,0x133e,0x1341,0x133e, +0x1341,0x133e,0x1341,0x133e,0x1341,0x133e,0x1404,0x1518,0x1518,0x1518,0x17b2,0x1926,0x1518,0x1518,0x16fe,0x16fe, +0x16fe,0x16f8,0x16fe,0x16f8,0x1929,0x1926,0x19e3,0x19e0,0x19e3,0x19e0,0x19e3,0x19e0,0x1365,0x1365,0x1365,0x1365, +0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365, +0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x137a,0x136b,0x137a,0x1380, +0x1380,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d, +0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x137d,0x136b,0x136b,0x136b,0x136b, +0x136b,0x136b,0x136b,0x136b,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386, +0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386, +0x1386,0x1386,0x1386,0x1386,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x13b6,0x13b3,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db, +0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db, +0x18db,0x18db,0x18db,0x18db,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bc,0x13bc,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bc,0x13bf,0x13bf,0x13bf,0x13bc,0x13bf,0x13bc,0x13bf,0x13bc,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13c2,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bc,0x13bf,0x13bc,0x13bc,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc, +0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x13bc,0x1542,0x1542, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1788,0x1788,0x1788, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x1545,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x1545,0x1788,0x1788,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13c2,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1542,0x1542,0x1545,0x1545,0x13bf,0x13bf,0x13c2,0x13c2,0x13c2,0x16b3,0x13bf,0x13c2, +0x13bf,0x13bf,0x13c2,0x1548,0x1548,0x1545,0x1545,0x1788,0x1788,0x1788,0x1788,0x1788,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x1545,0x1545,0x1545,0x16b3,0x1545,0x1545,0x1545, +0x1788,0x1788,0x1788,0x178b,0x178b,0x178b,0x178b,0x178b,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x1545,0x13bf,0x1545,0x13c2,0x13c2,0x13bf,0x13bf,0x13c2,0x13c2, +0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2, +0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13c2,0x13bf,0x13bf,0x13bf, +0x13c2,0x13bf,0x13bf,0x13bf,0x13bf,0x13c2,0x13c2,0x13c2,0x13bf,0x13c2,0x13c2,0x13c2,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13c2,0x13bf,0x13c2,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x16b3,0x13bf,0x13bf,0x13bf,0x13bf,0x1545,0x1545,0x1788,0x1419,0x1419,0x1419,0x1419,0x1542,0x1542,0x1542,0x1542, +0x1542,0x1542,0x1545,0x1788,0x1788,0x1788,0x1788,0x170d,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1548,0x1548,0x1545,0x1545,0x1545,0x1545,0x183f,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1545,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x14a3,0x13c5,0x13c5,0x13c5, +0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x14a3,0x13c5,0x13c5, +0x13c5,0x14a3,0x13c5,0x14a3,0x13c5,0x14a3,0x13c5,0x14a3,0x13c5,0x13c5,0x13c5,0x14a3,0x13c5,0x13c5,0x13c5,0x13c5, +0x13c5,0x13c5,0x14a3,0x14a3,0x13c5,0x13c5,0x13c5,0x13c5,0x14a3,0x13c5,0x14a3,0x14a3,0x13c5,0x13c5,0x13c5,0x13c5, +0x14a3,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x16b9,0x16b9,0x178e, +0x178e,0x13c8,0x13c8,0x13c8,0x13c5,0x13c5,0x13c5,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x1638,0x1638,0x1638,0x1638, +0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x13cb,0x13cb,0x13cb,0x13cb, +0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb, +0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13ce, +0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb, +0x13ce,0x13ce,0x13ce,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13d1,0x13d1,0x13d1,0x13d1, +0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1, +0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x13d1,0x17bb,0x17bb,0x17b8,0x1710, +0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141c,0x141c,0x141c,0x141c,0x141c,0x141c,0x141f,0x141f,0x141f,0x141f, +0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x141f,0x154e,0x1422,0x1551,0x1422,0x1422, +0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1713, +0x1713,0x1b5d,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x1a8b,0x1a8b,0x142b,0x142b,0x142b,0x143d, +0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d, +0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x1458,0x1458,0x1458,0x1458, +0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458, +0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x145e,0x145e,0x145e,0x145e, +0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e, +0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x19ec,0x1461,0x1461,0x1461,0x1461, +0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461, +0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1467,0x1467,0x1473,0x1479, +0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479, +0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1473, +0x1473,0x1473,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1473,0x1476,0x1479,0x147c,0x147c, +0x1479,0x147f,0x147f,0x146a,0x146d,0x1719,0x171c,0x171c,0x171c,0x1557,0x1a94,0x1a91,0x1470,0x1470,0x1470,0x1470, +0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1554,0x1722,0x1725,0x171f,0x1728,0x1728,0x149a,0x149a,0x149a,0x149a, +0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a, +0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x14f4,0x14f4,0x14f4,0x14f4, +0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4, +0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x190e,0x190e,0x190e,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4, +0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x19da,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x1872,0x190e, +0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1548,0x1545,0x1545,0x1545,0x1545,0x16b6,0x16b6,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x183c,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566, +0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566,0x1566, +0x1566,0x1566,0x1566,0x1566,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578, +0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578, +0x1578,0x1578,0x1578,0x1578,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e, +0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e, +0x157e,0x157e,0x157e,0x157e,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581, +0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581,0x1581, +0x1581,0x1581,0x1581,0x1581,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1, +0x15ab,0x15ab,0x15ab,0x159f,0x159f,0x159f,0x15ab,0x15ab,0x159f,0x15ae,0x15a2,0x159f,0x15b4,0x15b4,0x15a8,0x15b4, +0x15b4,0x15a5,0x17c4,0x1bed,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, +0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, +0x15c6,0x15c6,0x15c6,0x15b7,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf, +0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15cf,0x15c9, +0x15d2,0x15d2,0x15d2,0x15d2,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5, +0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5, +0x15d5,0x15d5,0x15d5,0x15d5,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15e7,0x15f0,0x15f0,0x15f0, +0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0, +0x15f0,0x15f0,0x15f0,0x15f0,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9, +0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9, +0x15f9,0x15f9,0x15f9,0x15f9,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b, +0x160b,0x160b,0x160b,0x160b,0x1608,0x1608,0x1608,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x1608, +0x1608,0x15fc,0x1608,0x15ff,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b, +0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b, +0x160b,0x160b,0x160b,0x160b,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f, +0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f, +0x162f,0x162c,0x162c,0x162c,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638, +0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x163e,0x163e,0x163e,0x163b,0x163b,0x163b, +0x1638,0x1638,0x1638,0x1638,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, +0x164d,0x164d,0x164d,0x164d,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1653,0x1653,0x1647,0x1644,0x1644, +0x1644,0x1644,0x1644,0x1644,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, +0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, +0x164d,0x164d,0x164d,0x164d,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659, +0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1656,0x1656,0x1656,0x1656,0x1656, +0x1656,0x1656,0x1656,0x1656,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c, +0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c, +0x165c,0x165c,0x165c,0x165c,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680, +0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680, +0x1680,0x1680,0x1680,0x1680,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689, +0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689, +0x1689,0x1689,0x1689,0x1689,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1, +0x16a1,0x16a1,0x16a1,0x16a1,0x168c,0x169b,0x169b,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x169b,0x168c,0x169e, +0x169e,0x168c,0x169e,0x168c,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1, +0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1,0x16a1, +0x16a1,0x16a1,0x16a1,0x16a1,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa, +0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa, +0x16aa,0x16aa,0x16aa,0x16aa,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0, +0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0, +0x16b0,0x16b0,0x16b0,0x16b0,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e, +0x16f5,0x16f5,0x16f5,0x16f5,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e, +0x190e,0x190e,0x190e,0x19da,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716, +0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716, +0x1716,0x1716,0x1716,0x1716,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755, +0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755, +0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x175b,0x1758,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755, +0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e, +0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e, +0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761, +0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761, +0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773, +0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773, +0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776, +0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776, +0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779, +0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779, +0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x177c,0x177c,0x177c,0x177c,0x1779, +0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x177c,0x177c,0x177c, +0x177c,0x177c,0x177c,0x177c,0x177c,0x1779,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c, +0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c, +0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x177c,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794, +0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794, +0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e, +0x187e,0x187e,0x187e,0x187e,0x1af7,0x1a4c,0x1a4c,0x1a4f,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797, +0x179a,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x18e4,0x1797,0x1797,0x1797,0x1797,0x1797,0x1845,0x1845,0x1845, +0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1, +0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x1797,0x19c5,0x19c5,0x1a4c,0x1a4c,0x1a4c,0x1a4c,0x1a4c, +0x1a4c,0x1a4c,0x1a4c,0x1af4,0x1bc6,0x1a4f,0x1a4f,0x1a4f,0x18e1,0x18e4,0x18e4,0x18e4,0x18e4,0x18e4,0x18e4,0x18e4, +0x18e4,0x18e4,0x18e4,0x18e4,0x18e4,0x18e4,0x18e1,0x18e1,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b, +0x1b5d,0x1b60,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x1b5a,0x193b,0x17be,0x17be,0x17be,0x17be, +0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1848,0x1845, +0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x1848,0x18e4,0x18e4,0x1848,0x1848,0x1848,0x1848,0x1848, +0x1848,0x1848,0x1845,0x17c7,0x1848,0x1848,0x1848,0x1a4c,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x17c7,0x1845, +0x1845,0x1845,0x1845,0x1845,0x18e1,0x19c5,0x19c5,0x19c5,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845, +0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x18e1,0x17dc,0x17dc,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9, +0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9, +0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc, +0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc, +0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a, +0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x1827,0x1827,0x1827, +0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a, +0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a, +0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e, +0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e, +0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851, +0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851, +0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1b00,0x1b00,0x1b00,0x1b00,0x1b00, +0x1b00,0x1b00,0x1b00,0x1b00,0x1b00,0x1b00,0x1b00,0x1b00,0x18a8,0x18a8,0x18a8,0x18a8,0x19fe,0x19fe,0x18ab,0x18ab, +0x18ab,0x18ab,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x1893,0x18a5, +0x1896,0x1899,0x189c,0x18ae,0x18ae,0x194d,0x189f,0x189f,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8, +0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8, +0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18a8,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9, +0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18b4,0x18ba,0x18b7,0x18b7,0x18b7, +0x18b7,0x18c6,0x18cc,0x18b7,0x18b7,0x18b7,0x18b7,0x18c3,0x18c9,0x18b7,0x18b7,0x18b7,0x18b7,0x18b7,0x18b7,0x18b7, +0x18b7,0x18b7,0x18b7,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9, +0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5, +0x18d5,0x18d5,0x18d5,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x1aeb,0x1aeb,0x1aeb,0x1aeb,0x1aeb,0x1aeb,0x1aeb,0x1aeb, +0x1aeb,0x1aeb,0x1aeb,0x1aeb,0x1aeb,0x1bb7,0x1bb7,0x1bb7,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db, +0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db, +0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1, +0x18e1,0x18e1,0x18e1,0x18e1,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x1a4c,0x1af4,0x19c5,0x19c5,0x19c5,0x19c5,0x1af7, +0x1af4,0x1bc6,0x19c5,0x1a4c,0x19c5,0x19c5,0x19c5,0x19c5,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x18e1,0x19c5, +0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5, +0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea, +0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea, +0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed, +0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed, +0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x18ed,0x1bc9,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0, +0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0, +0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962, +0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962, +0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x1962,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d, +0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d, +0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x197d,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983, +0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983, +0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1, +0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1, +0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4, +0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4, +0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19a4,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad, +0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad, +0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19aa,0x19aa,0x19aa,0x19c5,0x19c5,0x19c5,0x1af4,0x1af4,0x1a4c,0x1a4c,0x1a4c, +0x1a4c,0x1a4c,0x1a4c,0x1af4,0x1af4,0x1af4,0x1a4c,0x1a4c,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c8,0x19c8,0x19c5, +0x19c8,0x19c8,0x1a4c,0x1a4f,0x1a4c,0x1a4c,0x1a4c,0x1a4c,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01, +0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01, +0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a01,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28, +0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28, +0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a28,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31, +0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a67,0x1a67,0x1a31,0x1a67,0x1a31,0x1a31,0x1a31,0x1a31, +0x1a31,0x1a31,0x1a31,0x1a31,0x1a31,0x1a37,0x1a37,0x1a37,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43, +0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43, +0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1a43,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6, +0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6, +0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ad6,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2, +0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2, +0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1ae2,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06, +0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06, +0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09, +0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09, +0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e, +0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e, +0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b7e,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f, 0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f, -0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2, -0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2,0x1ba2, -0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc,0x1bfc, -0x1bfc,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9,0x1bf9, -0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, -0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, -0,0,0,0 +0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1b9f,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1, +0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1, +0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb1,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4, +0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4, +0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1bb4,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e, +0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0e,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b, +0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d, +0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d, +0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c4d,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59, +0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59, +0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0,0,0,0 }; static const UTrie2 propsVectorsTrie={ propsVectorsTrie_index, - propsVectorsTrie_index+5348, + propsVectorsTrie_index+5368, nullptr, - 5348, - 27344, + 5368, + 27396, 0xa40, - 0x1564, + 0x1578, 0x0, 0x0, 0x110000, - 0x7fb0, + 0x7ff8, nullptr, 0, false, false, 0, nullptr }; -static const uint32_t propsVectors[7230]={ +static const uint32_t propsVectors[7260]={ 0x67,0,0,0x67,0,0xe00000,0x67,0x80000,0x20,0x867,0,0,0xa67,0,0,0xb67, 0,0,0xd67,0,0,0xe67,0,0,0x1067,0,0,0x1167,0,0,0x1267,0, 0,0x1367,0,0,0x1467,0,0,0x1567,0,0,0x1667,0,0,0x1767,0,0, 0x1867,0,0,0x1967,0,0,0x1a67,0,0,0x1b67,0,0,0x1d67,0,0,0x1f67, 0,0,0x2067,0,0,0x2267,0,0,0x2367,0,0,0x2467,0,0,0x2567,0, 0,0x2767,0,0,0x2867,0x80000,0x20,0x2967,0,0,0x2a67,0,0x1600000,0x2b67,0,0, -0x2d67,0,0,0x3167,0x20000000,0,0x3267,0x20000000,0,0x3a67,0,0,0x3b67,0,0,0x3c67, -0,0,0x3e67,0,0,0x4067,0,0,0x4167,0,0,0x4467,0,0,0x4867,0, -0,0x4967,0,0,0x4a67,0,0,0x5067,0,0,0x5167,0,0,0x5467,0,0, -0x5567,0,0,0x5667,0x80000,0x20,0x5767,0,0,0x5867,0,0,0x5967,0,0,0x5b67, -0,0,0x5c67,0,0,0x5d67,0,0,0x6067,0x80000,0x20,0x6267,0,0,0x6367,0, -0,0x6467,0,0,0x6567,0,0,0x6f67,0,0,0x7067,0,0,0x7367,0x20000000,0, -0x7567,0,0,0x7667,0,0,0x7767,0,0,0x7867,0,0,0x7a67,0,0,0x7b67, -0,0,0x7c67,0,0,0x7e67,0,0,0x7f67,0,0,0x8167,0,0,0x8267,0, -0,0x8467,0,0,0x8567,0,0,0x8667,0,0,0x8767,0,0,0x8967,0,0, -0x8b67,0,0,0x8c67,0,0,0x8e67,0x20000000,0,0x8f67,0,0,0x9067,0,0,0x9167, -0,0,0x9267,0,0,0x9367,0,0,0x9567,0,0,0x9667,0,0,0x9767,0, -0,0x9867,0,0,0x9967,0,0,0x9a67,0,0,0x9c67,0,0,0x9f67,0,0, -0xa167,0,0,0xa367,0,0,0xa467,0,0,0xa567,0,0,0xa667,0,0,0xa767, -0,0,0xa867,0,0,0xa967,0,0,0xaa67,0,0xe00000,0xab67,0,0xe00000,0xac67,0, -0,0xad67,0,0,0xae67,0,0,0xaf67,0,0,0xb167,0,0,0xb267,0,0, -0xb467,0,0,0xb567,0,0,0xb767,0,0,0xb867,0,0,0xb967,0,0,0xba67, -0,0,0xbc67,0,0,0xbd67,0,0,0xbe67,0,0,0xbf67,0,0,0xc067,0, -0,0xc167,0,0,0xc367,0,0xe00000,0xc467,0,0xe00000,0xc667,0,0,0xc767,0,0, -0xc867,0,0,0xc967,0,0,0xca67,0,0,0xcc67,0,0xe00000,0xcf67,0,0xe00000,0xd067, -0,0xe00000,0xd367,0,0,0xd467,0,0,0xd567,0,0,0xd667,0,0,0xd867,0, -0,0xda67,0,0,0xdb67,0,0,0xdc67,0,0,0xdd67,0,0,0xde67,0,0, -0xdf67,0,0,0xe067,0,0,0xe167,0,0,0xe267,0,0,0xe367,0,0xe00000,0xe467, -0,0,0xe567,0,0,0xe667,0,0,0xe767,0,0,0xe867,0,0,0xe967,0, -0,0xea67,0,0,0xeb67,0,0,0xec67,0,0,0xed67,0,0,0xee67,0,0, -0xef67,0,0,0xf167,0,0,0xf367,0,0,0xf567,0,0,0xf667,0,0,0xf767, -0,0,0xf867,0,0,0xf967,0,0,0xfa67,0,0xe00000,0xfb67,0,0,0xfc67,0, -0,0xfd67,0,0,0xfe67,0,0,0x10167,0,0,0x10267,0,0,0x10367,0,0, -0x10467,0,0,0x10667,0,0,0x10767,0,0,0x10867,0,0,0x10967,0,0,0x10a67, -0,0,0x10b67,0,0,0x10c67,0,0,0x10d67,0,0,0x10e67,0,0,0x10f67,0, -0,0x11067,0,0,0x11367,0,0,0x11467,0,0,0x11567,0,0,0x11667,0,0, -0x11767,0,0,0x11867,0,0,0x11967,0,0xe00000,0x11a67,0,0,0x11b67,0,0,0x11c67, -0,0,0x11d67,0,0,0x11e67,0,0,0x11f67,0,0,0x12067,0,0,0x12167,0, -0,0x12267,0,0,0x12367,0,0,0x12467,0,0,0x12567,0,0,0x12667,0,0, -0x12767,0,0,0x12867,0,0,0x12967,0,0,0x12a67,0,0xe00000,0x12b67,0,0,0x12c67, -0,0,0x12d67,0,0,0x12f67,0,0,0x13067,0,0,0x13167,0,0,0x13267,0, -0,0x13367,0,0,0x13467,0,0,0x13567,0,0,0x13667,0,0,0x13767,0,0, -0x13867,0,0,0x13967,0,0,0x13a67,0,0,0x13b67,0,0,0x13c67,0,0,0x13d67, -0,0,0x13f67,0,0,0x14067,0,0,0x14167,0,0,0x14367,0,0,0x14467,0, -0,0x14567,0,0,0x14667,0,0,0x14767,0,0,0xa0067,0,0xe00000,0xa4f67,0,0xe00000, -0xa5f67,0,0xe00000,0xac567,0,0xe00000,0xad167,0,0xe00000,0xb0067,0,0xe00000,0xb1267,0,0xe00000,0xb2e67, +0x2d67,0,0,0x3167,0x20000000,0,0x3267,0x20000000,0,0x3a67,0,0,0x3b67,0,0,0x3e67, +0,0,0x4067,0,0,0x4167,0,0,0x4467,0,0,0x4867,0,0,0x4967,0, +0,0x4a67,0,0,0x5067,0,0,0x5167,0,0,0x5467,0,0,0x5567,0,0, +0x5667,0x80000,0x20,0x5767,0,0,0x5867,0,0,0x5967,0,0,0x5b67,0,0,0x5c67, +0,0,0x5d67,0,0,0x6067,0x80000,0x20,0x6267,0,0,0x6367,0,0,0x6467,0, +0,0x6567,0,0,0x6f67,0,0,0x7067,0,0,0x7367,0x20000000,0,0x7567,0,0, +0x7667,0,0,0x7767,0,0,0x7867,0,0,0x7a67,0,0,0x7b67,0,0,0x7c67, +0,0,0x7e67,0,0,0x7f67,0,0,0x8167,0,0,0x8267,0,0,0x8467,0, +0,0x8567,0,0,0x8667,0,0,0x8767,0,0,0x8967,0,0,0x8b67,0,0, +0x8c67,0,0,0x8e67,0x20000000,0,0x8f67,0,0,0x9067,0,0,0x9167,0,0,0x9267, +0,0,0x9367,0,0,0x9567,0,0,0x9667,0,0,0x9767,0,0,0x9867,0, +0,0x9967,0,0,0x9a67,0,0,0x9c67,0,0,0x9f67,0,0,0xa167,0,0, +0xa367,0,0,0xa467,0,0,0xa567,0,0,0xa667,0,0,0xa767,0,0,0xa867, +0,0,0xa967,0,0,0xaa67,0,0xe00000,0xab67,0,0xe00000,0xac67,0,0,0xad67,0, +0,0xae67,0,0,0xaf67,0,0,0xb167,0,0,0xb267,0,0,0xb467,0,0, +0xb567,0,0,0xb767,0,0,0xb867,0,0,0xb967,0,0,0xba67,0,0,0xbc67, +0,0,0xbd67,0,0,0xbe67,0,0,0xbf67,0,0,0xc067,0,0,0xc167,0, +0,0xc367,0,0xe00000,0xc467,0,0xe00000,0xc667,0,0,0xc767,0,0,0xc867,0,0, +0xc967,0,0,0xca67,0,0,0xcc67,0,0xe00000,0xcf67,0,0xe00000,0xd067,0,0xe00000,0xd367, +0,0,0xd467,0,0,0xd567,0,0,0xd667,0,0,0xd867,0,0,0xda67,0, +0,0xdb67,0,0,0xdc67,0,0,0xdd67,0,0,0xde67,0,0,0xdf67,0,0, +0xe067,0,0,0xe167,0,0,0xe267,0,0,0xe367,0,0xe00000,0xe467,0,0,0xe567, +0,0,0xe667,0,0,0xe767,0,0,0xe867,0,0,0xe967,0,0,0xea67,0, +0,0xeb67,0,0,0xec67,0,0,0xed67,0,0,0xee67,0,0,0xef67,0,0, +0xf167,0,0,0xf367,0,0,0xf567,0,0,0xf667,0,0,0xf767,0,0,0xf867, +0,0,0xf967,0,0,0xfa67,0,0xe00000,0xfb67,0,0,0xfc67,0,0,0xfd67,0, +0,0xfe67,0,0,0x10167,0,0,0x10267,0,0,0x10367,0,0,0x10467,0,0, +0x10667,0,0,0x10767,0,0,0x10867,0,0,0x10967,0,0,0x10a67,0,0,0x10b67, +0,0,0x10c67,0,0,0x10d67,0,0,0x10e67,0,0,0x10f67,0,0,0x11067,0, +0,0x11367,0,0,0x11467,0,0,0x11567,0,0,0x11667,0,0,0x11767,0,0, +0x11867,0,0,0x11967,0,0xe00000,0x11a67,0,0,0x11b67,0,0,0x11c67,0,0,0x11d67, +0,0,0x11e67,0,0,0x11f67,0,0,0x12067,0,0,0x12167,0,0,0x12267,0, +0,0x12367,0,0,0x12467,0,0,0x12567,0,0,0x12667,0,0,0x12767,0,0, +0x12867,0,0,0x12967,0,0,0x12a67,0,0xe00000,0x12b67,0,0,0x12c67,0,0,0x12d67, +0,0,0x12f67,0,0,0x13067,0,0,0x13167,0,0,0x13267,0,0,0x13367,0, +0,0x13467,0,0,0x13567,0,0,0x13667,0,0,0x13767,0,0,0x13867,0,0, +0x13967,0,0,0x13a67,0,0,0x13b67,0,0,0x13c67,0,0,0x13d67,0,0,0x13f67, +0,0,0x14067,0,0,0x14167,0,0,0x14367,0,0,0x14467,0,0,0x14567,0, +0,0x14667,0,0,0x14767,0,0,0xa0067,0,0xe00000,0xa4f67,0,0xe00000,0xa5f67,0,0xe00000, +0xac567,0,0xe00000,0xad167,0,0xe00000,0xb0067,0,0xe00000,0xb1267,0,0xe00000,0xb2e67,0,0xe00000,0xb4867, 0,0xe00000,0x11000100,0,0x900020,0x11000100,0x40000001,0x440020,0x11000100,0x40000001,0x643020,0x11000100,0x40000001,0xa5a040,0x11000100,0x40000001, 0x116a8a0,0x11000200,0,0x900020,0x11000200,0x4000001,0xc4000b,0x11000200,0x7c00100,0x220402,0x11000200,0x24000000,0x200000,0x11000200,0x24000008,0x1710000, 0x11000200,0x40000001,0x1d3b020,0x11000219,0x7c00100,0x220401,0x11000219,0x7c00100,0x250401,0x11000319,0x7c00100,0x220401,0x11000319,0x7c00100,0x220402,0x11000319, @@ -3590,7 +3603,7 @@ static const uint32_t propsVectors[7230]={ 0x250400,0x11000908,0x7c00100,0x250401,0x11000a03,0x4000000,0x200400,0x11000a03,0x4000000,0x201000,0x11000a03,0x4000000,0x270000,0x11000a03,0x7c00100,0x220400, 0x11000a03,0x7c00100,0x220402,0x11000a03,0x7c00100,0x250400,0x11000a03,0x7c00500,0x230400,0x11000a03,0xc000010,0x1049400,0x11000b13,0x2802500,0x962460,0x11000b13, 0x4000000,0x200000,0x11000b13,0x4000000,0x201000,0x11000b13,0x4000000,0x230400,0x11000b13,0x4000002,0x400000,0x11000b13,0x4000010,0x200000,0x11000b13,0x7c00100, -0x2633800,0x11000c00,0x80000000,0x218960,0x11000c02,0x2802100,0x962460,0x11000c02,0x2802400,0x962460,0x11000c02,0x4000000,0x200000,0x11000c02,0x4000000,0x1329400, +0x2633800,0x11000c00,0x80000000,0x1329960,0x11000c02,0x2802100,0x962460,0x11000c02,0x2802400,0x962460,0x11000c02,0x4000000,0x200000,0x11000c02,0x4000000,0x1329400, 0x11000c02,0x4000000,0x1329800,0x11000c02,0x4000000,0x1500000,0x11000c02,0x6800000,0x1329800,0x11000c02,0x7c00100,0x230400,0x11000c02,0x7c00100,0x230401,0x11000c02, 0x7c00100,0x230402,0x11000c02,0x7c00500,0x230400,0x11000c02,0x7d00100,0x230400,0x11000f01,0x2802400,0x962460,0x11000f0a,0x2802100,0x962460,0x11000f0a,0x2802400, 0x962460,0x11000f0a,0x2806400,0x962460,0x11000f0a,0x4000000,0x200000,0x11000f0a,0x6800100,0x962540,0x11000f0a,0x7c00100,0x230400,0x11000f0a,0x7c00100,0x230401, @@ -3614,7 +3627,7 @@ static const uint32_t propsVectors[7230]={ 0x201c00,0x11002800,0x6800020,0x201c00,0x11002800,0x24000000,0x200000,0x11002800,0x24000000,0x200002,0x11002800,0x24000000,0x810000,0x11002800,0x24000000,0x1410000, 0x11002800,0x24000000,0x1500000,0x11002800,0x24000000,0x1500002,0x11002800,0x24000002,0x400000,0x11002800,0x24000006,0xc0000b,0x11002800,0x24000008,0x1410000,0x11002800, 0x24000008,0x1710000,0x11002800,0x24000020,0x1001400,0x11002800,0x24000020,0x1500002,0x11002800,0x2c000010,0x1248000,0x11002800,0x2c000010,0x1248002,0x11002800,0x40000001, -0x63b020,0x11002800,0x40080000,0x918820,0x11002801,0x80000,0x2a65620,0x11002801,0x82000,0x962460,0x11002900,0x4000000,0x20000e,0x11002900,0x4000000,0x20000f, +0x63b020,0x11002800,0x40080000,0x918820,0x11002801,0x2880000,0x2a65620,0x11002801,0x2882000,0x962460,0x11002900,0x4000000,0x20000e,0x11002900,0x4000000,0x20000f, 0x11002900,0x4000020,0x20000e,0x11002900,0x4000020,0x20000f,0x11002900,0x4000020,0x81000e,0x11002900,0x4000020,0x81000f,0x11002900,0x4000020,0x141000e,0x11002900, 0x4000020,0x141000f,0x11002900,0x4000022,0x20000e,0x11002900,0x4000022,0x20000f,0x11002a00,0x4000000,0x1500000,0x11002a00,0x4000000,0x1600000,0x11002a00,0x4000000, 0x1600002,0x11002b01,0x2000,0x962460,0x11002b01,0x2802020,0x962460,0x11002c00,0x4000000,0x200000,0x11002c00,0x4000000,0x200002,0x11002c00,0x4000000,0x20000f, @@ -3670,17 +3683,17 @@ static const uint32_t propsVectors[7230]={ 0x4000000,0x141000c,0x110a5400,0x4000000,0x150000c,0x110a5400,0x4000000,0x160000c,0x110a5400,0x4000002,0xe7000c,0x110a5400,0x4000010,0x87140c,0x110a5400,0x4000010, 0xe7000c,0x110a5400,0x4000010,0x120140c,0x110a5400,0x4000010,0x127100c,0x110a5400,0x4000020,0xe0000c,0x110a5400,0x4000026,0xe7000c,0x110a5400,0xc000010,0x80ac0c, 0x110a5400,0xc000010,0xb4800c,0x11400c0c,0x4000010,0xb00000,0x11400c0c,0x4000010,0x1071400,0x11400c17,0xc000010,0xb48000,0x11400c1e,0x7c00900,0x230400,0x11400f4b, -0xc000010,0x448000,0x11400f5f,0xc000010,0x448000,0x11401d94,0x4000000,0x200000,0x11403dca,0x4000000,0xe00000,0x114457bf,0x4000004,0x120000a,0x114457bf,0x4000008, -0x81000a,0x114457bf,0x4000008,0x141000a,0x114457bf,0x4000010,0x87000a,0x114457bf,0xc000010,0x84800a,0x114457c8,0x3802500,0x126246a,0x114457c8,0x7c00d00,0x2530c0a, -0x114a3dbf,0x24000000,0x810000,0x114a3dbf,0x24000000,0x1410000,0x114a3dbf,0x24000008,0x810000,0x114a3dbf,0x24000008,0x1410000,0x114a3dbf,0x24000010,0x870000,0x114a3dbf, -0x2c000010,0x848000,0x114a3dc5,0x4000000,0xe00000,0x114a3dc5,0x24000000,0xe00000,0x114a3dc5,0x24000002,0xe00000,0x114a3dc5,0x24000002,0x1200000,0x114a3dc5,0x24000008, -0x810000,0x114a3dc5,0x24000008,0x1410000,0x114a3dc8,0x7c00900,0x930c00,0x114a3dc8,0x7c00900,0xe30c00,0x114a3dca,0x7c00300,0xe30000,0x114a3ec8,0x7000400,0x1200c02, -0x114a3fbf,0x4000004,0x1200000,0x114a3fc8,0x7c00d00,0x2530c00,0x114a42ca,0x4000000,0xe00000,0x114a42ca,0x4000000,0xe0000f,0x114a44ca,0x4000000,0xe00002,0x114a44ca, -0x4000000,0xe00003,0x114a45ca,0x4000000,0xe00002,0x114a45ca,0x4000000,0xe0000d,0x11505103,0x24000000,0x810000,0x11505103,0x24000000,0x1410000,0x1180090a,0x2802400, +0xc000010,0x448000,0x11400f5f,0xc000010,0x448000,0x11401d94,0x4000000,0x200000,0x11403dcc,0x4000000,0xe00000,0x114457c1,0x4000008,0x81000a,0x114457c1,0x4000008, +0x141000a,0x114457c1,0x4000010,0x87000a,0x114457c1,0x6800004,0x120000a,0x114457c1,0xc000010,0x84800a,0x114457ca,0x3802500,0x126246a,0x114457ca,0x7c00d00,0x2530c0a, +0x114a3dc1,0x24000000,0x810000,0x114a3dc1,0x24000000,0x1410000,0x114a3dc1,0x24000008,0x810000,0x114a3dc1,0x24000008,0x1410000,0x114a3dc1,0x24000010,0x870000,0x114a3dc1, +0x2c000010,0x848000,0x114a3dc7,0x4000000,0xe00000,0x114a3dc7,0x24000000,0xe00000,0x114a3dc7,0x24000002,0xe00000,0x114a3dc7,0x24000002,0x1200000,0x114a3dc7,0x24000008, +0x810000,0x114a3dc7,0x24000008,0x1410000,0x114a3dca,0x7c00900,0x930c00,0x114a3dca,0x7c00900,0xe30c00,0x114a3dcc,0x7c00300,0xe30000,0x114a3eca,0x7000400,0x1200c02, +0x114a3fc1,0x6800004,0x1200000,0x114a3fca,0x7c00d00,0x2530c00,0x114a42cc,0x4000000,0xe00000,0x114a42cc,0x4000000,0xe0000f,0x114a44cc,0x4000000,0xe00002,0x114a44cc, +0x4000000,0xe00003,0x114a45cc,0x4000000,0xe00002,0x114a45cc,0x4000000,0xe0000d,0x11505113,0x24000000,0x810000,0x11505113,0x24000000,0x1410000,0x1180090a,0x2802400, 0x962460,0x11800c27,0x2802100,0x962460,0x11800c27,0x2802500,0x962460,0x11800f32,0x2802400,0x962460,0x11800f3f,0x2802400,0x962460,0x11820700,0x2802400,0x962460, -0x11820700,0x2802500,0x962460,0x118a3dcb,0x2802400,0x962460,0x118a3ec8,0x2802400,0x962460,0x11c00904,0x2802400,0x962460,0x11c00908,0x2802400,0x962460,0x11c00c2c, +0x11820700,0x2802500,0x962460,0x118a3dcd,0x2802400,0x962460,0x118a3eca,0x2802400,0x962460,0x11c00904,0x2802400,0x962460,0x11c00908,0x2802400,0x962460,0x11c00c2c, 0x6800000,0x1329800,0x11c00c30,0xc000010,0xb48000,0x11c00f78,0x6800000,0x1329800,0x11c0107d,0x6800000,0x1329800,0x11c01181,0x6800000,0x1329800,0x11c01285,0x6800000, -0x1329800,0x11c01489,0x4000000,0x200000,0x11c01489,0x6800000,0x1329800,0x11c0168d,0x6800000,0x1329800,0x11d05107,0x7c00100,0x230408,0x20000067,0x1000,0, +0x1329800,0x11c01489,0x4000000,0x200000,0x11c01489,0x6800000,0x1329800,0x11c0168d,0x6800000,0x1329800,0x11d05117,0x7c00100,0x230408,0x20000067,0x1000,0, 0x20000b13,0x2802400,0x962460,0x20000b13,0x2802500,0x962460,0x20001b27,0x2802100,0x962460,0x20001b27,0x2802100,0x962461,0x20001b27,0x2802400,0x962460,0x20001b27, 0x2802500,0x962460,0x20001b27,0x2806400,0x962460,0x20001b27,0x2902100,0x962462,0x20001b27,0x4000000,0x200000,0x20001b27,0x4000000,0x400000,0x20001b27,0x4000000, 0x500000,0x20001b27,0x4000000,0x810000,0x20001b27,0x4000000,0xb00000,0x20001b27,0x4000000,0xc0000b,0x20001b27,0x4000000,0x1410000,0x20001b27,0x4000010,0xb00000, @@ -3692,7 +3705,7 @@ static const uint32_t propsVectors[7230]={ 0x200400,0x3000080e,0x7c00100,0x220400,0x30000908,0x2000,0x962460,0x30000908,0x7c00100,0x220400,0x30000908,0x7c00100,0x220401,0x30000908,0x7c00100,0x250400, 0x30000908,0x7c00100,0x250401,0x30000a03,0x4000006,0x400400,0x30000c02,0x4000000,0x200000,0x30000c02,0x7c00100,0x230400,0x30000d22,0x2802100,0x962460,0x30000d22, 0x2802400,0x962460,0x30000d22,0x2802500,0x962460,0x30000d22,0x4000000,0x200000,0x30000d22,0x4000010,0x200000,0x30000d22,0x7c00100,0x230400,0x30000d22,0xc000010, -0x248000,0x30000d22,0x80000000,0x218960,0x30000e25,0x2802500,0x962460,0x30000e25,0x7c00100,0x230400,0x30001821,0x2802100,0x962460,0x30001821,0x2806400,0x962460, +0x248000,0x30000d22,0x80000000,0x218560,0x30000e25,0x2802500,0x962460,0x30000e25,0x7c00100,0x230400,0x30001821,0x2802100,0x962460,0x30001821,0x2806400,0x962460, 0x30001821,0x4000000,0x200000,0x30001821,0x6800100,0x962540,0x30001821,0x6800100,0x962541,0x30001821,0x7c00100,0x230400,0x30001b27,0x2802100,0x962460,0x30001b27, 0x2802400,0x962460,0x30001b27,0x4000000,0x200000,0x30001b27,0x4000000,0x400000,0x30001b27,0x7c00100,0x230400,0x30001c1c,0x2802100,0x1862460,0x30001c1c,0x2802400, 0x1862460,0x30001c1c,0x2806400,0x1862460,0x30001c1c,0x4000000,0x200000,0x30001c1c,0x6800100,0x1862400,0x30001c1c,0x6800100,0x1862540,0x30001c1c,0x7c00100,0x1830000, @@ -3701,292 +3714,294 @@ static const uint32_t propsVectors[7230]={ 0x230400,0x30002128,0xc000010,0x248000,0x3000221d,0x4000000,0x810000,0x3000221d,0x4000000,0x1410000,0x3000221d,0x4000001,0x445800,0x3000221d,0x7c00100,0x230400, 0x30002300,0x4000010,0x400000,0x30002320,0x7c00100,0x230400,0x30002417,0x2802100,0x1862460,0x30002417,0x2802400,0x1862460,0x30002417,0x2806400,0x1862460,0x30002417, 0x2882000,0x1862460,0x30002417,0x4000000,0x200000,0x30002417,0x4000000,0x400000,0x30002417,0x4000000,0x1600000,0x30002417,0x4000010,0x400000,0x30002417,0x4000010, -0x1200000,0x30002417,0x6800000,0x1329800,0x30002417,0x6800100,0x1862540,0x30002417,0x7c00100,0x1830000,0x30002417,0x7d00100,0x1830000,0x3000251b,0x80000,0xc18820, -0x3000251b,0x2802100,0x962460,0x3000251b,0x3c02100,0x962460,0x3000251b,0x4000000,0x200000,0x3000251b,0x4000006,0x500000,0x3000251b,0x4000010,0x400000,0x3000251b, -0x4000010,0xb70000,0x3000251b,0x4000800,0x200000,0x3000251b,0x6800000,0x1329800,0x3000251b,0x7c00100,0x230400,0x3000251b,0x7c00900,0x230400,0x3000251b,0xc000010, -0xb48000,0x3000251b,0x12882000,0x962460,0x30002800,0x24000000,0x200000,0x30002800,0x2c000010,0x1248002,0x30002a00,0x4000000,0x1600000,0x30002b01,0x2000,0x962460, -0x30002c00,0x4000000,0x200000,0x30002c00,0x7c00100,0x220405,0x30002d19,0x7c00100,0x250400,0x30002e00,0x24000000,0x200000,0x30003000,0x24000000,0x200000,0x30003100, -0x24000000,0x200000,0x30003600,0x24000000,0x200000,0x30003700,0x24000000,0x200000,0x3000392e,0x24000000,0x200000,0x30005013,0x7c00100,0x2633801,0x30005600,0, -0x918820,0x30020600,0x4000400,0x500400,0x30020701,0x2802400,0x962460,0x30020701,0x2802400,0xc62460,0x300a3a11,0x4020000,0xe00000,0x300a3a11,0x4020000,0xe00002, -0x300a3b11,0x4020000,0xe00002,0x300a3c00,0x4008000,0xe00000,0x300a3c00,0x4010000,0xe00000,0x300a3d11,0x7c00300,0xe30002,0x300a4305,0x7c00100,0xe30400,0x300a4611, -0x7c40300,0xe30000,0x300a4829,0x7c00100,0xe30400,0x300a4829,0x7c00900,0x1230400,0x300a4929,0x4000000,0xe00000,0x3040259a,0x4000010,0x400000,0x3040259a,0x4000010, -0xb70000,0x3040259a,0xc000010,0xb48000,0x304028ba,0x4000001,0xc41c0b,0x304a3dca,0x4000000,0xe00000,0x30800c27,0x2802100,0x962460,0x30c01c92,0x6800000,0x1329800, -0x3100080e,0x7c00120,0x220402,0x3100080e,0x7c00120,0x250402,0x31005167,0x1000,0,0x3100581e,0x4000000,0x200000,0x3100581e,0x7c00100,0x230400,0x3100590d, -0x7c00100,0x230400,0x31005a09,0x7c00100,0x220400,0x31005a09,0x7c00100,0x250400,0x31005b00,0x4000000,0x200000,0x31005c00,0x80000,0x918820,0x31005c00,0x2802000, -0x962460,0x31005c00,0x2802400,0x962460,0x31005c00,0x4000000,0x200000,0x31005c00,0x4000000,0x200001,0x31005c00,0x6800000,0x962540,0x31005c00,0x6800400,0x962540, -0x31005c01,0x2802400,0x962460,0x31005d00,0x4000020,0x200005,0x31005d00,0x6800020,0x1329805,0x31005d00,0x7c00120,0x220405,0x31005d00,0x7c00120,0x250405,0x31006000, -0x82000,0x962460,0x31006000,0x180000,0x918820,0x310a5e11,0x7c40300,0xe30000,0x310a5f11,0x7c00300,0xe30001,0x32000419,0x7c00100,0x250400,0x3200080e,0x4000020, -0x200000,0x3200080e,0x7c00100,0x220400,0x3200080e,0x7c00100,0x250400,0x32000908,0x7c00100,0x220400,0x32000908,0x7c00100,0x250400,0x32000c02,0x7c00100,0x230400, -0x32000e25,0x7c00100,0x230400,0x32001d0c,0x7c00100,0x230400,0x32002800,0x80000,0x1e18820,0x32002800,0x80020,0x218820,0x32002800,0x4000001,0x445802,0x32002800, -0x24000000,0x200000,0x32002800,0x24000000,0x1500002,0x32002800,0x24000020,0x200000,0x32002800,0x2c000010,0x1248002,0x32002919,0x7c00100,0x22040f,0x32002a00,0x4000000, -0x1600000,0x32002b01,0x2000,0x962460,0x32002b01,0x2802000,0x962460,0x32002b01,0x2802020,0x962460,0x32002c00,0x4000000,0x200000,0x32002c00,0x4000020,0x200000, -0x32002c00,0x4000020,0x200005,0x32002c00,0x7c00120,0x220405,0x32002c00,0x7c00120,0x250405,0x32002e00,0x24000020,0x200000,0x32002f00,0x24000020,0x200000,0x32003000, -0x24000000,0x200000,0x32003000,0x24000020,0x200000,0x32003500,0x24000000,0x200000,0x32003600,0x24000020,0x200000,0x32003700,0x24000000,0x100000,0x32003700,0x24000000, -0x200000,0x32003800,0x24000000,0x810000,0x32003800,0x24000000,0x1410000,0x32005102,0x4000000,0x1500008,0x32005502,0x7c00100,0x230400,0x32006108,0x7c00100,0x220400, -0x32006108,0x7c00100,0x250400,0x3200622a,0x2802100,0x962460,0x3200622a,0x2806400,0x962460,0x3200622a,0x7c00100,0x230400,0x3200632b,0x2802100,0x962460,0x3200632b, -0x6804000,0x962540,0x3200632b,0x7c00100,0x230400,0x3200642c,0x2802100,0x962460,0x3200642c,0x7c00100,0x230400,0x3200652d,0x2802100,0x962460,0x3200652d,0x7c00100, -0x230400,0x32006600,0x24000020,0x200000,0x32006700,0x24000020,0x200000,0x32006800,0x24000020,0x200000,0x32006900,0x24000020,0x200000,0x32006900,0x24000020,0x810000, -0x32006900,0x24000020,0x1410000,0x32006a00,0x24000020,0x200000,0x32006a00,0x24000020,0x200001,0x32006a00,0x24000020,0x200002,0x32020701,0x2882000,0xc62460,0x32023300, -0x4000000,0x100000,0x32026c01,0x12882000,0x962460,0x32065700,0x4000000,0x810011,0x32065700,0x4000000,0x1410011,0x32086600,0x24000020,0x810000,0x32086600,0x24000020, -0x1410000,0x32086900,0x24000020,0x810000,0x32086900,0x24000020,0x1410000,0x320a3600,0x24000020,0x200000,0x320a3d11,0x7c00100,0x1230400,0x320a3e14,0x7c00100,0xe30010, -0x320a3e14,0x7c00100,0x2530000,0x320a3f16,0x7c00100,0xe30c10,0x320a4400,0x4000000,0xe00003,0x320a4929,0x4000000,0xe00000,0x320a4f11,0x7c00300,0xe30001,0x320a6b16, -0x7c00100,0x2530c00,0x32406396,0xc000010,0x448000,0x324a3dcd,0x4000000,0xe00000,0x324a3dcd,0x7c00100,0x1230400,0x324a3fc8,0x4000002,0x1200c00,0x324a53c5,0x24000000, -0xe00000,0x32820701,0x2802000,0x962460,0x40000419,0x7c00100,0x220400,0x40000519,0x7c00100,0x220400,0x40000600,0x4000400,0x200400,0x4000080e,0x7c00100,0x220400, -0x4000080e,0x7c00100,0x250400,0x4000080e,0x7c00100,0x250402,0x40000c02,0x2802100,0x962460,0x40000c02,0x2802400,0x962460,0x40000c02,0x2802500,0x962460,0x40000c02, -0x4000000,0x200000,0x40000c02,0x4000000,0x1071400,0x40000c02,0x7c00100,0x230400,0x40000c02,0x80000000,0x218960,0x40000d22,0x7c00100,0x230400,0x40000f0a,0x7c00100, -0x230400,0x40001004,0x7c00100,0x230400,0x40001110,0x2802100,0x962460,0x40001110,0x6800100,0x962540,0x4000120f,0x2802100,0x962460,0x4000120f,0x4000000,0x1600000, -0x4000120f,0x7c00100,0x230400,0x4000131f,0x7c00100,0x230400,0x40001423,0x4000000,0x200000,0x40001423,0x4000000,0x1600000,0x40001615,0x2802400,0x962460,0x40001615, -0x7c00100,0x230400,0x40002417,0x2802400,0x1862460,0x40002417,0x4000000,0x200000,0x40002800,0x6800000,0x201c00,0x40002800,0x24000002,0x200000,0x40002c00,0x4000000, -0x200002,0x40003000,0x24000000,0x200000,0x40003000,0x24000020,0x200000,0x40003700,0x24000000,0x200000,0x40005a09,0x7c00100,0x220400,0x40005a09,0x7c00100,0x250400, -0x40005d00,0x7c00120,0x220405,0x40006f30,0x2802100,0x962460,0x40006f30,0x2802400,0x962460,0x40006f30,0x4000000,0x200000,0x40006f30,0x6800000,0x1329800,0x40006f30, -0x6800100,0x962540,0x40006f30,0x7c00100,0x230400,0x40006f30,0xc000010,0xb48000,0x40007034,0x7c00100,0x1830000,0x40007117,0x4000000,0x200000,0x40007208,0x7c00100, -0x220400,0x4000720e,0x7c00100,0x220400,0x4000720e,0x7c00500,0x22040e,0x4000720e,0x7c00500,0x22040f,0x40007219,0x7c00100,0x220400,0x40007219,0x7c00500,0x220400, -0x40007219,0x7c00500,0x22040e,0x40007219,0x7c00500,0x22040f,0x40007300,0x24000000,0x200000,0x40007400,0x4000000,0x200000,0x40007531,0x7c00100,0x230400,0x40007631, -0x7c00100,0x230400,0x40007835,0x4000010,0x400000,0x40007835,0x7c00100,0x230400,0x40007933,0x7c00100,0x230400,0x40007a32,0x6800000,0x1329800,0x40007a32,0x7c00100, -0x230400,0x40007b2f,0x7c00100,0x230400,0x40007c00,0x4000000,0x200000,0x40020701,0x2802400,0x962460,0x40020701,0x2802400,0xc62460,0x40023300,0x4000000,0x200000, -0x40027d01,0x12882000,0x962460,0x400a3700,0x24000000,0x200000,0x400a3700,0x24000000,0xe00000,0x400a4400,0x4000000,0xe0000d,0x400a4412,0x4000000,0xe00002,0x400a4412, -0x4000000,0xe00003,0x400a4500,0x4000000,0xe0000d,0x400a5300,0x4000000,0x810010,0x400a5300,0x4000000,0x1410010,0x40507709,0x4000000,0x200000,0x4050770c,0x4000000, -0x400000,0x4050770f,0x4000000,0x200000,0x4050770f,0x4000000,0x400000,0x40c01489,0x4000000,0x200000,0x40d05107,0x4000000,0x200000,0x41000419,0x7c00100,0x220400, -0x41000419,0x7c00100,0x250400,0x4100080e,0x7c00100,0x220400,0x4100080e,0x7c00100,0x250400,0x41000908,0x7c00100,0x220400,0x41000908,0x7c00100,0x250400,0x41000b13, -0x2802000,0x962460,0x41000b13,0x2802100,0x962460,0x41000b13,0x4000000,0xb00000,0x41000c02,0x2802100,0x962460,0x41000c02,0x4000000,0x1500000,0x41000c02,0xc000010, -0xb48000,0x41000f0a,0x7c00100,0x230400,0x41001004,0x7c00100,0x230400,0x41001423,0x7c00100,0x230400,0x41001b27,0x4000000,0x500000,0x41001d0c,0x7c00100,0x22040f, -0x41001d0c,0x7c00100,0x230400,0x41001f0b,0x2802400,0x962460,0x41001f0b,0x4000000,0x200000,0x41001f0b,0x7c00100,0x230400,0x41002800,0x24000000,0x200000,0x41002800, -0x24000000,0x400000,0x41002919,0x7c00100,0x22040e,0x41002a00,0x4000000,0x1600000,0x41002b01,0x2802020,0x962460,0x41002c00,0x4000000,0x200000,0x41002c00,0x7c00120, -0x220405,0x41003000,0x24000000,0x200000,0x41003700,0x24000000,0x200000,0x41003700,0x24000000,0xe00000,0x41005d00,0x7c00120,0x220405,0x41006600,0x24000020,0x200000, -0x41006600,0x24000020,0x810000,0x41006600,0x24000020,0x1410000,0x41007208,0x7c00100,0x22040f,0x41007219,0x7c00100,0x220400,0x41007300,0x24000000,0x200000,0x41007e0e, -0x2802000,0x962460,0x41007e0e,0x4000000,0x200000,0x41007f0e,0x4000000,0x200000,0x41007f0e,0x7c00100,0x230400,0x41008002,0x7c00100,0x230400,0x41008137,0x2802100, -0x962460,0x41008137,0x4000000,0x200000,0x41008137,0x6800100,0x962540,0x41008137,0x7c00100,0x230400,0x41008301,0x2802000,0x962460,0x41008407,0x4000000,0x200000, -0x41008407,0x4000000,0x400000,0x41008407,0x4000000,0xb00000,0x41008407,0x7c00100,0x220400,0x41008407,0x7c00100,0x250400,0x4100850b,0x7c00100,0x230400,0x4100860b, -0x4000000,0x200000,0x4100860b,0x7c00100,0x230400,0x4100870c,0x7c00100,0x220400,0x41008838,0x7c00100,0x220400,0x41008838,0x7c00100,0x250400,0x41008939,0x2802000, -0x962460,0x41008939,0x2802100,0x962460,0x41008939,0x2806000,0x962460,0x41008939,0x4000000,0x200000,0x41008939,0x4000000,0x400000,0x41008939,0x7c00100,0x230400, -0x41008939,0xc000010,0x448000,0x41008a00,0x4000400,0x200400,0x41008b3b,0x4000000,0x1800000,0x41008b3b,0x6800000,0x1329800,0x41008b3b,0x7c00100,0x1830000,0x41008b3b, -0x7e00100,0x1830000,0x41008c3d,0x4000010,0x400000,0x41008c3d,0x7c00100,0x230400,0x41008d0e,0x7c00100,0x22040f,0x41008d19,0x7c00100,0x220400,0x41008d19,0x7c00100, -0x22040f,0x41008e00,0x24000000,0x200000,0x41008e00,0x24000000,0x400000,0x41008e00,0x24000000,0x1710000,0x41008e00,0x24000006,0x400000,0x41008f3a,0x2802100,0x962460, -0x41008f3a,0x2806000,0x962460,0x41008f3a,0x4000000,0x200000,0x41008f3a,0x6800100,0x962540,0x41008f3a,0x7c00100,0x230400,0x4100903c,0x7c00100,0x230400,0x4100903c, -0x7c00100,0x23040f,0x41020701,0x2802000,0x962460,0x41020701,0x2802000,0xc62460,0x410a3700,0x24000000,0x200000,0x410a3700,0x24000000,0xe00000,0x410a4412,0x4000000, -0xe00003,0x410a4711,0x7c40300,0xe30000,0x410a4f11,0x7c00300,0xe30001,0x410a9100,0x4000000,0x800010,0x410a9100,0x4000000,0x810010,0x410a9100,0x4000000,0x870010, -0x410a9100,0x4000000,0xb00010,0x410a9100,0x4000000,0xf00010,0x410a9100,0x4000000,0x1001410,0x410a9100,0x4000000,0x1071010,0x410a9100,0x4000000,0x1071410,0x410a9100, -0x4000000,0x1410010,0x41408ad0,0x4000400,0x200000,0x414a82ca,0x4000000,0xe00000,0x41808300,0x2802000,0x962460,0x41c01489,0x6800000,0x1329800,0x50000419,0x7c00100, -0x220400,0x50000419,0x7c00100,0x250400,0x5000080e,0x7c00100,0x220400,0x50000908,0x7c00100,0x220400,0x50000908,0x7c00100,0x250400,0x50000b13,0x2802500,0x962460, -0x50000f0a,0x7c00100,0x230400,0x50001615,0x2802100,0x962460,0x50001615,0x7c00100,0x230400,0x50002b01,0x2802020,0x962460,0x50002c00,0x4000000,0x200000,0x50002c19, -0x7c00100,0x220400,0x50002d19,0x7c00100,0x220400,0x50003000,0x24000000,0x200000,0x50003000,0x24000020,0x200000,0x50003700,0x24000000,0x200000,0x50005d00,0x7c00120, -0x220405,0x50005d00,0x7c00120,0x250405,0x50006108,0x7c00100,0x220400,0x50006108,0x7c00100,0x250400,0x50006600,0x24000020,0x200000,0x50007300,0x24000000,0x200000, -0x50008301,0x2802400,0x962460,0x50008a00,0x7c00500,0x230400,0x50009257,0x2802400,0x962460,0x50009257,0x4000000,0x200000,0x50009257,0x4000010,0x1071400,0x50009257, -0x6800000,0x1329800,0x50009257,0x7c00100,0x230400,0x50009257,0x7c00500,0x230400,0x50009257,0x7c00900,0x230400,0x50009257,0xc000010,0xb48000,0x5000933e,0x2802100, -0x962460,0x5000933e,0x2802400,0x962460,0x5000933e,0x4000000,0x200000,0x5000933e,0x4000000,0x400000,0x5000933e,0x4000010,0x400000,0x5000933e,0x6800000,0x1329800, -0x5000933e,0x6800100,0x962540,0x5000933e,0x6800100,0x962541,0x5000933e,0x6804400,0x962540,0x5000933e,0x7c00100,0x230400,0x5000933e,0x7c00100,0x230401,0x5000933e, -0xc000010,0x448000,0x50009419,0x7c00100,0x220400,0x50009419,0x7c00100,0x250400,0x50009500,0x4000400,0x200400,0x5000965a,0x4000000,0x500000,0x5000965a,0x7c00100, -0x230400,0x5000965a,0xc000010,0xb48000,0x5000975b,0x4000000,0x200000,0x5000975b,0x4000010,0x400000,0x5000975b,0x7c00100,0x230400,0x50009865,0x7c00100,0x230400, -0x50009965,0x4000010,0x400000,0x50009965,0x7c00100,0x230400,0x50409aca,0x4000000,0x200000,0x5100080e,0x7c00100,0x220400,0x5100080e,0x7c00100,0x250400,0x51000c02, -0x2802100,0x962460,0x51000c02,0x4000000,0x1500000,0x51000c02,0x4000020,0x200000,0x51000c02,0x7c00100,0x230400,0x51000f0a,0x7c00100,0x230400,0x51000f0a,0x7c00500, -0x230400,0x51001110,0x2802100,0x962460,0x5100131f,0x2802100,0x962460,0x51001423,0x7c00100,0x230400,0x51001524,0x2802100,0x962460,0x51001524,0x4000000,0x200000, -0x51001524,0x7c00100,0x230400,0x5100171a,0x2802100,0x962460,0x5100171a,0x4000000,0x200000,0x5100171a,0x4000000,0x1500000,0x5100171a,0x7c00100,0x230400,0x51001b27, -0x4000000,0x200000,0x51001b27,0x4000000,0x400000,0x51001b27,0x4000000,0x500000,0x51001b27,0x7c00100,0x230400,0x51001c1c,0x2802100,0x1862460,0x51001c1c,0x2802500, -0x1862460,0x51001c1c,0x2806400,0x1862460,0x51001c1c,0x4000000,0x1800000,0x51001c1c,0x6800000,0x1329800,0x51001c1c,0x6800100,0x1862400,0x51001c1c,0x6800100,0x1862540, -0x51001c1c,0x6800500,0x1862400,0x51001c1c,0x7c00100,0x1830000,0x5100251b,0x7c00100,0x230400,0x51002619,0x7c00100,0x220400,0x51002619,0x7c00100,0x250400,0x51002800, -0x80020,0x218820,0x51002c00,0x4000000,0x200000,0x51002d19,0x7c00100,0x230400,0x51003700,0x24000000,0x200000,0x51003700,0x24000000,0xe00000,0x51005201,0x2802400, -0x962460,0x51005c00,0x4000000,0x200000,0x51006108,0x7c00100,0x220400,0x51006108,0x7c00100,0x250400,0x51006600,0x24000020,0x200000,0x51006600,0x24000020,0x810000, -0x51006600,0x24000020,0x1410000,0x51007300,0x24000000,0x200000,0x51007300,0x24000020,0x200000,0x51008002,0x7c00100,0x230400,0x51008301,0x2802000,0x962460,0x51008301, -0x2802400,0x962460,0x51008301,0x2802400,0xc62460,0x51008a00,0x7c00500,0x230400,0x51008e00,0x24000000,0x200000,0x51008e00,0x24000000,0x400000,0x51008e00,0x24000000, -0x810000,0x51008e00,0x24000000,0x1400000,0x51008e00,0x24000000,0x1410000,0x51008e00,0x24000000,0x1710000,0x51008e00,0x24000002,0x200000,0x51008e00,0x24000500,0x230400, -0x51008e00,0x2c000010,0xb48000,0x51009419,0x7c00100,0x220400,0x51009419,0x7c00100,0x22040e,0x51009419,0x7c00100,0x22040f,0x51009419,0x7c00100,0x250400,0x51009500, -0x4000400,0x200400,0x51009500,0x7c00500,0x230400,0x51009519,0x7c00100,0x220400,0x51009519,0x7c00100,0x22040f,0x51009519,0x7c00100,0x230400,0x51009519,0x7c00100, -0x250400,0x51009b71,0x2802100,0x962460,0x51009b71,0x6800000,0x1329800,0x51009b71,0x6800100,0x962540,0x51009b71,0x6804400,0x962540,0x51009b71,0x7c00100,0x230400, -0x51009c52,0x2802100,0x962460,0x51009c52,0x2802400,0x962460,0x51009c52,0x2802d00,0x962460,0x51009c52,0x4000010,0x400000,0x51009c52,0x6800000,0x1329800,0x51009c52, -0x6800100,0x962540,0x51009c52,0x7c00100,0x230400,0x51009c52,0xc000010,0x448000,0x51009d6d,0x6800000,0x1329800,0x51009d6d,0x7c00100,0x230400,0x51009d6d,0x7c00500, -0x230400,0x51009d6d,0x7c00d00,0x230400,0x51009d6d,0xc000010,0x448000,0x51009e08,0x2802100,0x962460,0x51009f63,0x4000010,0x400000,0x51009f63,0x6800000,0x1329800, -0x51009f63,0x7c00100,0x230400,0x51009f63,0x7c00900,0x230400,0x51009f63,0xc000010,0x448000,0x51009f63,0xc000010,0xb48000,0x5100a008,0x2000,0x962460,0x5100a008, -0x2802400,0x962460,0x5100a008,0x4000000,0x200000,0x5100a008,0x7c00100,0x220400,0x5100a008,0x7c00100,0x230400,0x5100a008,0x7c00100,0x250400,0x5100a008,0x7c00500, -0x230400,0x5100a16f,0x2806400,0x962460,0x5100a16f,0x6800000,0x1329800,0x5100a16f,0x6800100,0x962540,0x5100a16f,0x7c00100,0x230400,0x5100a16f,0xc000010,0x448000, -0x5100a24f,0x2802100,0x962460,0x5100a24f,0x2802400,0x962460,0x5100a24f,0x6800000,0x1329800,0x5100a24f,0x7c00100,0x230400,0x5100a24f,0xc000010,0x448000,0x5100a36e, -0x2802100,0x962460,0x5100a36e,0x4000000,0x200000,0x5100a36e,0x6800100,0x962540,0x5100a36e,0x6804400,0x962540,0x5100a36e,0x7c00100,0x230400,0x5100a442,0x2802100, -0x962460,0x5100a442,0x4000000,0x200000,0x5100a442,0x6800000,0x1329800,0x5100a442,0x6800100,0x962540,0x5100a442,0x7c00100,0x230400,0x5100a442,0xc000010,0x448000, -0x5100a500,0x4000000,0x200000,0x5100a600,0x4000000,0x200000,0x5100a601,0x2802000,0x962460,0x5100a76b,0x7c00100,0x230400,0x5100a868,0x7c00100,0x230400,0x5100a96c, -0x4000000,0x200000,0x5100a96c,0x7c00100,0x230400,0x5100aa00,0x4000000,0xe00000,0x5100ab00,0x4000000,0xe00000,0x51086600,0x24000020,0x810000,0x51086600,0x24000020, -0x1410000,0x510a4005,0x7c00100,0xe30400,0x510a4711,0x7c40300,0xe30000,0x510a7300,0x24000000,0x200000,0x510aaa00,0x4000000,0xe00000,0x5140a2fe,0x4000400,0x400000, -0x514a82ca,0x4000000,0xe00000,0x51802bbc,0x2802000,0x962460,0x51c00908,0x2802400,0x962460,0x51c0a008,0x2802400,0x962460,0x52000f0a,0x2802100,0x962460,0x52000f0a, -0x6800100,0x962540,0x52000f0a,0x7c00100,0x230400,0x52001004,0x4000000,0x1600000,0x52001b00,0x4000000,0x200000,0x52001c1c,0x2802100,0x1862460,0x52001c1c,0x6800100, -0x1862400,0x52001c1c,0x6800500,0x1862400,0x52001e12,0x7c00100,0x2230500,0x52001e12,0x7c00100,0x2330520,0x52002128,0x4000002,0x400000,0x52002128,0x7c00100,0x230400, -0x52002a00,0x4000000,0x1500000,0x52002a00,0x4000000,0x1600000,0x52002d00,0x4000000,0x200006,0x52003000,0x24000000,0x200000,0x52006108,0x7c00100,0x220400,0x52006108, -0x7c00100,0x250400,0x52008301,0x2802400,0x962460,0x52008407,0x2802400,0x962460,0x52008407,0x7c00100,0x220400,0x52008407,0x7c00100,0x250400,0x52008b3b,0x6800000, -0x1800000,0x52008b3b,0x7c00100,0x1830000,0x52008e00,0x24000000,0x400000,0x52009419,0x7c00100,0x250400,0x5200975b,0x4000000,0x200000,0x5200ac7e,0x2802000,0x962460, -0x5200ac7e,0x2802100,0x962460,0x5200ac7e,0x2802400,0x962460,0x5200ac7e,0x4000010,0x200000,0x5200ac7e,0x7c00100,0x230400,0x5200ac7e,0xc000010,0x248000,0x5200ad28, -0x7c00100,0x230400,0x5200ae6a,0x2802100,0x1862460,0x5200ae6a,0x2802400,0x962460,0x5200ae6a,0x2802400,0x1862460,0x5200ae6a,0x2806000,0x1862460,0x5200ae6a,0x4000000, -0x1800000,0x5200ae6a,0x6800000,0x1329800,0x5200ae6a,0x6800100,0x1862400,0x5200ae6a,0x6800100,0x1862540,0x5200ae6a,0x7c00100,0x1830000,0x5200ae6a,0x7c00900,0x1830000, -0x5200ae6a,0xc000010,0x1848000,0x5200b083,0x4000010,0x400000,0x5200b083,0x7c00100,0x230400,0x5200b083,0xc000010,0x448000,0x5200b182,0x2802400,0x962460,0x5200b182, -0x4000000,0x200000,0x5200b182,0x4000010,0x400000,0x5200b182,0x7c00100,0x230400,0x5200b182,0xc000010,0x448000,0x5200b30a,0x2802400,0x962460,0x5200b30a,0x4000000, -0x200000,0x5200b30a,0x7c00100,0x230400,0x5200b54e,0x2802100,0x962460,0x5200b54e,0x2802400,0x962460,0x5200b54e,0x4000000,0x200000,0x5200b54e,0x4000010,0x400000, -0x5200b54e,0x6800000,0x1329800,0x5200b54e,0x6800100,0x962540,0x5200b54e,0x6804400,0x962540,0x5200b54e,0x7c00100,0x230400,0x5200b54e,0xc000010,0x448000,0x5200b61c, -0x4000000,0x1800000,0x5200b61c,0x6800500,0x1862400,0x5200b61c,0x7c00100,0x1830000,0x5200b61c,0x7c00900,0x1830000,0x5200b77f,0x2802100,0x1862460,0x5200b77f,0x2802400, -0x1862460,0x5200b77f,0x4000000,0x1800000,0x5200b77f,0x4000010,0x1800000,0x5200b77f,0x7c00100,0x1830000,0x5200b77f,0x7c00500,0x1830000,0x5200b77f,0x7c00900,0x1830000, -0x5200b77f,0x7e00100,0x1830000,0x5200b873,0x2802100,0x962460,0x5200b873,0x2806400,0x962460,0x5200b873,0x6800000,0x1329800,0x5200b873,0x6800100,0x962540,0x5200b873, -0x6800400,0x962540,0x5200b873,0x7c00100,0x230400,0x5200b873,0xc000010,0x448000,0x5200b912,0x7c00100,0x2230500,0x5200b912,0x7c00100,0x2330520,0x5200ba74,0x4000000, -0x200000,0x5200ba74,0x4000010,0x400000,0x5200ba74,0x7c00100,0x230400,0x5200bb85,0x4000000,0x200000,0x5200bb85,0x7c00100,0x230400,0x5200bc75,0x4000000,0x400000, -0x5200bc75,0x4000010,0x400000,0x5200bc75,0x7c00100,0x230400,0x5200bd7d,0x4000000,0x200000,0x5200bd7d,0x7c00100,0x230400,0x5200be7a,0x4000000,0x200000,0x5200be7a, -0x7c00100,0x230400,0x5200bf58,0x7c00100,0x230400,0x5200c002,0x4000000,0x200000,0x5200c178,0x2802100,0x962460,0x5200c178,0x2802400,0x962460,0x5200c178,0x2806400, -0x962460,0x5200c178,0x4000000,0x200000,0x5200c178,0x6800100,0x962540,0x5200c178,0x7c00100,0x230400,0x5200c178,0x7c00100,0x230401,0x5200c178,0xc000010,0x448000, -0x5200c178,0x80000000,0x218960,0x5200c247,0x7c00100,0x230400,0x5200c247,0x7c00100,0x830400,0x5200c247,0x7c00100,0x1430400,0x5200c300,0x4000000,0x200003,0x52022d00, -0x4000000,0x100006,0x52023700,0x24000000,0x100000,0x52023700,0x24000000,0xe00000,0x52023700,0x24000000,0x2800000,0x52024400,0x4000000,0x100000,0x52027300,0x24000000, -0x100000,0x5202c300,0x4000000,0x100000,0x5202c300,0x4000000,0x100002,0x5202c300,0x4000000,0x100003,0x5202c300,0x4000000,0x10000d,0x5202c300,0x4000100,0x150400, -0x5202c300,0x4000100,0x15040d,0x520a1e12,0x7c00100,0x2130480,0x520a3700,0x24000000,0xe00000,0x520a3800,0x24000000,0x100000,0x520a4711,0x7c40300,0xe30000,0x520a4f11, -0x7c00300,0xe30001,0x520a7300,0x24000000,0x100000,0x520ab412,0x7c00100,0x2130480,0x520ac400,0x4000000,0xe00002,0x520ac400,0x4000000,0xe0000d,0x520ac414,0x4000000, -0xe0000d,0x520ac511,0x7c40300,0xe30000,0x5240af9c,0x7c00100,0x230400,0x5240afa1,0x4000400,0x200000,0x5240afa3,0x6800400,0x962540,0x5240afa3,0x7c00100,0x230400, -0x5240afad,0x7c00100,0x230400,0x5240afaf,0x7c00100,0x230400,0x5240b2d2,0x4000000,0x200000,0x5240b2d2,0x4000000,0x1500000,0x5240b2dd,0x4000000,0x200000,0x5240b2eb, -0x4000000,0x200000,0x524a44ca,0x4000000,0xe00003,0x5250b501,0x7c00900,0x230400,0x5280af9c,0x2802400,0x962460,0x5280af9d,0x2802400,0x962460,0x5280afa3,0x2802400, -0x962460,0x5280afa5,0x2802400,0x962460,0x5280afa7,0x2802400,0x962460,0x52c0b3f8,0x2802400,0x962460,0x52c0b3fc,0x7c00100,0x230400,0x60000c02,0x2802100,0x962460, -0x60000c02,0x7c00100,0x230400,0x60000f0a,0x2802100,0x962460,0x60000f0a,0x6800100,0x962540,0x60000f0a,0x7c00100,0x230400,0x6000131f,0x4000000,0x200000,0x6000171a, -0x7c00100,0x230400,0x6000171a,0x7c00100,0x230560,0x60001b27,0x2802100,0x962460,0x60001b27,0x4000000,0xc00000,0x60001b27,0x7c00100,0x230400,0x60001f0b,0x2802400, -0x962460,0x60002919,0x7c00100,0x22040e,0x60002a00,0x4000000,0x1600000,0x60003000,0x24000000,0x200000,0x60003000,0x24000000,0xe00000,0x60003700,0x24000000,0x200000, -0x60003800,0x24000000,0x1710000,0x60005102,0x4000000,0x200000,0x60006108,0x7c00100,0x220400,0x60006108,0x7c00100,0x250400,0x60006600,0x24000020,0x200000,0x60008301, -0x2802400,0xc62460,0x6000903c,0x2806000,0x962460,0x6000903c,0x4000000,0x400000,0x60009519,0x7c00100,0x220400,0x60009519,0x7c00100,0x250400,0x6000a008,0x7c00100, -0x220400,0x6000a008,0x7c00100,0x250400,0x6000c300,0x4000000,0x2703580,0x6000c654,0x2802000,0x962460,0x6000c654,0x4000010,0x200000,0x6000c654,0x7c00100,0x230400, -0x6000c73f,0x2802000,0x962460,0x6000c73f,0x2802100,0x962460,0x6000c73f,0x4000000,0x200000,0x6000c73f,0x6800100,0x962540,0x6000c73f,0x6804000,0x962540,0x6000c73f, -0x7c00100,0x230400,0x6000c80b,0x7c00100,0x230400,0x6000c941,0x2802100,0x962460,0x6000c941,0x2806400,0x962460,0x6000c941,0x4000000,0x200000,0x6000c941,0x4000010, -0x200000,0x6000c941,0x6800000,0x1329800,0x6000c941,0x6800100,0x962540,0x6000c941,0x7c00100,0x230400,0x6000c941,0xc000010,0x448000,0x6000ca82,0x7c00100,0x230400, -0x6000cc00,0x4000000,0xe00000,0x6000d000,0x4000000,0x200000,0x6002c300,0x4000000,0x100000,0x6002c300,0x4000000,0x10000d,0x6002c300,0x4000100,0x150400,0x6002c300, -0x4000100,0x15040d,0x600a3000,0x24000000,0x200000,0x600a3000,0x24000000,0xe00000,0x600a3700,0x24000000,0x200000,0x600a3800,0x24000000,0x200000,0x600a3800,0x24000000, -0x2800000,0x600a4305,0x7c00100,0xe30400,0x600ac300,0x4000000,0x100000,0x600ac400,0x4000000,0xe0000d,0x600acb14,0x7c00100,0xe30000,0x600acb16,0x7c00100,0xe30c00, -0x600acc00,0x4000000,0xe00000,0x600acd00,0x4000000,0x200000,0x600acd00,0x4000000,0xe00000,0x600acd00,0x4000000,0x2800000,0x600ace00,0x4000000,0xe00000,0x600ace00, -0x4000000,0x2800000,0x600acf00,0x4000000,0xe00000,0x600acf00,0x4000000,0x2800000,0x600ad111,0x7c40300,0xe30000,0x604ac4ca,0x4000000,0xe00003,0x61000a03,0x4000000, -0x1600000,0x61000c02,0x80000000,0x218960,0x6100120f,0x4000000,0x200000,0x61001a18,0x7c00100,0x1830000,0x61001d0c,0x7c00100,0x230400,0x61001d0c,0x7c00100,0x250400, -0x61006600,0x24000020,0x200000,0x61008407,0x7c00100,0x220400,0x61008407,0x7c00100,0x250400,0x6100870c,0x7c00100,0x220400,0x61008e00,0x24000000,0x200000,0x61008e00, -0x24000000,0x400000,0x61008e00,0x24000002,0x300000,0x6100903c,0x7c00100,0x230400,0x61009519,0x7c00100,0x220400,0x61009519,0x7c00100,0x250400,0x61009519,0x7c00500, -0x22040f,0x61009b71,0x2802100,0x962460,0x61009b71,0x2806400,0x962460,0x61009b71,0x7c00100,0x230400,0x6100a008,0x2802100,0x962460,0x6100c300,0x4000000,0x20000f, -0x6100cd00,0x4000000,0x200000,0x6100d202,0x2802400,0x962460,0x6100d202,0x2802500,0x962460,0x6100d202,0x7c00100,0x230400,0x6100d302,0x4000020,0x200000,0x6100d302, -0x7c00120,0x230405,0x6100d476,0x2802100,0x962460,0x6100d476,0x2802100,0x962461,0x6100d476,0x2806400,0x962460,0x6100d476,0x4000000,0x400000,0x6100d476,0x6800000, -0x1329800,0x6100d476,0x6800100,0x962540,0x6100d476,0x7c00100,0x230400,0x6100d476,0xc000010,0x448000,0x6100d573,0x2802100,0x962460,0x6100d573,0x2806400,0x962460, -0x6100d573,0x6800100,0x962540,0x6100d573,0x7c00100,0x230400,0x6100d573,0x7c00900,0x230400,0x6100d573,0xc000010,0x448000,0x6100d68d,0x7c00100,0x230400,0x6100d756, -0x7c00100,0x230400,0x6100d85c,0x2802500,0x962460,0x6100d85c,0x6800100,0x962540,0x6100d85c,0x7c00100,0x230400,0x6100d85c,0x7c00500,0x230400,0x6100d997,0x2802100, -0x962460,0x6100d997,0x4000000,0x200000,0x6100d997,0x4000000,0x400000,0x6100d997,0x6800000,0x1329800,0x6100d997,0x6800100,0x962540,0x6100d997,0x6804400,0x962540, -0x6100d997,0x7c00100,0x230400,0x6100d997,0x7c00100,0x230560,0x6100d997,0xc000010,0x448000,0x6100da98,0x6800000,0x1329800,0x6100da98,0x7c00100,0x230400,0x6100db71, -0x4000000,0x200000,0x6100dc99,0x2802100,0x962460,0x6100dc99,0x2802400,0x962460,0x6100dc99,0x6800000,0x1329800,0x6100dc99,0x6800100,0x962540,0x6100dc99,0x6804400, -0x962540,0x6100dc99,0x7c00100,0x230400,0x610a4711,0x7c40300,0xe30000,0x610a4f11,0x7c00300,0xe30001,0x610ace00,0x4000000,0xe00000,0x6140afa1,0x7c00100,0x230400, -0x6140afa3,0x7c00100,0x230400,0x6180af9e,0x2802400,0x962460,0x62002a00,0x4000000,0x1600000,0x63002800,0x80000,0x918820,0x63c00c15,0x80000,0x918820,0x7000080e, -0x7c00100,0x250400,0x70000a03,0x4000000,0x200000,0x70000c00,0x80000000,0x218960,0x70000f0a,0x7c00100,0x230400,0x70001004,0x7c00100,0x230400,0x70001524,0x2802100, -0x962460,0x70001524,0x7c00100,0x230400,0x70001615,0x2802100,0x962460,0x7000171a,0x2802100,0x962460,0x70001821,0x6800000,0x1329800,0x70002320,0x7c00100,0x230400, -0x70002a00,0x4000000,0x1500000,0x70002a00,0x4000000,0x1600000,0x70003000,0x24000000,0x200000,0x70003800,0x24000000,0xe00000,0x70005201,0x2802400,0x962460,0x7000581e, -0x7c00100,0x230400,0x70006108,0x7c00100,0x220400,0x70006108,0x7c00100,0x250400,0x70006f30,0x7c00100,0x230400,0x70007300,0x24000000,0x200000,0x70007f0e,0x4000000, -0x200000,0x70008301,0x2802100,0x962460,0x70008301,0x2802400,0x962460,0x70008e00,0x24000000,0x200000,0x70008e00,0x24000000,0x400000,0x70008e00,0x24000002,0x400000, -0x70008e00,0x24000008,0x1410000,0x70008e00,0x24000010,0x400000,0x70008e00,0x2c000010,0x448000,0x70009519,0x7c00100,0x220400,0x70009519,0x7c00100,0x230400,0x70009519, -0x7c00100,0x250400,0x70009865,0x7c00100,0x230400,0x70009965,0x4000010,0x400000,0x70009965,0x7c00100,0x230400,0x7000a008,0x7c00100,0x220400,0x7000a008,0x7c00100, -0x250400,0x7000a008,0x7c00500,0x22040f,0x7000a50e,0x4000000,0x200000,0x7000b61c,0x2802500,0x1862460,0x7000b61c,0x6800500,0x1862400,0x7000b61c,0x7c00100,0x1830000, -0x7000c300,0x4000000,0x100000,0x7000c941,0x2806000,0x962460,0x7000cc00,0x4000000,0xe00000,0x7000cd00,0x4000000,0x200000,0x7000cd00,0x4000000,0xe00000,0x7000cd00, -0x4000000,0x2800000,0x7000cf00,0x4000000,0xe00000,0x7000d202,0x2802100,0x962460,0x7000d202,0x7c00100,0x230400,0x7000d997,0x7c00100,0x230400,0x7000d997,0xc000010, -0x248000,0x7000dd86,0x2802400,0x962460,0x7000dd86,0x7c00100,0x230400,0x7000dd86,0xc000010,0x448000,0x7000de9f,0x4000000,0x200000,0x7000de9f,0x7c00100,0x230400, -0x7000e001,0x2400,0x962460,0x7000e001,0x2802400,0x962460,0x7000e187,0x2802000,0x962460,0x7000e187,0x2802100,0x962460,0x7000e187,0x4000000,0x200000,0x7000e187, -0x7c00100,0x230400,0x7000e187,0xc000010,0x448000,0x7000e288,0x7c00100,0x230400,0x7000e300,0x4000000,0x200000,0x7000e489,0x2802100,0x962460,0x7000e489,0x2802400, -0x962460,0x7000e489,0x6800100,0x962540,0x7000e489,0x6800100,0x962541,0x7000e489,0x6804400,0x962540,0x7000e489,0x7c00100,0x230400,0x7000e489,0x7c00900,0x230400, -0x7000e59d,0x2802100,0x962460,0x7000e59d,0x2802400,0x962460,0x7000e59d,0x4000000,0x200000,0x7000e59d,0x4000010,0x200000,0x7000e59d,0x6800100,0x962540,0x7000e59d, -0x6804400,0x962540,0x7000e59d,0x7c00100,0x230400,0x7000e59d,0xc000010,0x448000,0x7000e691,0x2802100,0x962460,0x7000e691,0x2802400,0x962460,0x7000e691,0x2806400, -0x962460,0x7000e691,0x6800000,0x1329800,0x7000e691,0x6800100,0x962540,0x7000e691,0x7c00100,0x230400,0x7000e700,0x4000400,0x200400,0x7000e70e,0x7c00100,0x220400, -0x7000e719,0x7c00100,0x220400,0x7000e719,0x7c00500,0x22040f,0x7000e853,0x7c00100,0x230400,0x7000e9a0,0x2802400,0x962460,0x7000e9a0,0x4000000,0x200000,0x7000e9a0, -0x4000000,0x500000,0x7000e9a0,0x7c00100,0x230400,0x7000ea79,0x2802400,0x962460,0x7000ea79,0x4000000,0x200000,0x7000ea79,0x4000000,0xf00000,0x7000ea79,0x4000010, -0x400000,0x7000ea79,0x7c00100,0x230400,0x7000eb8c,0x2802400,0x962460,0x7000eb8c,0x4000000,0x200000,0x7000eb8c,0x7c00100,0x230400,0x7000eca3,0x2802100,0x962460, -0x7000eca3,0x2806400,0x962460,0x7000eca3,0x4000000,0x200000,0x7000eca3,0x6800000,0x1329800,0x7000eca3,0x6800100,0x962540,0x7000eca3,0x7c00100,0x230400,0x7000eca3, -0xc000010,0x448000,0x7000ed95,0x6800000,0x1329800,0x7000ed95,0x7c00100,0x230400,0x7000ed95,0xc000010,0x448000,0x7000ee1c,0x2802500,0x1862460,0x7000ee1c,0x6800000, -0x1329800,0x7000ee1c,0x7c00100,0x1830000,0x7000ee1c,0x7c00900,0x1830000,0x7000ef8f,0x4000000,0x200000,0x7000ef8f,0x7c00100,0x230400,0x7000f08e,0x4000000,0x200000, -0x7000f08e,0x7c00100,0x230400,0x7000f159,0x2802100,0x962460,0x7000f159,0x7c00100,0x230400,0x7000f200,0x4000000,0x200000,0x7000f200,0x4000000,0x1200000,0x7000f200, -0x4000000,0x1710000,0x7000f34b,0x2802400,0x962460,0x7000f34b,0x4000000,0x200000,0x7000f34b,0x4000010,0x400000,0x7000f34b,0x6800000,0x1329800,0x7000f34b,0x7c00100, -0x230400,0x7000f34b,0x7c00900,0x230400,0x7000f34b,0xc000010,0x448000,0x7000f490,0x4000000,0x200000,0x7000f490,0x7c00100,0x230400,0x7000f5a5,0x7c00100,0x230400, -0x7000f67b,0x4000000,0x200000,0x7000f67b,0x4000010,0x200000,0x7000f67b,0x7c00100,0x230400,0x7000f8a6,0x2802100,0x962460,0x7000f8a6,0x2802400,0x962460,0x7000f8a6, -0x2806400,0x962460,0x7000f8a6,0x4000000,0x500000,0x7000f8a6,0x4000010,0xb00000,0x7000f8a6,0x4000800,0x200000,0x7000f8a6,0x6800100,0x962540,0x7000f8a6,0x6800100, -0x962541,0x7000f8a6,0x7c00100,0x230400,0x7000f8a6,0xc000010,0x448000,0x7000f921,0x4000000,0x200000,0x7000fa00,0x4000000,0x200000,0x7000fb9e,0x2802100,0x962460, -0x7000fb9e,0x2802400,0x962460,0x7000fb9e,0x2806400,0x962460,0x7000fb9e,0x4000000,0x200000,0x7000fb9e,0x6800000,0x1329800,0x7000fb9e,0x6800100,0x962540,0x7000fb9e, -0x6800100,0x962541,0x7000fb9e,0x7c00100,0x230400,0x7000fc92,0x4000000,0x200000,0x7000fc92,0x6800000,0x1329800,0x7000fc92,0x7c00100,0x220400,0x7000fc92,0x7c00100, -0x230400,0x7000fc92,0x7c00100,0x250400,0x700acd00,0x4000000,0xe00000,0x700acd00,0x4000000,0x2800000,0x700ace00,0x4000000,0xe00000,0x700acf00,0x4000000,0xe00000, -0x700acf00,0x4000000,0x2800000,0x7050df11,0x4000000,0x200000,0x7050f719,0x80000,0x918820,0x7080afa1,0x2802400,0x962460,0x7090df11,0x2802400,0x962460,0x70d0e417, -0x2802100,0x962460,0x70d0e417,0x2802400,0x962460,0x70d0e417,0x6800100,0x962540,0x70d0ea15,0x4000010,0x400000,0x8000120f,0x7c00100,0x230400,0x80001524,0x7c00100, -0x230400,0x8000171a,0x7c00100,0x230400,0x80002006,0x7c00100,0x220400,0x80002006,0x7c00100,0x250400,0x80002a00,0x4000000,0x1500000,0x80002d00,0x4000000,0x200000, -0x80005208,0x2802400,0x962460,0x80005c00,0x4000000,0x200000,0x80007300,0x24000000,0x200000,0x80009519,0x7c00100,0x220400,0x80009519,0x7c00100,0x230400,0x80009519, -0x7c00100,0x250400,0x80009865,0x7c00100,0x230400,0x8000a008,0x2802100,0x962460,0x8000b30a,0x4000000,0x500000,0x8000b30a,0x7c00100,0x230400,0x8000cd00,0x4000000, -0xe00000,0x8000d202,0x2802500,0x962460,0x8000d202,0x7c00100,0x230400,0x8000d68d,0x4000000,0x200000,0x8000d997,0x2802000,0x962460,0x8000d997,0x2802400,0x962460, -0x8000d997,0x4000000,0x400000,0x8000d997,0x4000000,0x500000,0x8000d997,0x7c00100,0x230400,0x8000d997,0xc000010,0x448000,0x8000e489,0x2802100,0x962460,0x8000e489, -0x7c00100,0x230400,0x8000e719,0x7c00100,0x220400,0x8000f8a6,0x2802100,0x962460,0x8000f8a6,0x7c00100,0x230400,0x8000f8a6,0xc000010,0x448000,0x8000fda1,0x2802100, -0x1862460,0x8000fda1,0x2806400,0x1862460,0x8000fda1,0x4000000,0x1800000,0x8000fda1,0x6800000,0x1329800,0x8000fda1,0x6800100,0x1862400,0x8000fda1,0x6800100,0x1862540, -0x8000fda1,0x7c00100,0x1830000,0x8000fda1,0xc000010,0x448000,0x8000fe9c,0x7c00100,0x230400,0x8000fe9c,0x7c00100,0x830400,0x8000fe9c,0x7c00100,0x1430400,0x8000ff06, -0x7c00100,0x220400,0x80010165,0x7c00100,0x230400,0x800102a2,0x4000000,0x200000,0x800102a2,0x7c00100,0x230400,0x800103a4,0x7c00100,0x230400,0x800103a4,0xc000010, -0x448000,0x8001044c,0x4000000,0x200000,0x8001044c,0x7c00100,0x220400,0x8001044c,0x7c00100,0x250400,0x80010670,0x2802000,0x962460,0x80010670,0x4000000,0x200000, -0x80010670,0x4000010,0x400000,0x80010670,0xc000010,0x448000,0x800a4711,0x7c40300,0xe30000,0x800acd00,0x4000000,0xe00000,0x800acd00,0x4000000,0x2902460,0x800ace00, -0x4000000,0xe00000,0x800acf00,0x4000000,0xe00000,0x800b0011,0x7c40300,0xe30000,0x800b0500,0x4000000,0xe00000,0x800b0500,0x4000000,0x2800000,0x90001615,0x7c00100, -0x230400,0x9000171a,0x4000000,0x200000,0x9000171a,0x7c00100,0x230400,0x90003000,0x24000000,0x200000,0x90007f0e,0x4000000,0x200000,0x90008301,0x2802400,0x962460, -0x90008e00,0x24000000,0x400000,0x90009519,0x7c00100,0x250400,0x9000a16f,0x2802100,0x962460,0x9000d200,0x80000000,0x218960,0x9000d202,0x2802000,0x962460,0x9000d202, -0x2802100,0x962460,0x9000d202,0x7c00100,0x230400,0x9000e59d,0x2802100,0x962460,0x90010500,0x4000000,0xe00000,0x900107a7,0x2802100,0x962460,0x900107a7,0x2802400, -0x962460,0x900107a7,0x2802c00,0x962460,0x900107a7,0x4000000,0x1400000,0x900107a7,0x6800000,0x1329800,0x900107a7,0x7c00100,0x220400,0x900107a7,0x7c00100,0x250400, -0x900108a8,0x2802100,0x962460,0x900108a8,0x2806400,0x962460,0x900108a8,0x4000000,0x200000,0x900108a8,0x4000000,0x400000,0x900108a8,0x4000010,0x400000,0x900108a8, -0x6800000,0x1329800,0x900108a8,0x6800100,0x962540,0x900108a8,0x7c00100,0x230400,0x900108a8,0xc000010,0x448000,0x90010908,0x7c00100,0x220400,0x90010a38,0x2802100, -0x962460,0x90010ca9,0x2802100,0x962460,0x90010ca9,0x4000000,0x500000,0x90010ca9,0x4000010,0xb00000,0x90010ca9,0x6800100,0x962540,0x90010ca9,0x7c00100,0x230400, -0x90010d1b,0x4000000,0x500000,0x90010eaa,0x2802100,0x962460,0x90010eaa,0x2802400,0x962460,0x90010eaa,0x2806400,0x962460,0x90010eaa,0x4000000,0x200000,0x90010eaa, -0x4000000,0x400000,0x90010eaa,0x4000010,0x400000,0x90010eaa,0x6800000,0x1329800,0x90010eaa,0x6800100,0x962540,0x90010eaa,0x7c00100,0x230400,0x90010eaa,0xc000010, -0x448000,0x90010fab,0x7c00100,0x220400,0x90010fab,0x7c00100,0x250400,0x9002c300,0x4000000,0x100000,0x900ac400,0x4000000,0xe0000d,0x900acd00,0x4000000,0xe00000, -0x900acd00,0x4000000,0x2800000,0x900acf00,0x4000000,0xe00000,0x900b0500,0x4000000,0xe00000,0x900b0500,0x4000000,0x2800000,0x900b0b9a,0x7c00900,0x1230400,0x900b109a, -0x7c00300,0xe30000,0x900b119a,0x7c00300,0xe30000,0x90408e06,0x24000000,0x400000,0xa0001004,0x4000000,0x200000,0xa0001004,0x7c00100,0x230400,0xa000120f,0x2802100, -0x962460,0xa000120f,0x2802400,0x962460,0xa000171a,0x2802100,0x962460,0xa000171a,0x2806400,0x962460,0xa0002a00,0x4000000,0x1600000,0xa0003000,0x24000000,0x200000, -0xa000581e,0x7c00100,0x230400,0xa0007300,0x24000000,0x200000,0xa0008301,0x2802400,0x962460,0xa0008e00,0x24000000,0x400000,0xa000cf00,0x4000000,0xe00000,0xa0010500, -0x4000000,0x200000,0xa00114af,0x2802100,0x962460,0xa00114af,0x2802400,0x962460,0xa00114af,0x2806400,0x962460,0xa00114af,0x6800000,0x1329800,0xa00114af,0x7c00100, -0x230400,0xa00114af,0x7c00100,0x230560,0xa00116b0,0x2802100,0x962460,0xa00116b0,0x2802800,0x962460,0xa00116b0,0x2806400,0x962460,0xa00116b0,0x4000000,0x400000, -0xa00116b0,0x4000000,0x500000,0xa00116b0,0x4000010,0x400000,0xa00116b0,0x6800100,0x962540,0xa00116b0,0x7c00100,0x230400,0xa00116b0,0x7c00100,0x230560,0xa00116b0, -0xc000010,0x448000,0xa0011722,0x7c00100,0x230400,0xa00118b1,0x2802000,0x962460,0xa00118b1,0x2802100,0x962460,0xa00118b1,0x2806400,0x962460,0xa00118b1,0x4000000, -0x200000,0xa00118b1,0x4000000,0x400000,0xa00118b1,0x4000000,0x500000,0xa00118b1,0x6800100,0x962540,0xa00118b1,0x7c00100,0x230400,0xa00118b1,0x7c00100,0x230560, -0xa00118b1,0xc000010,0x448000,0xa00a4005,0x7c00100,0xe30400,0xa00a4711,0x7c40300,0xe30000,0xa00ac400,0x4000000,0xe00000,0xa00acb14,0x7c00100,0xe30000,0xa00acf00, -0x4000000,0xe00000,0xa00b0500,0x4000000,0xe00000,0xa00b0500,0x4000000,0x2800000,0xa00b0b96,0x7c00900,0x1230400,0xa00b1211,0x7c40300,0xe30000,0xa00b1314,0x7c00100, -0xe30000,0xa00b1596,0x7c00300,0xe30000,0xa040afb7,0x6800400,0x962540,0xa08083b8,0x2802400,0x962460,0xb0000a03,0x7c00100,0x220400,0xb0000b13,0x7c00100,0x2633800, -0xb0001004,0x2802000,0x962460,0xb0001110,0x4000000,0x200000,0xb0001524,0x2802100,0x962460,0xb0001615,0x4000000,0x500000,0xb000251b,0x7c00100,0x230400,0xb0007300, -0x24000000,0x200000,0xb0008939,0x4000000,0x200000,0xb0008939,0x7c00100,0x230400,0xb0008e00,0x24000000,0x200000,0xb0008e00,0x24000000,0x400000,0xb0008e00,0x24000010, -0x400000,0xb0009257,0x2802000,0x962460,0xb0009257,0x4000000,0x1600000,0xb0009519,0x7c00100,0x220400,0xb0009519,0x7c00100,0x250400,0xb0009a00,0x4000000,0x200000, -0xb000b30a,0x2802100,0x962460,0xb000b30a,0x7c00100,0x230400,0xb000c178,0x80000000,0x218960,0xb000c300,0x4000000,0x200000,0xb000d202,0x2802000,0x962460,0xb000d476, -0x6800100,0x962540,0xb000d476,0x7c00100,0x230400,0xb000e300,0x4000000,0xe00000,0xb000fda1,0x7c00100,0x1830000,0xb0010eaa,0x2802000,0x962460,0xb00116b0,0x7c00100, -0x230400,0xb0011900,0x4000000,0xe00000,0xb0011ab2,0x2802100,0x962460,0xb0011ab2,0x2802400,0x962460,0xb0011ab2,0x2806400,0x962460,0xb0011ab2,0x4000000,0x200000, -0xb0011ab2,0x6800100,0x962540,0xb0011ab2,0x7c00100,0x230400,0xb0011b0c,0x7c00100,0x230400,0xb0011cb3,0x2802100,0x962460,0xb0011cb3,0x2806400,0x962460,0xb0011cb3, -0x6800000,0x1329800,0xb0011cb3,0x6800100,0x962540,0xb0011cb3,0x7c00100,0x230400,0xb0011db6,0x2802500,0x962460,0xb0011db6,0x6800000,0x1329800,0xb0011db6,0x7c00100, -0x230400,0xb0011db6,0x7c00500,0x230400,0xb0011e00,0x4000000,0x200000,0xb0011e00,0x4000000,0x1500000,0xb0011fb4,0x2802100,0x962460,0xb0011fb4,0x6800100,0x962540, -0xb0011fb4,0x7c00100,0x230400,0xb0011fb4,0xc000010,0x248000,0xb0012000,0x4000000,0x200000,0xb00121b5,0x4000000,0x200000,0xb00121b5,0x4000010,0x400000,0xb00121b5, -0x7c00100,0x220400,0xb00121b5,0x7c00100,0x250400,0xb00121b5,0xc000010,0x448000,0xb00122b8,0x4000000,0x200000,0xb00122b8,0x7c00100,0x230400,0xb00123b7,0x2802400, -0x962460,0xb00123b7,0x4000000,0x200000,0xb00123b7,0x7c00100,0x230400,0xb00123b7,0xc000010,0x248000,0xb00a4005,0x7c00100,0xe30400,0xb00a4711,0x7c40300,0xe30000, -0xb00acf00,0x4000000,0xe00000,0xb00b0500,0x4000000,0xe00000,0xb00b0500,0x4000000,0x2800000,0xb00b109a,0x7c00300,0xe30000,0xb080e487,0x2802000,0x962460,0xc0001524, -0x4000000,0x500000,0xc0001a18,0x2806400,0x1862460,0xc0001a18,0x7c00100,0x1830000,0xc0007300,0x24000000,0x200000,0xc0008e00,0x24000010,0x400000,0xc0009519,0x7c00100, -0x220400,0xc0009519,0x7c00100,0x250400,0xc000c300,0x4000000,0x20000f,0xc000d85c,0x2802100,0x962460,0xc000d85c,0x6800100,0x962540,0xc000d85c,0x7c00100,0x230400, -0xc000dc99,0x7c00100,0x230400,0xc000e719,0x7c00100,0x220400,0xc00107a7,0x7c00100,0x230400,0xc0010eaa,0x7c00100,0x230400,0xc00116b0,0x7c00100,0x230560,0xc0011900, -0x4000000,0x200000,0xc0012447,0,0x818820,0xc0012447,0,0xc18820,0xc0012447,0,0x1418820,0xc00125b9,0x7c00100,0x230400,0xc00126bb,0x2802100, -0x962460,0xc00126bb,0x2806400,0x962460,0xc00126bb,0x4000000,0x500000,0xc00126bb,0x6800100,0x962540,0xc00126bb,0x7c00100,0x230400,0xc00127ba,0x2802400,0x962460, -0xc00127ba,0x4000000,0x200000,0xc00127ba,0x6800000,0x1329800,0xc00127ba,0x7c00100,0x230400,0xc00127ba,0x7c00900,0x230400,0xc0012800,0x4000000,0x200000,0xc0012b23, -0x4000000,0x200000,0xc0012b23,0x4000000,0x400000,0xc0012b23,0x4000000,0x1500000,0xc0012cbc,0x2802400,0x962460,0xc0012cbc,0x4000000,0x1600000,0xc0012cbc,0x6800000, -0x1329800,0xc0012cbc,0x7c00100,0x230400,0xc00acf00,0x4000000,0xe00000,0xc00ae300,0x4000000,0xe00000,0xc00b0500,0x4000000,0xe00000,0xc00b0500,0x4000000,0x2800000, -0xc00b0b11,0x4000000,0x1200000,0xc00b0b11,0x7c00900,0x1230400,0xc00b109a,0x7c00300,0xe30000,0xc00b2914,0x7c00100,0x2530000,0xc00b2916,0x7c00100,0x2530c00,0xc00b2a00, -0x4000000,0xe00000,0xc040af5e,0x7c00100,0x230400,0xc0c12b89,0x4000000,0x200000,0xc14a44ca,0x4000000,0xe0000d,0xd000131f,0x2802c00,0x962460,0xd000171a,0x7c00100, -0x230400,0xd0001821,0x2802100,0x962460,0xd0007300,0x24000000,0x200000,0xd0008e00,0x24000000,0x200000,0xd0008f3a,0x2806000,0x962460,0xd0009519,0x7c00100,0x220400, -0xd0009519,0x7c00100,0x250400,0xd000a500,0x4000000,0x200000,0xd000c300,0x4000000,0xe00000,0xd000d202,0x7c00100,0x230400,0xd000d476,0x7c00100,0x230400,0xd000d997, -0x2802100,0x962460,0xd000d997,0x6800100,0x962540,0xd000e001,0x2802100,0x962460,0xd000e700,0x4000400,0x200000,0xd000e719,0x7c00100,0x220400,0xd000e719,0x7c00500, -0x22040f,0xd000fa00,0x4000000,0xe00000,0xd0010eaa,0x4000010,0x400000,0xd0010eaa,0x7c00100,0x230400,0xd0012dbd,0x4000000,0x200000,0xd0012dbd,0x7c00100,0x230400, -0xd0012fbe,0x2802100,0x962460,0xd0012fbe,0x2802400,0x962460,0xd0012fbe,0x2806400,0x962460,0xd0012fbe,0x4000000,0x400000,0xd0012fbe,0x6800000,0x1329800,0xd0012fbe, -0x6800100,0x962540,0xd0012fbe,0x6800100,0x962541,0xd0012fbe,0x6804400,0x962540,0xd0012fbe,0x7c00100,0x230400,0xd0012fbe,0x7c00100,0x230560,0xd0012fbe,0xc000010, -0x448000,0xd0013183,0x7c00100,0x230400,0xd0013200,0x4000000,0x200000,0xd0013200,0x6800000,0x1329805,0xd00134c0,0x2802100,0x962460,0xd00134c0,0x4000002,0x400000, -0xd00134c0,0x7c00100,0x230400,0xd00a4305,0x7c00100,0xe30400,0xd00a4611,0x7c40300,0xe30000,0xd00a4711,0x7c40300,0xe30000,0xd00a5e11,0x7c40300,0xe30000,0xd00acf00, -0x4000000,0xe00000,0xd00b0500,0x4000000,0xe00000,0xd00b0500,0x4000000,0x2800000,0xd00b0b11,0x6800500,0x962540,0xd00b0bbf,0x2802200,0xc62460,0xd00b119a,0x7c00300, -0xe30000,0xd00b2a00,0x4000000,0xe00000,0xd00b2e11,0x7c40300,0xe30000,0xd00b30bf,0x7c00300,0x230000,0xd00b339a,0x7c00300,0xe30000,0xe0000c02,0xc000010,0xb48000, -0xe0001524,0x2802400,0x962460,0xe0001524,0x7c00100,0x230400,0xe0001615,0x7c00100,0x230400,0xe000251b,0x12882000,0x962460,0xe0002a00,0x4000000,0x1500000,0xe0005102, -0x4000000,0x200000,0xe0005c00,0x4000000,0x200000,0xe000622a,0x6804400,0x962540,0xe000622a,0x7c00100,0x230400,0xe0008838,0x7c00100,0x220400,0xe0008838,0x7c00100, -0x250400,0xe0008e00,0x24000000,0x810000,0xe0008e00,0x24000000,0x1410000,0xe0008e00,0x24000002,0x400000,0xe0008e00,0x2c000010,0xb48000,0xe000933e,0x7c00100,0x230400, -0xe000933e,0xc000010,0x448000,0xe0009519,0x7c00100,0x220400,0xe0009519,0x7c00100,0x22040f,0xe0009519,0x7c00100,0x250400,0xe000c178,0x2802100,0x962460,0xe000c941, -0x2802100,0x962460,0xe000c941,0x2806400,0x962460,0xe000c941,0x7c00100,0x230400,0xe000d202,0x2802400,0x962460,0xe000d202,0x7c00100,0x230400,0xe000d202,0x7c00500, -0x230400,0xe000dc99,0x4000000,0x200000,0xe000e001,0x2802100,0x962460,0xe000e001,0x2802400,0x962460,0xe000fda1,0x7c00100,0x1830000,0xe0013502,0x2802400,0x962460, -0xe0013502,0x4000000,0x200000,0xe0013502,0x7c00100,0x230400,0xe0013502,0x80000000,0x218960,0xe00136c1,0x4000000,0x200000,0xe00136c1,0x7c00100,0x230400,0xe001370b, -0x7c00100,0x230400,0xe0013919,0x7c00500,0x220400,0xe0013919,0x7c00500,0x22040f,0xe0013919,0x7c00d00,0x23040f,0xe0013a19,0x7c00100,0x220400,0xe0013a19,0x7c00100, -0x230400,0xe0013bc2,0x2802400,0x962460,0xe0013bc2,0x7c00100,0x230400,0xe0013bc2,0xc000010,0x248000,0xe0013cc3,0x6800000,0x1329800,0xe0013cc3,0x7c00100,0x230400, -0xe0013dc4,0x2802400,0x962460,0xe0013dc4,0x7c00100,0x230400,0xe0013e28,0x7c00100,0x230400,0xe0013fc5,0x7c00100,0x220400,0xe0013fc5,0x7c00100,0x250400,0xe0014000, -0x4000000,0x200000,0xe0014001,0x2802400,0x962460,0xe00a4711,0x7c40300,0xe30000,0xe00a5e11,0x7c40300,0xe30000,0xe00ac511,0x7c40300,0xe30000,0xe00acf00,0x4000000, -0xe00000,0xe00ae300,0x4000000,0xe00000,0xe00b0500,0x4000000,0xe00000,0xe00b1314,0x7c00100,0xe30000,0xe00b1316,0x7c00100,0xe30c00,0xe00b2a00,0x4000000,0xe00000, -0xe00b2a00,0x4000000,0x2800000,0xe00b3816,0x7c00500,0x230c00,0xe0808328,0x2802400,0x962460,0xf0001615,0x6800100,0x962540,0xf0001a18,0x2802000,0x1862460,0xf000c247, -0x7c00100,0x230400,0xf000d000,0x4000000,0xe00000,0xf000e300,0x4000000,0xe00000,0xf000e59d,0x2802100,0x962460,0xf000e59d,0x7c00100,0x230400,0xf0012447,0, -0x818820,0xf0012447,0,0xc18820,0xf0012447,0,0x1418820,0xf0012447,0x2802000,0x962460,0xf0012447,0x2802400,0x962460,0xf0012447,0x7c00100,0x230400, -0xf0013a19,0x7c00100,0x220400,0xf0014102,0x2802400,0x962460,0xf0014308,0x2802100,0x962460,0xf0014308,0x7c00500,0x22040e,0xf0014308,0x7c00500,0x22040f,0xf001440a, -0x4000000,0x500000,0xf0014500,0x4000000,0x200000,0xf00146c6,0x2802100,0x962460,0xf00146c6,0x2806000,0x962460,0xf00146c6,0x4000000,0xe00000,0xf00146c6,0x6800000, -0x1329800,0xf00146c6,0x6800100,0x962540,0xf00146c6,0x6804000,0x962540,0xf00146c6,0x7c00100,0x230400,0xf00146c6,0x7c00100,0x230560,0xf00146c6,0xc000010,0x448000, -0xf00147c7,0x2802000,0x962460,0xf00147c7,0x6800000,0x1329800,0xf00147c7,0x7c00100,0x230400,0xf00ac511,0x7c40300,0xe30000,0xf00acf00,0x4000000,0xe00000,0xf00b2914, -0x7c00100,0x2530000,0xf00b2916,0x7c00100,0x2530c00,0xf00b2a00,0x4000000,0xe00000,0xf00b2a00,0x4000000,0x2800000,0xf00b4211,0x7c40300,0xe30000}; +0x1200000,0x30002417,0x6800000,0x1329800,0x30002417,0x6800100,0x1862540,0x30002417,0x7c00100,0x1830000,0x30002417,0x7d00100,0x1830000,0x30002417,0xc000010,0x448000, +0x3000251b,0x80000,0xc18820,0x3000251b,0x2802100,0x962460,0x3000251b,0x3c02100,0x962460,0x3000251b,0x4000000,0x200000,0x3000251b,0x4000006,0x500000,0x3000251b, +0x4000010,0x400000,0x3000251b,0x4000010,0xb70000,0x3000251b,0x4000800,0x200000,0x3000251b,0x6800000,0x1329800,0x3000251b,0x7c00100,0x230400,0x3000251b,0x7c00900, +0x230400,0x3000251b,0xc000010,0xb48000,0x3000251b,0x12882000,0x962460,0x30002800,0x24000000,0x200000,0x30002800,0x2c000010,0x1248002,0x30002a00,0x4000000,0x1600000, +0x30002b01,0x2000,0x962460,0x30002c00,0x4000000,0x200000,0x30002c00,0x7c00100,0x220405,0x30002d19,0x7c00100,0x250400,0x30002e00,0x24000000,0x200000,0x30003000, +0x24000000,0x200000,0x30003100,0x24000000,0x200000,0x30003600,0x24000000,0x200000,0x30003700,0x24000000,0x200000,0x3000392e,0x24000000,0x200000,0x30005013,0x7c00100, +0x2633801,0x30005600,0,0x918820,0x30020600,0x4000400,0x500400,0x30020701,0x2802400,0x962460,0x30020701,0x2802400,0xc62460,0x300a3a11,0x4020000,0xe00000, +0x300a3a11,0x4020000,0xe00002,0x300a3b11,0x4020000,0xe00002,0x300a3c00,0x4008000,0xe00000,0x300a3c00,0x4010000,0xe00000,0x300a3d11,0x7c00300,0xe30002,0x300a4305, +0x7c00100,0xe30400,0x300a4611,0x7c40300,0xe30000,0x300a4829,0x7c00100,0xe30400,0x300a4829,0x7c00900,0x1230400,0x300a4929,0x4000000,0xe00000,0x3040259a,0x4000010, +0x400000,0x3040259a,0x4000010,0xb70000,0x3040259a,0xc000010,0xb48000,0x304028bc,0x4000001,0xc41c0b,0x304a3dcc,0x4000000,0xe00000,0x30800c27,0x2802100,0x962460, +0x30c01c92,0x6800000,0x1329800,0x3100080e,0x7c00120,0x220402,0x3100080e,0x7c00120,0x250402,0x31005167,0x1000,0,0x3100581e,0x4000000,0x200000,0x3100581e, +0x7c00100,0x230400,0x3100590d,0x7c00100,0x230400,0x31005a09,0x7c00100,0x220400,0x31005a09,0x7c00100,0x250400,0x31005b00,0x4000000,0x200000,0x31005c00,0x80000, +0x918820,0x31005c00,0x2802000,0x962460,0x31005c00,0x2802400,0x962460,0x31005c00,0x4000000,0x200000,0x31005c00,0x4000000,0x200001,0x31005c00,0x6800000,0x962540, +0x31005c00,0x6800400,0x962540,0x31005c01,0x2802400,0x962460,0x31005d00,0x4000020,0x200005,0x31005d00,0x6800020,0x1329805,0x31005d00,0x7c00120,0x220405,0x31005d00, +0x7c00120,0x250405,0x31006000,0x82000,0x962460,0x31006000,0x180000,0x918820,0x310a5e11,0x7c40300,0xe30000,0x310a5f11,0x7c00300,0xe30001,0x32000419,0x7c00100, +0x250400,0x3200080e,0x4000020,0x200000,0x3200080e,0x7c00100,0x220400,0x3200080e,0x7c00100,0x250400,0x32000908,0x7c00100,0x220400,0x32000908,0x7c00100,0x250400, +0x32000c02,0x7c00100,0x230400,0x32000e25,0x7c00100,0x230400,0x32001d0c,0x7c00100,0x230400,0x32002800,0x80000,0x1e18820,0x32002800,0x80020,0x218820,0x32002800, +0x4000001,0x445802,0x32002800,0x24000000,0x200000,0x32002800,0x24000000,0x1500002,0x32002800,0x24000020,0x200000,0x32002800,0x2c000010,0x1248002,0x32002919,0x7c00100, +0x22040f,0x32002a00,0x4000000,0x1600000,0x32002b01,0x2000,0x962460,0x32002b01,0x2802000,0x962460,0x32002b01,0x2802020,0x962460,0x32002c00,0x4000000,0x200000, +0x32002c00,0x4000020,0x200000,0x32002c00,0x4000020,0x200005,0x32002c00,0x7c00120,0x220405,0x32002c00,0x7c00120,0x250405,0x32002e00,0x24000020,0x200000,0x32002f00, +0x24000020,0x200000,0x32003000,0x24000000,0x200000,0x32003000,0x24000020,0x200000,0x32003500,0x24000000,0x200000,0x32003600,0x24000020,0x200000,0x32003700,0x24000000, +0x100000,0x32003700,0x24000000,0x200000,0x32003800,0x24000000,0x810000,0x32003800,0x24000000,0x1410000,0x32005102,0x4000000,0x1500008,0x32005502,0x7c00100,0x230400, +0x32006108,0x7c00100,0x220400,0x32006108,0x7c00100,0x250400,0x3200622a,0x2802100,0x962460,0x3200622a,0x2806400,0x962460,0x3200622a,0x7c00100,0x230400,0x3200632b, +0x2802100,0x962460,0x3200632b,0x6804000,0x962540,0x3200632b,0x7c00100,0x230400,0x3200642c,0x2802100,0x962460,0x3200642c,0x7c00100,0x230400,0x3200652d,0x2802100, +0x962460,0x3200652d,0x7c00100,0x230400,0x32006600,0x24000020,0x200000,0x32006700,0x24000020,0x200000,0x32006800,0x24000020,0x200000,0x32006900,0x24000020,0x200000, +0x32006900,0x24000020,0x810000,0x32006900,0x24000020,0x1410000,0x32006a00,0x24000020,0x200000,0x32006a00,0x24000020,0x200001,0x32006a00,0x24000020,0x200002,0x32020701, +0x2882000,0xc62460,0x32023300,0x4000000,0x100000,0x32026c01,0x12882000,0x962460,0x32065700,0x4000000,0x810011,0x32065700,0x4000000,0x1410011,0x32086600,0x24000020, +0x810000,0x32086600,0x24000020,0x1410000,0x32086900,0x24000020,0x810000,0x32086900,0x24000020,0x1410000,0x320a3600,0x24000020,0x200000,0x320a3d11,0x7c00100,0x1230400, +0x320a3e14,0x7c00100,0xe30010,0x320a3e14,0x7c00100,0x2530000,0x320a3f16,0x7c00100,0xe30c10,0x320a4400,0x4000000,0xe00003,0x320a4929,0x4000000,0xe00000,0x320a4f11, +0x7c00300,0xe30001,0x320a6b16,0x7c00100,0x2530c00,0x32406396,0xc000010,0x448000,0x324a3dcf,0x4000000,0xe00000,0x324a3dcf,0x7c00100,0x1230400,0x324a3fca,0x4000002, +0x1200c00,0x324a53c7,0x24000000,0xe00000,0x32820701,0x2802000,0x962460,0x40000419,0x7c00100,0x220400,0x40000519,0x7c00100,0x220400,0x40000600,0x4000400,0x200400, +0x4000080e,0x7c00100,0x220400,0x4000080e,0x7c00100,0x250400,0x4000080e,0x7c00100,0x250402,0x40000c02,0x2802100,0x962460,0x40000c02,0x2802400,0x962460,0x40000c02, +0x2802500,0x962460,0x40000c02,0x4000000,0x200000,0x40000c02,0x4000000,0x1071400,0x40000c02,0x7c00100,0x230400,0x40000c02,0x80000000,0x1329960,0x40000d22,0x7c00100, +0x230400,0x40000f0a,0x7c00100,0x230400,0x40001004,0x7c00100,0x230400,0x40001110,0x2802100,0x962460,0x40001110,0x6800100,0x962540,0x4000120f,0x2802100,0x962460, +0x4000120f,0x4000000,0x1600000,0x4000120f,0x7c00100,0x230400,0x4000131f,0x7c00100,0x230400,0x40001423,0x4000000,0x200000,0x40001423,0x4000000,0x1600000,0x40001615, +0x2802400,0x962460,0x40001615,0x7c00100,0x230400,0x40002417,0x2802400,0x1862460,0x40002417,0x4000000,0x200000,0x40002800,0x6800000,0x201c00,0x40002800,0x24000002, +0x200000,0x40002c00,0x4000000,0x200002,0x40003000,0x24000000,0x200000,0x40003000,0x24000020,0x200000,0x40003700,0x24000000,0x200000,0x40005a09,0x7c00100,0x220400, +0x40005a09,0x7c00100,0x250400,0x40005d00,0x7c00120,0x220405,0x40006f30,0x2802100,0x962460,0x40006f30,0x2802400,0x962460,0x40006f30,0x4000000,0x200000,0x40006f30, +0x6800000,0x1329800,0x40006f30,0x6800100,0x962540,0x40006f30,0x7c00100,0x230400,0x40006f30,0xc000010,0xb48000,0x40007034,0x7c00100,0x1830000,0x40007117,0x4000000, +0x200000,0x40007208,0x7c00100,0x220400,0x4000720e,0x7c00100,0x220400,0x4000720e,0x7c00500,0x22040e,0x4000720e,0x7c00500,0x22040f,0x40007219,0x7c00100,0x220400, +0x40007219,0x7c00500,0x220400,0x40007219,0x7c00500,0x22040e,0x40007219,0x7c00500,0x22040f,0x40007300,0x24000000,0x200000,0x40007400,0x4000000,0x200000,0x40007531, +0x7c00100,0x230400,0x40007631,0x7c00100,0x230400,0x40007835,0x4000010,0x400000,0x40007835,0x7c00100,0x230400,0x40007933,0x7c00100,0x230400,0x40007a32,0x6800000, +0x1329800,0x40007a32,0x7c00100,0x230400,0x40007b2f,0x7c00100,0x230400,0x40007c00,0x4000000,0x200000,0x40020701,0x2802400,0x962460,0x40020701,0x2802400,0xc62460, +0x40023300,0x4000000,0x200000,0x40027d01,0x12882000,0x962460,0x400a3700,0x24000000,0x200000,0x400a3700,0x24000000,0xe00000,0x400a4400,0x4000000,0xe0000d,0x400a4412, +0x4000000,0xe00002,0x400a4412,0x4000000,0xe00003,0x400a4500,0x4000000,0xe0000d,0x400a5300,0x4000000,0x810010,0x400a5300,0x4000000,0x1410010,0x40507719,0x4000000, +0x200000,0x4050771c,0x4000000,0x400000,0x4050771f,0x4000000,0x200000,0x4050771f,0x4000000,0x400000,0x40c01489,0x4000000,0x200000,0x40d05117,0x4000000,0x200000, +0x41000419,0x7c00100,0x220400,0x41000419,0x7c00100,0x250400,0x4100080e,0x7c00100,0x220400,0x4100080e,0x7c00100,0x250400,0x41000908,0x7c00100,0x220400,0x41000908, +0x7c00100,0x250400,0x41000b13,0x2802000,0x962460,0x41000b13,0x2802100,0x962460,0x41000b13,0x4000000,0xb00000,0x41000c02,0x2802100,0x962460,0x41000c02,0x4000000, +0x1500000,0x41000c02,0xc000010,0xb48000,0x41000f0a,0x7c00100,0x230400,0x41001004,0x7c00100,0x230400,0x41001423,0x7c00100,0x230400,0x41001b27,0x4000000,0x500000, +0x41001d0c,0x7c00100,0x22040f,0x41001d0c,0x7c00100,0x230400,0x41001f0b,0x2802400,0x962460,0x41001f0b,0x4000000,0x200000,0x41001f0b,0x7c00100,0x230400,0x41002800, +0x24000000,0x200000,0x41002800,0x24000000,0x400000,0x41002919,0x7c00100,0x22040e,0x41002a00,0x4000000,0x1600000,0x41002b01,0x2802020,0x962460,0x41002c00,0x4000000, +0x200000,0x41002c00,0x7c00120,0x220405,0x41003000,0x24000000,0x200000,0x41003700,0x24000000,0x200000,0x41003700,0x24000000,0xe00000,0x41005d00,0x7c00120,0x220405, +0x41006600,0x24000020,0x200000,0x41006600,0x24000020,0x810000,0x41006600,0x24000020,0x1410000,0x41007208,0x7c00100,0x22040f,0x41007219,0x7c00100,0x220400,0x41007300, +0x24000000,0x200000,0x41007e0e,0x2802000,0x962460,0x41007e0e,0x4000000,0x200000,0x41007f0e,0x4000000,0x200000,0x41007f0e,0x7c00100,0x230400,0x41008002,0x7c00100, +0x230400,0x41008137,0x2802100,0x962460,0x41008137,0x4000000,0x200000,0x41008137,0x6800100,0x962540,0x41008137,0x7c00100,0x230400,0x41008301,0x2802000,0x962460, +0x41008407,0x4000000,0x200000,0x41008407,0x4000000,0x400000,0x41008407,0x4000000,0xb00000,0x41008407,0x7c00100,0x220400,0x41008407,0x7c00100,0x250400,0x4100850b, +0x7c00100,0x230400,0x4100860b,0x4000000,0x200000,0x4100860b,0x7c00100,0x230400,0x4100870c,0x7c00100,0x220400,0x41008838,0x7c00100,0x220400,0x41008838,0x7c00100, +0x250400,0x41008939,0x2802000,0x962460,0x41008939,0x2802100,0x962460,0x41008939,0x2806000,0x962460,0x41008939,0x4000000,0x200000,0x41008939,0x4000000,0x400000, +0x41008939,0x7c00100,0x230400,0x41008939,0xc000010,0x448000,0x41008a00,0x4000400,0x200400,0x41008b3b,0x4000000,0x1800000,0x41008b3b,0x6800000,0x1329800,0x41008b3b, +0x7c00100,0x1830000,0x41008b3b,0x7e00100,0x1830000,0x41008c3d,0x4000010,0x400000,0x41008c3d,0x7c00100,0x230400,0x41008d0e,0x7c00100,0x22040f,0x41008d19,0x7c00100, +0x220400,0x41008d19,0x7c00100,0x22040f,0x41008e00,0x24000000,0x200000,0x41008e00,0x24000000,0x400000,0x41008e00,0x24000000,0x1710000,0x41008e00,0x24000006,0x400000, +0x41008f3a,0x2802100,0x962460,0x41008f3a,0x2806000,0x962460,0x41008f3a,0x4000000,0x200000,0x41008f3a,0x6800100,0x962540,0x41008f3a,0x7c00100,0x230400,0x4100903c, +0x7c00100,0x230400,0x4100903c,0x7c00100,0x23040f,0x41020701,0x2802000,0x962460,0x41020701,0x2802000,0xc62460,0x410a3700,0x24000000,0x200000,0x410a3700,0x24000000, +0xe00000,0x410a4412,0x4000000,0xe00003,0x410a4711,0x7c40300,0xe30000,0x410a4f11,0x7c00300,0xe30001,0x410a9100,0x4000000,0x800010,0x410a9100,0x4000000,0x810010, +0x410a9100,0x4000000,0x870010,0x410a9100,0x4000000,0xb00010,0x410a9100,0x4000000,0xf00010,0x410a9100,0x4000000,0x1001410,0x410a9100,0x4000000,0x1071010,0x410a9100, +0x4000000,0x1071410,0x410a9100,0x4000000,0x1410010,0x41408ad2,0x4000400,0x200000,0x414a82cc,0x4000000,0xe00000,0x41808300,0x2802000,0x962460,0x41c01489,0x6800000, +0x1329800,0x50000419,0x7c00100,0x220400,0x50000419,0x7c00100,0x250400,0x5000080e,0x7c00100,0x220400,0x50000908,0x7c00100,0x220400,0x50000908,0x7c00100,0x250400, +0x50000b13,0x2802500,0x962460,0x50000f0a,0x7c00100,0x230400,0x50001615,0x2802100,0x962460,0x50001615,0x7c00100,0x230400,0x50002b01,0x2802020,0x962460,0x50002c00, +0x4000000,0x200000,0x50002c19,0x7c00100,0x220400,0x50002d19,0x7c00100,0x220400,0x50003000,0x24000000,0x200000,0x50003000,0x24000020,0x200000,0x50003700,0x24000000, +0x200000,0x50005d00,0x7c00120,0x220405,0x50005d00,0x7c00120,0x250405,0x50006108,0x7c00100,0x220400,0x50006108,0x7c00100,0x250400,0x50006600,0x24000020,0x200000, +0x50007300,0x24000000,0x200000,0x50008301,0x2802400,0x962460,0x50008a00,0x7c00500,0x230400,0x50009257,0x2802400,0x962460,0x50009257,0x4000000,0x200000,0x50009257, +0x4000010,0x1071400,0x50009257,0x6800000,0x1329800,0x50009257,0x7c00100,0x230400,0x50009257,0x7c00500,0x230400,0x50009257,0x7c00900,0x230400,0x50009257,0xc000010, +0xb48000,0x5000933e,0x2802100,0x962460,0x5000933e,0x2802400,0x962460,0x5000933e,0x4000000,0x400000,0x5000933e,0x4000000,0xe00000,0x5000933e,0x4000010,0x400000, +0x5000933e,0x6800000,0xe29800,0x5000933e,0x6800100,0x962540,0x5000933e,0x6800100,0x962541,0x5000933e,0x6804400,0x2f62540,0x5000933e,0x7c00100,0x2b30400,0x5000933e, +0x7c00100,0x2b30401,0x5000933e,0xc000010,0x448000,0x50009419,0x7c00100,0x220400,0x50009419,0x7c00100,0x250400,0x50009500,0x4000400,0x200400,0x5000965a,0x4000000, +0x500000,0x5000965a,0x7c00100,0x230400,0x5000965a,0xc000010,0xb48000,0x5000975b,0x4000000,0x200000,0x5000975b,0x4000010,0x400000,0x5000975b,0x7c00100,0x230400, +0x50009865,0x7c00100,0x230400,0x50009965,0x4000010,0x400000,0x50009965,0x7c00100,0x230400,0x50409acc,0x4000000,0x200000,0x5100080e,0x7c00100,0x220400,0x5100080e, +0x7c00100,0x250400,0x51000c02,0x2802100,0x962460,0x51000c02,0x4000000,0x1500000,0x51000c02,0x4000020,0x200000,0x51000c02,0x7c00100,0x230400,0x51000f0a,0x7c00100, +0x230400,0x51000f0a,0x7c00500,0x230400,0x51001110,0x2802100,0x962460,0x5100131f,0x2802100,0x962460,0x51001423,0x7c00100,0x230400,0x51001524,0x2802100,0x962460, +0x51001524,0x4000000,0x200000,0x51001524,0x7c00100,0x230400,0x5100171a,0x2802100,0x962460,0x5100171a,0x4000000,0x200000,0x5100171a,0x4000000,0x1500000,0x5100171a, +0x7c00100,0x230400,0x51001b27,0x4000000,0x200000,0x51001b27,0x4000000,0x400000,0x51001b27,0x4000000,0x500000,0x51001b27,0x7c00100,0x230400,0x51001c1c,0x2802100, +0x1862460,0x51001c1c,0x2802500,0x1862460,0x51001c1c,0x2806400,0x1862460,0x51001c1c,0x4000000,0x1800000,0x51001c1c,0x6800000,0x1329800,0x51001c1c,0x6800100,0x1862400, +0x51001c1c,0x6800100,0x1862540,0x51001c1c,0x6800500,0x1862400,0x51001c1c,0x7c00100,0x1830000,0x5100251b,0x7c00100,0x230400,0x51002619,0x7c00100,0x220400,0x51002619, +0x7c00100,0x250400,0x51002800,0x80020,0x218820,0x51002c00,0x4000000,0x200000,0x51002d19,0x7c00100,0x230400,0x51003700,0x24000000,0x200000,0x51003700,0x24000000, +0xe00000,0x51005201,0x2802400,0x962460,0x51005c00,0x4000000,0x200000,0x51006108,0x7c00100,0x220400,0x51006108,0x7c00100,0x250400,0x51006600,0x24000020,0x200000, +0x51006600,0x24000020,0x810000,0x51006600,0x24000020,0x1410000,0x51007300,0x24000000,0x200000,0x51007300,0x24000020,0x200000,0x51008002,0x7c00100,0x230400,0x51008301, +0x2802000,0x962460,0x51008301,0x2802400,0x962460,0x51008301,0x2802400,0xc62460,0x51008a00,0x7c00500,0x230400,0x51008e00,0x24000000,0x200000,0x51008e00,0x24000000, +0x400000,0x51008e00,0x24000000,0x810000,0x51008e00,0x24000000,0x1400000,0x51008e00,0x24000000,0x1410000,0x51008e00,0x24000000,0x1710000,0x51008e00,0x24000002,0x200000, +0x51008e00,0x24000500,0x230400,0x51008e00,0x2c000010,0xb48000,0x51009419,0x7c00100,0x220400,0x51009419,0x7c00100,0x22040e,0x51009419,0x7c00100,0x22040f,0x51009419, +0x7c00100,0x250400,0x51009500,0x4000400,0x200400,0x51009500,0x7c00500,0x230400,0x51009519,0x7c00100,0x220400,0x51009519,0x7c00100,0x22040f,0x51009519,0x7c00100, +0x230400,0x51009519,0x7c00100,0x250400,0x51009b71,0x2802100,0x962460,0x51009b71,0x6800000,0x1329800,0x51009b71,0x6800100,0x962540,0x51009b71,0x6804400,0x962540, +0x51009b71,0x7c00100,0x230400,0x51009c52,0x2802100,0x962460,0x51009c52,0x2802400,0x962460,0x51009c52,0x2802d00,0x962460,0x51009c52,0x4000010,0x400000,0x51009c52, +0x6800000,0x1329800,0x51009c52,0x6800100,0x962540,0x51009c52,0x7c00100,0x230400,0x51009c52,0xc000010,0x448000,0x51009d6d,0x6800000,0x1329800,0x51009d6d,0x7c00100, +0x230400,0x51009d6d,0x7c00500,0x230400,0x51009d6d,0x7c00d00,0x230400,0x51009d6d,0xc000010,0x448000,0x51009e08,0x2802100,0x962460,0x51009f63,0x4000010,0x400000, +0x51009f63,0x6800000,0x1329800,0x51009f63,0x7c00100,0x230400,0x51009f63,0x7c00900,0x230400,0x51009f63,0xc000010,0x448000,0x51009f63,0xc000010,0xb48000,0x5100a008, +0x2000,0x962460,0x5100a008,0x2802400,0x962460,0x5100a008,0x4000000,0x200000,0x5100a008,0x7c00100,0x220400,0x5100a008,0x7c00100,0x230400,0x5100a008,0x7c00100, +0x250400,0x5100a008,0x7c00500,0x230400,0x5100a16f,0x2806400,0x962460,0x5100a16f,0x6800000,0x1329800,0x5100a16f,0x6800100,0x962540,0x5100a16f,0x7c00100,0x230400, +0x5100a16f,0xc000010,0x448000,0x5100a24f,0x2802100,0x962460,0x5100a24f,0x2802400,0x962460,0x5100a24f,0x6800000,0x1329800,0x5100a24f,0x7c00100,0x230400,0x5100a24f, +0xc000010,0x448000,0x5100a36e,0x2802100,0x962460,0x5100a36e,0x4000000,0x200000,0x5100a36e,0x6800100,0x962540,0x5100a36e,0x6804400,0x962540,0x5100a36e,0x7c00100, +0x230400,0x5100a442,0x2802100,0x962460,0x5100a442,0x4000000,0xe00000,0x5100a442,0x6800000,0xe29800,0x5100a442,0x6800100,0x962540,0x5100a442,0x7c00100,0x430400, +0x5100a442,0x7c00100,0x2d30400,0x5100a442,0xc000010,0x448000,0x5100a500,0x4000000,0x200000,0x5100a600,0x4000000,0x200000,0x5100a601,0x2802000,0x962460,0x5100a76b, +0x7c00100,0x230400,0x5100a868,0x7c00100,0x230400,0x5100a96c,0x4000000,0x200000,0x5100a96c,0x7c00100,0x230400,0x5100aa00,0x4000000,0xe00000,0x5100ab00,0x4000000, +0xe00000,0x51086600,0x24000020,0x810000,0x51086600,0x24000020,0x1410000,0x510a4005,0x7c00100,0xe30400,0x510a4711,0x7c40300,0xe30000,0x510a7300,0x24000000,0x200000, +0x510aaa00,0x4000000,0xe00000,0x514a82cc,0x4000000,0xe00000,0x5150a20e,0x4000400,0x400000,0x51802bbe,0x2802000,0x962460,0x51c00908,0x2802400,0x962460,0x51c0a008, +0x2802400,0x962460,0x52000f0a,0x2802100,0x962460,0x52000f0a,0x6800100,0x962540,0x52000f0a,0x7c00100,0x230400,0x52001004,0x4000000,0x1600000,0x52001b00,0x4000000, +0x200000,0x52001c1c,0x2802100,0x1862460,0x52001c1c,0x6800100,0x1862400,0x52001c1c,0x6800500,0x1862400,0x52001e12,0x7c00100,0x2230500,0x52001e12,0x7c00100,0x2330520, +0x52002128,0x4000002,0x400000,0x52002128,0x7c00100,0x230400,0x52002a00,0x4000000,0x1500000,0x52002a00,0x4000000,0x1600000,0x52002d00,0x4000000,0x200006,0x52003000, +0x24000000,0x200000,0x52006108,0x7c00100,0x220400,0x52006108,0x7c00100,0x250400,0x52008301,0x2802400,0x962460,0x52008407,0x2802400,0x962460,0x52008407,0x7c00100, +0x220400,0x52008407,0x7c00100,0x250400,0x52008b3b,0x6800000,0x1800000,0x52008b3b,0x7c00100,0x1830000,0x52008e00,0x24000000,0x400000,0x52009419,0x7c00100,0x250400, +0x5200975b,0x4000000,0x200000,0x5200ac7e,0x2802000,0x962460,0x5200ac7e,0x2802100,0x962460,0x5200ac7e,0x2802400,0x962460,0x5200ac7e,0x4000010,0x200000,0x5200ac7e, +0x7c00100,0x230400,0x5200ac7e,0xc000010,0x248000,0x5200ad28,0x7c00100,0x230400,0x5200ae6a,0x2802100,0x1862460,0x5200ae6a,0x2802400,0x962460,0x5200ae6a,0x2802400, +0x1862460,0x5200ae6a,0x2806000,0x1862460,0x5200ae6a,0x4000000,0x1800000,0x5200ae6a,0x6800000,0x1329800,0x5200ae6a,0x6800100,0x1862400,0x5200ae6a,0x6800100,0x1862540, +0x5200ae6a,0x7c00100,0x1830000,0x5200ae6a,0x7c00900,0x1830000,0x5200ae6a,0xc000010,0x1848000,0x5200b083,0x4000010,0x400000,0x5200b083,0x7c00100,0x230400,0x5200b083, +0xc000010,0x448000,0x5200b182,0x2802400,0x962460,0x5200b182,0x4000000,0x200000,0x5200b182,0x4000010,0x400000,0x5200b182,0x7c00100,0x230400,0x5200b182,0xc000010, +0x448000,0x5200b30a,0x2802400,0x962460,0x5200b30a,0x4000000,0x200000,0x5200b30a,0x7c00100,0x230400,0x5200b54e,0x2802100,0x962460,0x5200b54e,0x2802400,0x962460, +0x5200b54e,0x4000000,0xe00000,0x5200b54e,0x4000010,0x400000,0x5200b54e,0x6800000,0xe29800,0x5200b54e,0x6800100,0x962540,0x5200b54e,0x6804400,0x2f62540,0x5200b54e, +0x7c00100,0x2b30400,0x5200b54e,0xc000010,0x448000,0x5200b61c,0x4000000,0x1800000,0x5200b61c,0x6800500,0x1862400,0x5200b61c,0x7c00100,0x1830000,0x5200b61c,0x7c00900, +0x1830000,0x5200b77f,0x2802100,0x1862460,0x5200b77f,0x2802400,0x1862460,0x5200b77f,0x4000000,0x1800000,0x5200b77f,0x4000010,0x1800000,0x5200b77f,0x7c00100,0x1830000, +0x5200b77f,0x7c00500,0x1830000,0x5200b77f,0x7c00900,0x1830000,0x5200b77f,0x7e00100,0x1830000,0x5200b873,0x2802100,0x962460,0x5200b873,0x2806400,0x962460,0x5200b873, +0x6800000,0x1329800,0x5200b873,0x6800100,0x962540,0x5200b873,0x6800400,0x962540,0x5200b873,0x7c00100,0x230400,0x5200b873,0xc000010,0x448000,0x5200b912,0x7c00100, +0x2230500,0x5200b912,0x7c00100,0x2330520,0x5200ba74,0x4000000,0x200000,0x5200ba74,0x4000010,0x400000,0x5200ba74,0x7c00100,0x230400,0x5200bb85,0x4000000,0x200000, +0x5200bb85,0x7c00100,0x230400,0x5200bc75,0x4000000,0x400000,0x5200bc75,0x4000010,0x400000,0x5200bc75,0x7c00100,0x230400,0x5200bd7d,0x4000000,0x200000,0x5200bd7d, +0x7c00100,0x230400,0x5200be7a,0x4000000,0x200000,0x5200be7a,0x7c00100,0x230400,0x5200bf58,0x7c00100,0x230400,0x5200c002,0x4000000,0x200000,0x5200c178,0x2802100, +0x962460,0x5200c178,0x2802400,0x962460,0x5200c178,0x2806400,0x962460,0x5200c178,0x4000000,0x200000,0x5200c178,0x6800100,0x962540,0x5200c178,0x7c00100,0x230400, +0x5200c178,0x7c00100,0x230401,0x5200c178,0xc000010,0x448000,0x5200c178,0x80000000,0x1329960,0x5200c247,0x7c00100,0x230400,0x5200c247,0x7c00100,0x830400,0x5200c247, +0x7c00100,0x1430400,0x5200c300,0x4000000,0x200003,0x52022d00,0x4000000,0x100006,0x52023700,0x24000000,0x100000,0x52023700,0x24000000,0xe00000,0x52023700,0x24000000, +0x2800000,0x52024400,0x4000000,0x100000,0x52027300,0x24000000,0x100000,0x5202c300,0x4000000,0x100000,0x5202c300,0x4000000,0x100002,0x5202c300,0x4000000,0x100003, +0x5202c300,0x4000000,0x10000d,0x5202c300,0x4000100,0x150400,0x5202c300,0x4000100,0x15040d,0x520a1e12,0x7c00100,0x2130480,0x520a3700,0x24000000,0xe00000,0x520a3800, +0x24000000,0x100000,0x520a4711,0x7c40300,0xe30000,0x520a4f11,0x7c00300,0xe30001,0x520a7300,0x24000000,0x100000,0x520ab412,0x7c00100,0x2130480,0x520ac400,0x4000000, +0xe00002,0x520ac400,0x4000000,0xe0000d,0x520ac414,0x4000000,0xe0000d,0x520ac511,0x7c40300,0xe30000,0x5240af9c,0x7c00100,0x230400,0x5240afa1,0x4000400,0x200000, +0x5240afa3,0x6800400,0x962540,0x5240afa3,0x7c00100,0x230400,0x5240afad,0x7c00100,0x230400,0x5240afaf,0x7c00100,0x230400,0x5240b2d4,0x4000000,0x200000,0x5240b2e3, +0x4000000,0x200000,0x5240b2f1,0x4000000,0x200000,0x5240b2fc,0x4000000,0x1500000,0x524a44cc,0x4000000,0xe00003,0x5250b511,0x7c00900,0x430400,0x5280af9c,0x2802400, +0x962460,0x5280af9d,0x2802400,0x962460,0x5280afa3,0x2802400,0x962460,0x5280afa5,0x2802400,0x962460,0x5280afa7,0x2802400,0x962460,0x52d0b308,0x2802400,0x962460, +0x52d0b30c,0x7c00100,0x230400,0x60000c02,0x2802100,0x962460,0x60000c02,0x7c00100,0x230400,0x60000f0a,0x2802100,0x962460,0x60000f0a,0x6800100,0x962540,0x60000f0a, +0x7c00100,0x230400,0x6000131f,0x4000000,0x200000,0x6000171a,0x7c00100,0x230400,0x6000171a,0x7c00100,0x230560,0x60001b27,0x2802100,0x962460,0x60001b27,0x4000000, +0xc00000,0x60001b27,0x7c00100,0x230400,0x60001f0b,0x2802400,0x962460,0x60002919,0x7c00100,0x22040e,0x60002a00,0x4000000,0x1600000,0x60003000,0x24000000,0x200000, +0x60003000,0x24000000,0xe00000,0x60003700,0x24000000,0x200000,0x60003800,0x24000000,0x1710000,0x60005102,0x4000000,0x200000,0x60006108,0x7c00100,0x220400,0x60006108, +0x7c00100,0x250400,0x60006600,0x24000020,0x200000,0x60008301,0x2802400,0xc62460,0x6000903c,0x2806000,0x962460,0x6000903c,0x4000000,0x400000,0x60009519,0x7c00100, +0x220400,0x60009519,0x7c00100,0x250400,0x6000a008,0x7c00100,0x220400,0x6000a008,0x7c00100,0x250400,0x6000c300,0x4000000,0x2703580,0x6000c654,0x2802000,0x962460, +0x6000c654,0x4000010,0x200000,0x6000c654,0x7c00100,0x230400,0x6000c73f,0x2802000,0x962460,0x6000c73f,0x2802100,0x962460,0x6000c73f,0x4000000,0x200000,0x6000c73f, +0x6800100,0x962540,0x6000c73f,0x6804000,0x2e62540,0x6000c73f,0x7c00100,0x2d30400,0x6000c80b,0x7c00100,0x230400,0x6000c941,0x2802100,0x962460,0x6000c941,0x2806400, +0x2f62460,0x6000c941,0x4000000,0xe00000,0x6000c941,0x4000010,0xe00000,0x6000c941,0x6800000,0x2d29800,0x6000c941,0x6800100,0x962540,0x6000c941,0x7c00100,0x2b30400, +0x6000c941,0x7c00100,0x2c30400,0x6000c941,0xc000010,0x448000,0x6000ca82,0x7c00100,0x230400,0x6000cc00,0x4000000,0xe00000,0x6000d000,0x4000000,0x200000,0x6002c300, +0x4000000,0x100000,0x6002c300,0x4000000,0x10000d,0x6002c300,0x4000100,0x150400,0x6002c300,0x4000100,0x15040d,0x600a3000,0x24000000,0x200000,0x600a3000,0x24000000, +0xe00000,0x600a3700,0x24000000,0x200000,0x600a3800,0x24000000,0x200000,0x600a3800,0x24000000,0x2800000,0x600a4305,0x7c00100,0xe30400,0x600ac300,0x4000000,0x100000, +0x600ac400,0x4000000,0xe0000d,0x600acb14,0x7c00100,0xe30000,0x600acb16,0x7c00100,0xe30c00,0x600acc00,0x4000000,0xe00000,0x600acd00,0x4000000,0x200000,0x600acd00, +0x4000000,0xe00000,0x600acd00,0x4000000,0x2800000,0x600ace00,0x4000000,0xe00000,0x600ace00,0x4000000,0x2800000,0x600acf00,0x4000000,0xe00000,0x600acf00,0x4000000, +0x2800000,0x600ad111,0x7c40300,0xe30000,0x604ac4cc,0x4000000,0xe00003,0x61000a03,0x4000000,0x1600000,0x61000c02,0x80000000,0x1329960,0x6100120f,0x4000000,0x200000, +0x61001a18,0x7c00100,0x1830000,0x61001d0c,0x7c00100,0x230400,0x61001d0c,0x7c00100,0x250400,0x61006600,0x24000020,0x200000,0x61008407,0x7c00100,0x220400,0x61008407, +0x7c00100,0x250400,0x6100870c,0x7c00100,0x220400,0x61008e00,0x24000000,0x200000,0x61008e00,0x24000000,0x400000,0x61008e00,0x24000002,0x300000,0x6100903c,0x7c00100, +0x230400,0x61009519,0x7c00100,0x220400,0x61009519,0x7c00100,0x250400,0x61009519,0x7c00500,0x22040f,0x61009b71,0x2802100,0x962460,0x61009b71,0x2806400,0x962460, +0x61009b71,0x7c00100,0x230400,0x6100a008,0x2802100,0x962460,0x6100c300,0x4000000,0x20000f,0x6100cd00,0x4000000,0x200000,0x6100d202,0x2802400,0x962460,0x6100d202, +0x2802500,0x962460,0x6100d202,0x7c00100,0x230400,0x6100d302,0x4000020,0x200000,0x6100d302,0x7c00120,0x230405,0x6100d476,0x2802100,0x962460,0x6100d476,0x2802100, +0x962461,0x6100d476,0x2806400,0x962460,0x6100d476,0x4000000,0x400000,0x6100d476,0x6800000,0x1329800,0x6100d476,0x6800100,0x962540,0x6100d476,0x7c00100,0x230400, +0x6100d476,0xc000010,0x448000,0x6100d573,0x2802100,0x962460,0x6100d573,0x2806400,0x962460,0x6100d573,0x6800100,0x962540,0x6100d573,0x7c00100,0x230400,0x6100d573, +0x7c00900,0x230400,0x6100d573,0xc000010,0x448000,0x6100d68d,0x7c00100,0x230400,0x6100d756,0x7c00100,0x230400,0x6100d85c,0x2802500,0x962460,0x6100d85c,0x6800100, +0x962540,0x6100d85c,0x7c00100,0x230400,0x6100d85c,0x7c00500,0x230400,0x6100d997,0x2802100,0x962460,0x6100d997,0x4000000,0x200000,0x6100d997,0x4000000,0x400000, +0x6100d997,0x6800000,0x1329800,0x6100d997,0x6800100,0x962540,0x6100d997,0x6804400,0x962540,0x6100d997,0x7c00100,0x230400,0x6100d997,0x7c00100,0x230560,0x6100d997, +0xc000010,0x448000,0x6100da98,0x6800000,0x1329800,0x6100da98,0x7c00100,0x230400,0x6100db71,0x4000000,0x200000,0x6100dc99,0x2802100,0x962460,0x6100dc99,0x2802400, +0x962460,0x6100dc99,0x6800000,0x1329800,0x6100dc99,0x6800100,0x962540,0x6100dc99,0x6804400,0x962540,0x6100dc99,0x7c00100,0x230400,0x610a4711,0x7c40300,0xe30000, +0x610a4f11,0x7c00300,0xe30001,0x610ace00,0x4000000,0xe00000,0x6140afa1,0x7c00100,0x230400,0x6140afa3,0x7c00100,0x230400,0x6180af9e,0x2802400,0x962460,0x62002a00, +0x4000000,0x1600000,0x63002800,0x80000,0x918820,0x63c00c15,0x80000,0x918820,0x7000080e,0x7c00100,0x250400,0x70000a03,0x4000000,0x200000,0x70000c00,0x80000000, +0x1329960,0x70000f0a,0x7c00100,0x230400,0x70001004,0x7c00100,0x230400,0x70001524,0x2802100,0x962460,0x70001524,0x7c00100,0x230400,0x70001615,0x2802100,0x962460, +0x7000171a,0x2802100,0x962460,0x70001821,0x6800000,0x1329800,0x70002320,0x7c00100,0x230400,0x70002a00,0x4000000,0x1500000,0x70002a00,0x4000000,0x1600000,0x70003000, +0x24000000,0x200000,0x70003800,0x24000000,0xe00000,0x70005201,0x2802400,0x962460,0x7000581e,0x7c00100,0x230400,0x70006108,0x7c00100,0x220400,0x70006108,0x7c00100, +0x250400,0x70006f30,0x7c00100,0x230400,0x70007300,0x24000000,0x200000,0x70007f0e,0x4000000,0x200000,0x70008301,0x2802100,0x962460,0x70008301,0x2802400,0x962460, +0x70008e00,0x24000000,0x200000,0x70008e00,0x24000000,0x400000,0x70008e00,0x24000002,0x400000,0x70008e00,0x24000008,0x1410000,0x70008e00,0x24000010,0x400000,0x70008e00, +0x2c000010,0x448000,0x70009519,0x7c00100,0x220400,0x70009519,0x7c00100,0x230400,0x70009519,0x7c00100,0x250400,0x70009865,0x7c00100,0x230400,0x70009965,0x4000010, +0x400000,0x70009965,0x7c00100,0x230400,0x7000a008,0x7c00100,0x220400,0x7000a008,0x7c00100,0x250400,0x7000a008,0x7c00500,0x22040f,0x7000a50e,0x4000000,0x200000, +0x7000b61c,0x2802500,0x1862460,0x7000b61c,0x6800500,0x1862400,0x7000b61c,0x7c00100,0x1830000,0x7000c300,0x4000000,0x100000,0x7000c941,0x2806000,0xc62460,0x7000cc00, +0x4000000,0xe00000,0x7000cd00,0x4000000,0x200000,0x7000cd00,0x4000000,0xe00000,0x7000cd00,0x4000000,0x2800000,0x7000cf00,0x4000000,0xe00000,0x7000d202,0x2802100, +0x962460,0x7000d202,0x7c00100,0x230400,0x7000d997,0x7c00100,0x230400,0x7000d997,0xc000010,0x248000,0x7000dd86,0x2802400,0x962460,0x7000dd86,0x7c00100,0x230400, +0x7000dd86,0xc000010,0x448000,0x7000de9f,0x4000000,0x200000,0x7000de9f,0x7c00100,0x230400,0x7000e001,0x2400,0x962460,0x7000e001,0x2802400,0x962460,0x7000e187, +0x2802000,0x962460,0x7000e187,0x2802100,0x962460,0x7000e187,0x4000000,0x200000,0x7000e187,0x7c00100,0x230400,0x7000e187,0xc000010,0x448000,0x7000e288,0x7c00100, +0x230400,0x7000e300,0x4000000,0x200000,0x7000e489,0x2802100,0x962460,0x7000e489,0x2802400,0x962460,0x7000e489,0x6800100,0x962540,0x7000e489,0x6800100,0x962541, +0x7000e489,0x6804400,0x2f62540,0x7000e489,0x7c00100,0x430400,0x7000e489,0x7c00100,0x2b30400,0x7000e489,0x7c00100,0x2d30400,0x7000e489,0x7c00900,0x430400,0x7000e59d, +0x2802100,0x962460,0x7000e59d,0x2802400,0x962460,0x7000e59d,0x4000000,0x200000,0x7000e59d,0x4000010,0x200000,0x7000e59d,0x6800100,0x962540,0x7000e59d,0x6804400, +0x962540,0x7000e59d,0x7c00100,0x230400,0x7000e59d,0xc000010,0x448000,0x7000e691,0x2802100,0x962460,0x7000e691,0x2802400,0x962460,0x7000e691,0x2806400,0x962460, +0x7000e691,0x6800000,0x1329800,0x7000e691,0x6800100,0x962540,0x7000e691,0x7c00100,0x230400,0x7000e700,0x4000400,0x200400,0x7000e70e,0x7c00100,0x220400,0x7000e719, +0x7c00100,0x220400,0x7000e719,0x7c00500,0x22040f,0x7000e853,0x7c00100,0x230400,0x7000e9a0,0x2802400,0x962460,0x7000e9a0,0x4000000,0x200000,0x7000e9a0,0x4000000, +0x500000,0x7000e9a0,0x7c00100,0x230400,0x7000ea79,0x2802400,0x962460,0x7000ea79,0x4000000,0x200000,0x7000ea79,0x4000000,0xf00000,0x7000ea79,0x4000010,0x400000, +0x7000ea79,0x7c00100,0x230400,0x7000eb8c,0x2802400,0x962460,0x7000eb8c,0x4000000,0x200000,0x7000eb8c,0x7c00100,0x230400,0x7000eca3,0x2802100,0x962460,0x7000eca3, +0x2806400,0x962460,0x7000eca3,0x4000000,0x200000,0x7000eca3,0x6800000,0x1329800,0x7000eca3,0x6800100,0x962540,0x7000eca3,0x7c00100,0x230400,0x7000eca3,0xc000010, +0x448000,0x7000ed95,0x6800000,0x1329800,0x7000ed95,0x7c00100,0x230400,0x7000ed95,0xc000010,0x448000,0x7000ee1c,0x2802500,0x1862460,0x7000ee1c,0x6800000,0x1329800, +0x7000ee1c,0x7c00100,0x1830000,0x7000ee1c,0x7c00900,0x1830000,0x7000ef8f,0x4000000,0x200000,0x7000ef8f,0x7c00100,0x230400,0x7000f08e,0x4000000,0x200000,0x7000f08e, +0x7c00100,0x230400,0x7000f159,0x2802100,0x962460,0x7000f159,0x7c00100,0x230400,0x7000f200,0x4000000,0x200000,0x7000f200,0x4000000,0x1200000,0x7000f200,0x4000000, +0x1710000,0x7000f34b,0x2802400,0x962460,0x7000f34b,0x4000000,0x200000,0x7000f34b,0x4000010,0x400000,0x7000f34b,0x6800000,0x1329800,0x7000f34b,0x7c00100,0x230400, +0x7000f34b,0x7c00900,0x230400,0x7000f34b,0xc000010,0x448000,0x7000f490,0x4000000,0x200000,0x7000f490,0x7c00100,0x230400,0x7000f5a5,0x7c00100,0x230400,0x7000f67b, +0x4000000,0x200000,0x7000f67b,0x4000010,0x200000,0x7000f67b,0x7c00100,0x230400,0x7000f8a6,0x2802100,0x962460,0x7000f8a6,0x2802400,0x962460,0x7000f8a6,0x2806400, +0x962460,0x7000f8a6,0x4000000,0x500000,0x7000f8a6,0x4000010,0xb00000,0x7000f8a6,0x4000800,0x200000,0x7000f8a6,0x6800100,0x962540,0x7000f8a6,0x6800100,0x962541, +0x7000f8a6,0x7c00100,0x230400,0x7000f8a6,0xc000010,0x448000,0x7000f921,0x4000000,0x200000,0x7000fa00,0x4000000,0x200000,0x7000fb9e,0x2802100,0x962460,0x7000fb9e, +0x2802400,0x962460,0x7000fb9e,0x2806400,0x962460,0x7000fb9e,0x4000000,0x200000,0x7000fb9e,0x6800000,0x1329800,0x7000fb9e,0x6800100,0x962540,0x7000fb9e,0x6800100, +0x962541,0x7000fb9e,0x7c00100,0x230400,0x7000fc92,0x4000000,0x200000,0x7000fc92,0x6800000,0x1329800,0x7000fc92,0x7c00100,0x220400,0x7000fc92,0x7c00100,0x230400, +0x7000fc92,0x7c00100,0x250400,0x700acd00,0x4000000,0xe00000,0x700acd00,0x4000000,0x2800000,0x700ace00,0x4000000,0xe00000,0x700acf00,0x4000000,0xe00000,0x700acf00, +0x4000000,0x2800000,0x7050df21,0x4000000,0x200000,0x7050f729,0x80000,0x918820,0x7080afa1,0x2802400,0x962460,0x7090df21,0x2802400,0x962460,0x70d0e427,0x2802100, +0x962460,0x70d0e427,0x2802400,0x962460,0x70d0e427,0x6800100,0x962540,0x70d0ea25,0x4000010,0x400000,0x8000120f,0x7c00100,0x230400,0x80001524,0x7c00100,0x230400, +0x8000171a,0x7c00100,0x230400,0x80002006,0x7c00100,0x220400,0x80002006,0x7c00100,0x250400,0x80002a00,0x4000000,0x1500000,0x80002d00,0x4000000,0x200000,0x80005208, +0x2802400,0x962460,0x80005c00,0x4000000,0x200000,0x80007300,0x24000000,0x200000,0x80009519,0x7c00100,0x220400,0x80009519,0x7c00100,0x230400,0x80009519,0x7c00100, +0x250400,0x80009865,0x7c00100,0x230400,0x8000a008,0x2802100,0x962460,0x8000b30a,0x4000000,0x500000,0x8000b30a,0x7c00100,0x230400,0x8000cd00,0x4000000,0xe00000, +0x8000d202,0x2802500,0x962460,0x8000d202,0x7c00100,0x230400,0x8000d68d,0x4000000,0x200000,0x8000d997,0x2802000,0x962460,0x8000d997,0x2802400,0x962460,0x8000d997, +0x4000000,0x400000,0x8000d997,0x4000000,0x500000,0x8000d997,0x7c00100,0x230400,0x8000d997,0xc000010,0x448000,0x8000e489,0x2802100,0x962460,0x8000e489,0x7c00100, +0x2d30400,0x8000e719,0x7c00100,0x220400,0x8000f8a6,0x2802100,0x962460,0x8000f8a6,0x7c00100,0x230400,0x8000f8a6,0xc000010,0x448000,0x8000fda1,0x2802100,0x1862460, +0x8000fda1,0x2806400,0x1862460,0x8000fda1,0x4000000,0x1800000,0x8000fda1,0x6800000,0x1329800,0x8000fda1,0x6800100,0x1862400,0x8000fda1,0x6800100,0x1862540,0x8000fda1, +0x7c00100,0x1830000,0x8000fda1,0xc000010,0x448000,0x8000fe9c,0x7c00100,0x230400,0x8000fe9c,0x7c00100,0x830400,0x8000fe9c,0x7c00100,0x1430400,0x8000ff06,0x7c00100, +0x220400,0x80010165,0x7c00100,0x230400,0x800102a2,0x4000000,0x200000,0x800102a2,0x7c00100,0x230400,0x800103a4,0x7c00100,0x230400,0x800103a4,0xc000010,0x448000, +0x8001044c,0x4000000,0x200000,0x8001044c,0x7c00100,0x220400,0x8001044c,0x7c00100,0x250400,0x80010670,0x2802000,0x962460,0x80010670,0x4000000,0x200000,0x80010670, +0x4000010,0x400000,0x80010670,0xc000010,0x448000,0x800a4711,0x7c40300,0xe30000,0x800acd00,0x4000000,0xe00000,0x800acd00,0x4000000,0x2902460,0x800ace00,0x4000000, +0xe00000,0x800acf00,0x4000000,0xe00000,0x800b0011,0x7c40300,0xe30000,0x800b0500,0x4000000,0xe00000,0x800b0500,0x4000000,0x2800000,0x90001615,0x7c00100,0x230400, +0x9000171a,0x4000000,0x200000,0x9000171a,0x7c00100,0x230400,0x90003000,0x24000000,0x200000,0x90007f0e,0x4000000,0x200000,0x90008301,0x2802400,0x962460,0x90008e00, +0x24000000,0x400000,0x90009519,0x7c00100,0x250400,0x9000a16f,0x2802100,0x962460,0x9000d200,0x80000000,0x1329960,0x9000d202,0x2802000,0x962460,0x9000d202,0x2802100, +0x962460,0x9000d202,0x7c00100,0x230400,0x9000e59d,0x2802100,0x962460,0x90010500,0x4000000,0xe00000,0x900107a7,0x2802100,0x962460,0x900107a7,0x2802400,0x962460, +0x900107a7,0x2802c00,0x962460,0x900107a7,0x4000000,0x1400000,0x900107a7,0x6800000,0x1329800,0x900107a7,0x7c00100,0x220400,0x900107a7,0x7c00100,0x250400,0x900108a8, +0x2802100,0x962460,0x900108a8,0x2806400,0x962460,0x900108a8,0x4000000,0x200000,0x900108a8,0x4000000,0x400000,0x900108a8,0x4000010,0x400000,0x900108a8,0x6800000, +0x1329800,0x900108a8,0x6800100,0x962540,0x900108a8,0x7c00100,0x230400,0x900108a8,0xc000010,0x448000,0x90010908,0x7c00100,0x220400,0x90010a38,0x2802100,0x962460, +0x90010ca9,0x2802100,0x962460,0x90010ca9,0x4000000,0x500000,0x90010ca9,0x4000010,0xb00000,0x90010ca9,0x6800100,0x962540,0x90010ca9,0x7c00100,0x230400,0x90010d1b, +0x4000000,0x500000,0x90010eaa,0x2802100,0x962460,0x90010eaa,0x2802400,0x962460,0x90010eaa,0x2806400,0x962460,0x90010eaa,0x4000000,0x200000,0x90010eaa,0x4000000, +0x400000,0x90010eaa,0x4000010,0x400000,0x90010eaa,0x6800000,0x1329800,0x90010eaa,0x6800100,0x962540,0x90010eaa,0x7c00100,0x230400,0x90010eaa,0xc000010,0x448000, +0x90010fab,0x7c00100,0x220400,0x90010fab,0x7c00100,0x250400,0x9002c300,0x4000000,0x100000,0x900ac400,0x4000000,0xe0000d,0x900acd00,0x4000000,0xe00000,0x900acd00, +0x4000000,0x2800000,0x900acf00,0x4000000,0xe00000,0x900b0500,0x4000000,0xe00000,0x900b0500,0x4000000,0x2800000,0x900b0b9a,0x7c00900,0x1230400,0x900b109a,0x7c00300, +0xe30000,0x900b119a,0x7c00300,0xe30000,0x90408e06,0x24000000,0x400000,0xa0001004,0x4000000,0x200000,0xa0001004,0x7c00100,0x230400,0xa000120f,0x2802100,0x962460, +0xa000120f,0x2802400,0x962460,0xa000171a,0x2802100,0x962460,0xa000171a,0x2806400,0x962460,0xa0002a00,0x4000000,0x1600000,0xa0003000,0x24000000,0x200000,0xa000581e, +0x7c00100,0x230400,0xa0007300,0x24000000,0x200000,0xa0008301,0x2802400,0x962460,0xa0008e00,0x24000000,0x400000,0xa000cf00,0x4000000,0xe00000,0xa0010500,0x4000000, +0x200000,0xa00114af,0x2802100,0x962460,0xa00114af,0x2802400,0x962460,0xa00114af,0x2806400,0x962460,0xa00114af,0x6800000,0x1329800,0xa00114af,0x7c00100,0x230400, +0xa00114af,0x7c00100,0x230560,0xa00116b0,0x2802100,0x962460,0xa00116b0,0x2802800,0x962460,0xa00116b0,0x2806400,0x962460,0xa00116b0,0x4000000,0x400000,0xa00116b0, +0x4000000,0x500000,0xa00116b0,0x4000010,0x400000,0xa00116b0,0x6800100,0x962540,0xa00116b0,0x7c00100,0x230400,0xa00116b0,0x7c00100,0x230560,0xa00116b0,0xc000010, +0x448000,0xa0011722,0x7c00100,0x230400,0xa00118b1,0x2802000,0x962460,0xa00118b1,0x2802100,0x962460,0xa00118b1,0x2806400,0x962460,0xa00118b1,0x4000000,0x200000, +0xa00118b1,0x4000000,0x400000,0xa00118b1,0x4000000,0x500000,0xa00118b1,0x6800100,0x962540,0xa00118b1,0x7c00100,0x230400,0xa00118b1,0x7c00100,0x230560,0xa00118b1, +0xc000010,0x448000,0xa00a4005,0x7c00100,0xe30400,0xa00a4711,0x7c40300,0xe30000,0xa00ac400,0x4000000,0xe00000,0xa00acb14,0x7c00100,0xe30000,0xa00acf00,0x4000000, +0xe00000,0xa00b0500,0x4000000,0xe00000,0xa00b0500,0x4000000,0x2800000,0xa00b0b96,0x7c00900,0x1230400,0xa00b1211,0x7c40300,0xe30000,0xa00b1314,0x7c00100,0xe30000, +0xa00b1596,0x7c00300,0xe30000,0xa040afb9,0x6800400,0x962540,0xa08083ba,0x2802400,0x962460,0xb0000a03,0x7c00100,0x220400,0xb0000b13,0x7c00100,0x2633800,0xb0001004, +0x2802000,0x962460,0xb0001110,0x4000000,0x200000,0xb0001524,0x2802100,0x962460,0xb0001615,0x4000000,0x500000,0xb000251b,0x7c00100,0x230400,0xb0007300,0x24000000, +0x200000,0xb0008939,0x4000000,0x200000,0xb0008939,0x7c00100,0x230400,0xb0008e00,0x24000000,0x200000,0xb0008e00,0x24000000,0x400000,0xb0008e00,0x24000010,0x400000, +0xb0009257,0x2802000,0x962460,0xb0009257,0x4000000,0x1600000,0xb0009519,0x7c00100,0x220400,0xb0009519,0x7c00100,0x250400,0xb0009a00,0x4000000,0x200000,0xb000b30a, +0x2802100,0x962460,0xb000b30a,0x7c00100,0x230400,0xb000c178,0x80000000,0x1329960,0xb000c300,0x4000000,0x200000,0xb000d202,0x2802000,0x962460,0xb000d476,0x6800100, +0x962540,0xb000d476,0x7c00100,0x230400,0xb000e300,0x4000000,0xe00000,0xb000fda1,0x7c00100,0x1830000,0xb0010eaa,0x2802000,0x962460,0xb00116b0,0x7c00100,0x230400, +0xb0011900,0x4000000,0xe00000,0xb0011ab2,0x2802100,0x962460,0xb0011ab2,0x2802400,0x962460,0xb0011ab2,0x2806400,0x962460,0xb0011ab2,0x4000000,0x200000,0xb0011ab2, +0x6800100,0x962540,0xb0011ab2,0x7c00100,0x230400,0xb0011b0c,0x7c00100,0x230400,0xb0011cb3,0x2802100,0x962460,0xb0011cb3,0x2806400,0x962460,0xb0011cb3,0x6800000, +0x1329800,0xb0011cb3,0x6800100,0x962540,0xb0011cb3,0x7c00100,0x230400,0xb0011db6,0x2802500,0x962460,0xb0011db6,0x6800000,0x1329800,0xb0011db6,0x7c00100,0x230400, +0xb0011db6,0x7c00500,0x230400,0xb0011e00,0x4000000,0x200000,0xb0011e00,0x4000000,0x1500000,0xb0011fb4,0x2802100,0x962460,0xb0011fb4,0x6800100,0x962540,0xb0011fb4, +0x7c00100,0x430400,0xb0011fb4,0x7c00100,0x2d30400,0xb0011fb4,0xc000010,0x448000,0xb0012000,0x4000000,0x200000,0xb00121b5,0x4000000,0x200000,0xb00121b5,0x4000010, +0x400000,0xb00121b5,0x7c00100,0x220400,0xb00121b5,0x7c00100,0x250400,0xb00121b5,0xc000010,0x448000,0xb00122b8,0x4000000,0x200000,0xb00122b8,0x7c00100,0x230400, +0xb00123b7,0x2802400,0x962460,0xb00123b7,0x4000000,0x200000,0xb00123b7,0x7c00100,0x230400,0xb00123b7,0xc000010,0x248000,0xb00a4005,0x7c00100,0xe30400,0xb00a4711, +0x7c40300,0xe30000,0xb00acf00,0x4000000,0xe00000,0xb00b0500,0x4000000,0xe00000,0xb00b0500,0x4000000,0x2800000,0xb00b109a,0x7c00300,0xe30000,0xb080e487,0x2802000, +0x962460,0xc0001524,0x4000000,0x500000,0xc0001a18,0x2806400,0x1862460,0xc0001a18,0x7c00100,0x1830000,0xc0007300,0x24000000,0x200000,0xc0008e00,0x24000010,0x400000, +0xc0009519,0x7c00100,0x220400,0xc0009519,0x7c00100,0x250400,0xc000c300,0x4000000,0x20000f,0xc000d85c,0x2802100,0x962460,0xc000d85c,0x6800100,0x962540,0xc000d85c, +0x7c00100,0x230400,0xc000dc99,0x7c00100,0x230400,0xc000e719,0x7c00100,0x220400,0xc00107a7,0x7c00100,0x230400,0xc0010eaa,0x7c00100,0x230400,0xc00116b0,0x7c00100, +0x230560,0xc0011900,0x4000000,0x200000,0xc0012447,0,0x818820,0xc0012447,0,0xc18820,0xc0012447,0,0x1418820,0xc00125b9,0x7c00100,0x230400, +0xc00126bb,0x2802100,0x962460,0xc00126bb,0x2806400,0x962460,0xc00126bb,0x4000000,0x500000,0xc00126bb,0x6800100,0x962540,0xc00126bb,0x7c00100,0x230400,0xc00127ba, +0x2802400,0x962460,0xc00127ba,0x4000000,0x200000,0xc00127ba,0x6800000,0x1329800,0xc00127ba,0x7c00100,0x230400,0xc00127ba,0x7c00900,0x230400,0xc0012800,0x4000000, +0x200000,0xc0012b23,0x4000000,0x200000,0xc0012b23,0x4000000,0x400000,0xc0012b23,0x4000000,0x1500000,0xc0012cbc,0x2802400,0x962460,0xc0012cbc,0x4000000,0x1600000, +0xc0012cbc,0x6800000,0x1329800,0xc0012cbc,0x7c00100,0x230400,0xc00acf00,0x4000000,0xe00000,0xc00ae300,0x4000000,0xe00000,0xc00b0500,0x4000000,0xe00000,0xc00b0500, +0x4000000,0x2800000,0xc00b0b11,0x4000000,0x1200000,0xc00b0b11,0x7c00900,0x1230400,0xc00b109a,0x7c00300,0xe30000,0xc00b2914,0x7c00100,0x2530000,0xc00b2916,0x7c00100, +0x2530c00,0xc00b2a00,0x4000000,0xe00000,0xc040af5e,0x7c00100,0x230400,0xc0c12b89,0x4000000,0x200000,0xc14a44cc,0x4000000,0xe0000d,0xd000131f,0x2802c00,0x962460, +0xd000171a,0x7c00100,0x230400,0xd0001821,0x2802100,0x962460,0xd0007300,0x24000000,0x200000,0xd0008e00,0x24000000,0x200000,0xd0008f3a,0x2806000,0x962460,0xd0009519, +0x7c00100,0x220400,0xd0009519,0x7c00100,0x250400,0xd000a500,0x4000000,0x200000,0xd000c300,0x4000000,0xe00000,0xd000d202,0x7c00100,0x230400,0xd000d476,0x7c00100, +0x230400,0xd000d997,0x2802100,0x962460,0xd000d997,0x6800100,0x962540,0xd000e001,0x2802100,0x962460,0xd000e700,0x4000400,0x200000,0xd000e719,0x7c00100,0x220400, +0xd000e719,0x7c00500,0x22040f,0xd000fa00,0x4000000,0xe00000,0xd0010eaa,0x4000010,0x400000,0xd0010eaa,0x7c00100,0x230400,0xd0012dbd,0x4000000,0x200000,0xd0012dbd, +0x7c00100,0x230400,0xd0012fbe,0x2802100,0x962460,0xd0012fbe,0x2802400,0x962460,0xd0012fbe,0x2806400,0x2f62460,0xd0012fbe,0x4000000,0x400000,0xd0012fbe,0x6800000, +0xe29800,0xd0012fbe,0x6800100,0x962540,0xd0012fbe,0x6800100,0x962541,0xd0012fbe,0x6804400,0x962540,0xd0012fbe,0x7c00100,0x2b30400,0xd0012fbe,0x7c00100,0x2c30560, +0xd0012fbe,0xc000010,0x448000,0xd0013183,0x7c00100,0x230400,0xd0013200,0x4000000,0x200000,0xd0013200,0x6800000,0x1329805,0xd00134c0,0x2802100,0x962460,0xd00134c0, +0x4000002,0x400000,0xd00134c0,0x7c00100,0x230400,0xd00a4305,0x7c00100,0xe30400,0xd00a4611,0x7c40300,0xe30000,0xd00a4711,0x7c40300,0xe30000,0xd00a5e11,0x7c40300, +0xe30000,0xd00acf00,0x4000000,0xe00000,0xd00b0500,0x4000000,0xe00000,0xd00b0500,0x4000000,0x2800000,0xd00b0b11,0x6800500,0x962540,0xd00b0bbf,0x2802200,0xc62460, +0xd00b119a,0x7c00300,0xe30000,0xd00b2a00,0x4000000,0xe00000,0xd00b2e11,0x7c40300,0xe30000,0xd00b30bf,0x7c00300,0x230000,0xd00b339a,0x7c00300,0xe30000,0xe0000c02, +0xc000010,0xb48000,0xe0001524,0x2802400,0x962460,0xe0001524,0x7c00100,0x230400,0xe0001615,0x7c00100,0x230400,0xe000251b,0x12882000,0x962460,0xe0002a00,0x4000000, +0x1500000,0xe0005102,0x4000000,0x200000,0xe0005c00,0x4000000,0x200000,0xe000622a,0x6804400,0x962540,0xe000622a,0x7c00100,0x230400,0xe0008838,0x7c00100,0x220400, +0xe0008838,0x7c00100,0x250400,0xe0008e00,0x24000000,0x810000,0xe0008e00,0x24000000,0x1410000,0xe0008e00,0x24000002,0x400000,0xe0008e00,0x2c000010,0xb48000,0xe000933e, +0x7c00100,0x2b30400,0xe000933e,0xc000010,0x448000,0xe0009519,0x7c00100,0x220400,0xe0009519,0x7c00100,0x22040f,0xe0009519,0x7c00100,0x250400,0xe000c178,0x2802100, +0x962460,0xe000c941,0x2802100,0x962460,0xe000c941,0x2806400,0x962460,0xe000c941,0x7c00100,0x2b30400,0xe000d202,0x2802400,0x962460,0xe000d202,0x7c00100,0x230400, +0xe000d202,0x7c00500,0x230400,0xe000dc99,0x4000000,0x200000,0xe000e001,0x2802100,0x962460,0xe000e001,0x2802400,0x962460,0xe000fda1,0x7c00100,0x1830000,0xe0013502, +0x2802400,0x962460,0xe0013502,0x4000000,0x200000,0xe0013502,0x7c00100,0x230400,0xe0013502,0x80000000,0x1329960,0xe00136c1,0x4000000,0x200000,0xe00136c1,0x7c00100, +0x230400,0xe001370b,0x7c00100,0x230400,0xe0013919,0x7c00500,0x220400,0xe0013919,0x7c00500,0x22040f,0xe0013919,0x7c00d00,0x23040f,0xe0013a19,0x7c00100,0x220400, +0xe0013a19,0x7c00100,0x230400,0xe0013bc2,0x2802400,0x962460,0xe0013bc2,0x7c00100,0x230400,0xe0013bc2,0xc000010,0x248000,0xe0013cc3,0x6800000,0x1329800,0xe0013cc3, +0x7c00100,0x230400,0xe0013dc4,0x2802400,0x962460,0xe0013dc4,0x7c00100,0x230400,0xe0013e28,0x7c00100,0x230400,0xe0013fc5,0x7c00100,0x220400,0xe0013fc5,0x7c00100, +0x250400,0xe0014000,0x4000000,0x200000,0xe0014001,0x2802400,0x962460,0xe00a4711,0x7c40300,0xe30000,0xe00a5e11,0x7c40300,0xe30000,0xe00ac511,0x7c40300,0xe30000, +0xe00acf00,0x4000000,0xe00000,0xe00ae300,0x4000000,0xe00000,0xe00b0500,0x4000000,0xe00000,0xe00b1314,0x7c00100,0xe30000,0xe00b1316,0x7c00100,0xe30c00,0xe00b2a00, +0x4000000,0xe00000,0xe00b2a00,0x4000000,0x2800000,0xe00b3816,0x7c00500,0x230c00,0xe0808328,0x2802400,0x962460,0xf0001615,0x6800100,0x962540,0xf0001a18,0x2802000, +0x1862460,0xf000c247,0x7c00100,0x1430400,0xf000d000,0x4000000,0xe00000,0xf000e300,0x4000000,0xe00000,0xf000e59d,0x2802100,0x962460,0xf000e59d,0x7c00100,0x230400, +0xf0012447,0,0x818820,0xf0012447,0,0xc18820,0xf0012447,0,0x1418820,0xf0012447,0x2802000,0x962460,0xf0012447,0x2802400,0x962460,0xf0012447, +0x7c00100,0x230400,0xf0013a19,0x7c00100,0x220400,0xf0014102,0x2802400,0x962460,0xf0014308,0x2802100,0x962460,0xf0014308,0x7c00500,0x22040e,0xf0014308,0x7c00500, +0x22040f,0xf001440a,0x4000000,0x500000,0xf0014500,0x4000000,0x200000,0xf00146c6,0x2802100,0x962460,0xf00146c6,0x2806000,0x2f62460,0xf00146c6,0x4000000,0xe00000, +0xf00146c6,0x6800000,0x2d29800,0xf00146c6,0x6800100,0x962540,0xf00146c6,0x6804000,0x962540,0xf00146c6,0x7c00100,0x2b30400,0xf00146c6,0x7c00100,0x2c30560,0xf00146c6, +0xc000010,0x448000,0xf00147c7,0x2802000,0x962460,0xf00147c7,0x6800000,0x1329800,0xf00147c7,0x7c00100,0x230400,0xf00ac511,0x7c40300,0xe30000,0xf00acf00,0x4000000, +0xe00000,0xf00b2914,0x7c00100,0x2530000,0xf00b2916,0x7c00100,0x2530c00,0xf00b2a00,0x4000000,0xe00000,0xf00b2a00,0x4000000,0x2800000,0xf00b4211,0x7c40300,0xe30000, +0xf10a3c00,0x4000000,0xe00000,0xf10a3c00,0x4008000,0xe00000,0xf10a8200,0x4008000,0xe00000,0xf10b4811,0x7c40300,0xe30000}; -static const int32_t countPropsVectors=7230; +static const int32_t countPropsVectors=7260; static const int32_t propsVectorsColumns=3; -static const uint16_t scriptExtensions[282]={ +static const uint16_t scriptExtensions[298]={ 0x800e,0x8019,8,0x8059,8,2,8,0x8038,8,6,8,0x8019,2,0x22,0x25,0x57, 0xb6,0x80c0,2,0x22,0x8025,2,0x12,2,0x22,0x25,0x57,0xa7,0xb6,0x80c0,2,0x22, 0x54,0x79,0x7b,0xa7,0xb6,0xb7,0x80c2,2,0x8022,2,0x25,0x80c0,2,0x29,2,0x80b6, @@ -3998,14 +4013,15 @@ static const uint16_t scriptExtensions[282]={ 0x80a4,0x10,0x7f,0xf,0x809d,0xf,0x83,0x23,0x8089,0x23,0x87,0x15,0x80bb,0x15,0x8b,0x1c, 0x34,0x8076,0x1c,0x8f,0xc,0x8019,0x2a,0x2b,0x2c,0x802d,0x1b,0x805a,0x800a,4,0xa,0x15, 0x8089,0xa,0x8089,4,0x800a,0xa,0x8097,0xa,0x15,0x1a,0x1f,0x23,0x8024,0xa,0x80bb,4, -0xa,0x15,0x1f,0x24,0x89,0x9e,0x80bb,0x8004,8,0x8022,0x19,0x801b,0xa,0x19,0x8089,5, -0x11,0x12,0x14,0x16,0x8029,5,0x11,0x12,0x14,0x8016,0x8011,5,0x8011,0x11,0x14,0x8016, -0x11,0x8019,0xa,0xf,0x10,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0x80b2,0xa,0xf,0x10, -0x15,0x1a,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0xb2,0x80bb,0xa,0xf,0x10,0x15,0x78, -0x91,0x99,0x9d,0x9e,0xa0,0xa3,0xb2,0x80bb,0xa,0xa3,0xa,0x8023,0xa,0xfa,0x19,0x1c, -0x804f,0x37,0x804e,2,0x8057,2,0x8025,2,0x105,0x2f,0x31,0x8053,0x2f,0x31,0x80c1,0x2f, -0x8031,2,0x8007,0x79,0x80c2,0x79,0x113,0x89,0x87,0x8087}; +0xa,0x15,0x1a,0x1f,0x21,0x24,0x89,0x9e,0x80bb,0x8004,8,0x8022,0x19,0x801b,0xa,0x19, +0x8089,5,0x11,0x12,0x14,0x16,0x8029,5,0x11,0x12,0x14,0x8016,0x8011,5,0x8011,0x11, +0x14,0x8016,0x11,0x8019,0xa,0xf,0x10,0x15,0x1a,0x78,0x91,0x97,0x99,0x9d,0x9e,0xa0, +0xa3,0xb2,0x80bb,0xa,0xf,0x10,0x15,0x78,0x91,0x97,0x99,0x9d,0x9e,0xa0,0xa3,0xb2, +0x80bb,0xa,0xf,0x10,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0x80b2,0xa,0xf,0x10,0x78, +0x91,0x97,0x99,0x9d,0x9e,0xa0,0xa3,0x80b2,0xa,0xa3,0xa,0x8023,0xa,0x10a,0x19,0x1c, +0x804f,0x37,0x804e,2,0x8057,2,0x8025,2,0x115,0x2f,0x31,0x8053,0x2f,0x31,0x80c1,0x2f, +0x8031,2,0x8007,0x79,0x80c2,0x79,0x123,0x89,0x87,0x8087}; -static const int32_t indexes[UPROPS_INDEX_COUNT]={0x2d08,0x2d08,0x2d08,0x2d08,0x6ce6,3,0x8924,0x89b1,0x89b1,0x89b1,0xb47c7,0x2a75a31,0,0,0,0}; +static const int32_t indexes[UPROPS_INDEX_COUNT]={0x2d4e,0x2d4e,0x2d4e,0x2d4e,0x6d50,3,0x89ac,0x8a41,0x8a41,0x8a41,0xb48c7,0x2f75a31,0,0,0,0}; #endif // INCLUDED_FROM_UCHAR_C diff --git a/yass/third_party/icu/source/common/ucmndata.h b/yass/third_party/icu/source/common/ucmndata.h index 486b4fd7b5..1e63833f49 100644 --- a/yass/third_party/icu/source/common/ucmndata.h +++ b/yass/third_party/icu/source/common/ucmndata.h @@ -45,6 +45,20 @@ typedef struct { UDataInfo info; } DataHeader; +typedef struct { + DataHeader hdr; + char padding[8]; + uint32_t count, reserved; + /* + const struct { + const char *const name; + const void *const data; + } toc[1]; + */ + int fakeNameAndData[4]; /* TODO: Change this header type from */ + /* pointerTOC to OffsetTOC. */ +} ICU_Data_Header; + typedef struct { uint32_t nameOffset; uint32_t dataOffset; diff --git a/yass/third_party/icu/source/common/ucurr.cpp b/yass/third_party/icu/source/common/ucurr.cpp index ffca8aac5f..70b1bbf223 100644 --- a/yass/third_party/icu/source/common/ucurr.cpp +++ b/yass/third_party/icu/source/common/ucurr.cpp @@ -11,6 +11,8 @@ #if !UCONFIG_NO_FORMATTING +#include + #include "unicode/ucurr.h" #include "unicode/locid.h" #include "unicode/ures.h" @@ -20,6 +22,7 @@ #include "unicode/usetiter.h" #include "unicode/utf16.h" #include "ustr_imp.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cmemory.h" #include "cstring.h" @@ -520,14 +523,18 @@ ucurr_forLocale(const char* locale, return 0; } - char currency[4]; // ISO currency codes are alpha3 codes. UErrorCode localStatus = U_ZERO_ERROR; - int32_t resLen = uloc_getKeywordValue(locale, "currency", - currency, UPRV_LENGTHOF(currency), &localStatus); - if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) { + CharString currency; + { + CharStringByteSink sink(¤cy); + ulocimp_getKeywordValue(locale, "currency", sink, &localStatus); + } + int32_t resLen = currency.length(); + + if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency.data(), resLen)) { if (resLen < buffCapacity) { - T_CString_toUpperCase(currency); - u_charsToUChars(currency, buff, resLen); + T_CString_toUpperCase(currency.data()); + u_charsToUChars(currency.data(), buff, resLen); } return u_terminateUChars(buff, buffCapacity, resLen, ec); } @@ -597,11 +604,15 @@ ucurr_forLocale(const char* locale, if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) { // We don't know about it. Check to see if we support the variant. - uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec); + CharString parent; + { + CharStringByteSink sink(&parent); + ulocimp_getParent(locale, sink, ec); + } *ec = U_USING_FALLBACK_WARNING; - // TODO: Loop over the shortened id rather than recursing and + // TODO: Loop over the parent rather than recursing and // looking again for a currency keyword. - return ucurr_forLocale(id, buff, buffCapacity, ec); + return ucurr_forLocale(parent.data(), buff, buffCapacity, ec); } if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) { // There is nothing to fallback to. Report the failure/warning if possible. @@ -624,20 +635,22 @@ ucurr_forLocale(const char* locale, * @return true if the fallback happened; false if locale is already * root (""). */ -static UBool fallback(char *loc) { - if (!*loc) { +static UBool fallback(CharString& loc) { + if (loc.isEmpty()) { return false; } UErrorCode status = U_ZERO_ERROR; - if (uprv_strcmp(loc, "en_GB") == 0) { + if (loc == "en_GB") { // HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en" // in order to consume the correct data strings. This hack will be removed // when proper data sink loading is implemented here. - // NOTE: "001" adds 1 char over "GB". However, both call sites allocate - // arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001). - uprv_strcpy(loc + 3, "001"); + loc.truncate(3); + loc.append("001", status); } else { - uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status); + CharString tmp; + CharStringByteSink sink(&tmp); + ulocimp_getParent(loc.data(), sink, &status); + loc = std::move(tmp); } /* char *i = uprv_strrchr(loc, '_'); @@ -692,9 +705,12 @@ ucurr_getName(const char16_t* currency, // this function. UErrorCode ec2 = U_ZERO_ERROR; - char loc[ULOC_FULLNAME_CAPACITY]; - uloc_getName(locale, loc, sizeof(loc), &ec2); - if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) { + CharString loc; + { + CharStringByteSink sink(&loc); + ulocimp_getName(locale, sink, &ec2); + } + if (U_FAILURE(ec2)) { *ec = U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -707,7 +723,7 @@ ucurr_getName(const char16_t* currency, const char16_t* s = nullptr; ec2 = U_ZERO_ERROR; - LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2)); + LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc.data(), &ec2)); if (nameStyle == UCURR_NARROW_SYMBOL_NAME || nameStyle == UCURR_FORMAL_SYMBOL_NAME || nameStyle == UCURR_VARIANT_SYMBOL_NAME) { CharString key; @@ -791,9 +807,12 @@ ucurr_getPluralName(const char16_t* currency, // this function. UErrorCode ec2 = U_ZERO_ERROR; - char loc[ULOC_FULLNAME_CAPACITY]; - uloc_getName(locale, loc, sizeof(loc), &ec2); - if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) { + CharString loc; + { + CharStringByteSink sink(&loc); + ulocimp_getName(locale, sink, &ec2); + } + if (U_FAILURE(ec2)) { *ec = U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -803,7 +822,7 @@ ucurr_getPluralName(const char16_t* currency, const char16_t* s = nullptr; ec2 = U_ZERO_ERROR; - UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2); + UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc.data(), &ec2); rb = ures_getByKey(rb, CURRENCYPLURALS, rb, &ec2); @@ -904,13 +923,17 @@ getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_ *total_currency_name_count = 0; *total_currency_symbol_count = 0; const char16_t* s = nullptr; - char locale[ULOC_FULLNAME_CAPACITY] = ""; - uprv_strcpy(locale, loc); + CharString locale; + { + UErrorCode status = U_ZERO_ERROR; + locale.append(loc, status); + if (U_FAILURE(status)) { return; } + } const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv(); for (;;) { UErrorCode ec2 = U_ZERO_ERROR; // TODO: ures_openDirect? - UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale, &ec2); + UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale.data(), &ec2); UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, nullptr, &ec2); int32_t n = ures_getSize(curr); for (int32_t i=0; ipHeader == &U_ICUDATA_ENTRY_POINT) { + if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT.hdr) { /* The linked-in data is already in the list. */ return nullptr; } @@ -719,7 +719,7 @@ openCommonData(const char *path, /* Path from OpenChoice? */ */ #if !defined(ICU_DATA_DIR_WINDOWS) // When using the Windows system data, we expect only a single data file. - setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, false, pErrorCode); + setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT.hdr, false, pErrorCode); { Mutex lock; return gCommonICUDataArray[commonDataIndex]; @@ -1196,7 +1196,7 @@ doOpenChoice(const char *path, const char *type, const char *name, *p = U_FILE_SEP_CHAR; } #if defined (UDATA_DEBUG) - fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s); + fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.data()); #endif path = altSepPath.data(); } diff --git a/yass/third_party/icu/source/common/uloc.cpp b/yass/third_party/icu/source/common/uloc.cpp index 395df1466e..ce49d6c50e 100644 --- a/yass/third_party/icu/source/common/uloc.cpp +++ b/yass/third_party/icu/source/common/uloc.cpp @@ -103,12 +103,12 @@ static const char * const LANGUAGES[] = { "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg", "bgc", "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla", - "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh", + "blo", "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh", "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv", "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg", "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp", "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh", - "cs", "csb", "cu", "cv", "cy", + "cs", "csb", "csw", "cu", "cv", "cy", "da", "dak", "dar", "dav", "de", "del", "den", "dgr", "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv", "dyo", "dyu", "dz", "dzg", @@ -135,7 +135,7 @@ static const char * const LANGUAGES[] = { "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi", "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl", "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut", - "kv", "kw", "ky", + "kv", "kw", "kxv", "ky", "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn", "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo", "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui", @@ -169,14 +169,14 @@ static const char * const LANGUAGES[] = { "sv", "sw", "swb", "syc", "syr", "szl", "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", - "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi", + "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tok", "tpi", "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt", "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm", "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz", - "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo", - "vot", "vro", "vun", + "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vmw", + "vo", "vot", "vro", "vun", "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu", - "xal", "xh", "xmf", "xog", + "xal", "xh", "xmf", "xnr", "xog", "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue", "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu", "zun", "zxx", "zza", @@ -220,12 +220,12 @@ static const char * const LANGUAGES_3[] = { "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul", "bgc", "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla", - "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh", + "blo", "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh", "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv", "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg", "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp", "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh", - "ces", "csb", "chu", "chv", "cym", + "ces", "csb", "csw", "chu", "chv", "cym", "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr", "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div", "dyo", "dyu", "dzo", "dzg", @@ -252,7 +252,7 @@ static const char * const LANGUAGES_3[] = { "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi", "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl", "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut", - "kom", "cor", "kir", + "kom", "cor", "kxv", "kir", "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn", "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao", "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui", @@ -286,14 +286,14 @@ static const char * const LANGUAGES_3[] = { "swe", "swa", "swb", "syc", "syr", "szl", "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", - "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi", + "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tok", "tpi", "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt", "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm", "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb", - "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol", - "vot", "vro", "vun", + "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vmw", + "vol", "vot", "vro", "vun", "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu", - "xal", "xho", "xmf", "xog", + "xal", "xho", "xmf", "xnr", "xog", "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue", "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul", "zun", "zxx", "zza", @@ -477,25 +477,6 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = { /* ### BCP47 Conversion *******************************************/ /* Test if the locale id has BCP47 u extension and does not have '@' */ #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(localeID) == 1) -/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ -static const char* _ConvertBCP47( - const char* id, char* buffer, int32_t length, - UErrorCode* err, int32_t* pLocaleIdSize) { - const char* finalID; - int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, nullptr, err); - if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { - finalID=id; - if (*err == U_STRING_NOT_TERMINATED_WARNING) { - *err = U_BUFFER_OVERFLOW_ERROR; - } - } else { - finalID=buffer; - } - if (pLocaleIdSize != nullptr) { - *pLocaleIdSize = localeIDSize; - } - return finalID; -} /* Gets the size of the shortest subtag in the given localeID. */ static int32_t getShortestSubtagLength(const char *localeID) { int32_t localeIDLength = static_cast(uprv_strlen(localeID)); @@ -762,7 +743,7 @@ ulocimp_getKeywordValue(const char* localeID, char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; if(status && U_SUCCESS(*status) && localeID) { - char tempBuffer[ULOC_FULLNAME_CAPACITY]; + CharString tempBuffer; const char* tmpLocaleID; if (keywordName == nullptr || keywordName[0] == 0) { @@ -776,8 +757,9 @@ ulocimp_getKeywordValue(const char* localeID, } if (_hasBCP47Extension(localeID)) { - tmpLocaleID = _ConvertBCP47(localeID, tempBuffer, - sizeof(tempBuffer), status, nullptr); + CharStringByteSink sink(&tempBuffer); + ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status); + tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID; } else { tmpLocaleID=localeID; } @@ -1406,7 +1388,7 @@ U_CAPI UEnumeration* U_EXPORT2 uloc_openKeywords(const char* localeID, UErrorCode* status) { - char tempBuffer[ULOC_FULLNAME_CAPACITY]; + CharString tempBuffer; const char* tmpLocaleID; if(status==nullptr || U_FAILURE(*status)) { @@ -1414,8 +1396,9 @@ uloc_openKeywords(const char* localeID, } if (_hasBCP47Extension(localeID)) { - tmpLocaleID = _ConvertBCP47(localeID, tempBuffer, - sizeof(tempBuffer), status, nullptr); + CharStringByteSink sink(&tempBuffer); + ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status); + tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID; } else { if (localeID==nullptr) { localeID=uloc_getDefault(); @@ -1489,7 +1472,7 @@ _canonicalize(const char* localeID, } int32_t j, fieldCount=0, scriptSize=0, variantSize=0; - PreflightingLocaleIDBuffer tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this + CharString tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this CharString localeIDWithHyphens; // if localeID has a BPC47 extension and have _, tmpLocaleID points to this const char* origLocaleID; const char* tmpLocaleID; @@ -1512,13 +1495,9 @@ _canonicalize(const char* localeID, } } - do { - // After this call tmpLocaleID may point to localeIDPtr which may - // point to either localeID or localeIDWithHyphens.data(). - tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(), - tempBuffer.getCapacity(), err, - &(tempBuffer.requestedCapacity)); - } while (tempBuffer.needToTryAgain(err)); + CharStringByteSink tempSink(&tempBuffer); + ulocimp_forLanguageTag(localeIDPtr, -1, tempSink, nullptr, err); + tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr; } else { if (localeID==nullptr) { localeID=uloc_getDefault(); @@ -1676,12 +1655,39 @@ uloc_getParent(const char* localeID, char* parent, int32_t parentCapacity, UErrorCode* err) +{ + if (U_FAILURE(*err)) { + return 0; + } + + CheckedArrayByteSink sink(parent, parentCapacity); + ulocimp_getParent(localeID, sink, err); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return reslen; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(parent, parentCapacity, reslen, err); + } + + return reslen; +} + +U_CAPI void U_EXPORT2 +ulocimp_getParent(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err) { const char *lastUnderscore; int32_t i; if (U_FAILURE(*err)) - return 0; + return; if (localeID == nullptr) localeID = uloc_getDefault(); @@ -1697,13 +1703,9 @@ uloc_getParent(const char* localeID, if (uprv_strnicmp(localeID, "und_", 4) == 0) { localeID += 3; i -= 3; - uprv_memmove(parent, localeID, uprv_min(i, parentCapacity)); - } else if (parent != localeID) { - uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); } + sink.Append(localeID, i); } - - return u_terminateChars(parent, parentCapacity, i, err); } U_CAPI int32_t U_EXPORT2 @@ -1795,7 +1797,7 @@ uloc_getVariant(const char* localeID, int32_t variantCapacity, UErrorCode* err) { - char tempBuffer[ULOC_FULLNAME_CAPACITY]; + CharString tempBuffer; const char* tmpLocaleID; int32_t i=0; @@ -1804,7 +1806,9 @@ uloc_getVariant(const char* localeID, } if (_hasBCP47Extension(localeID)) { - tmpLocaleID =_ConvertBCP47(localeID, tempBuffer, sizeof(tempBuffer), err, nullptr); + CharStringByteSink sink(&tempBuffer); + ulocimp_forLanguageTag(localeID, -1, sink, nullptr, err); + tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID; } else { if (localeID==nullptr) { localeID=uloc_getDefault(); diff --git a/yass/third_party/icu/source/common/uloc_tag.cpp b/yass/third_party/icu/source/common/uloc_tag.cpp index 68af81b529..fe3261c75d 100644 --- a/yass/third_party/icu/source/common/uloc_tag.cpp +++ b/yass/third_party/icu/source/common/uloc_tag.cpp @@ -1888,11 +1888,8 @@ static void _appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) { (void)hadPosix; char buf[ULOC_FULLNAME_CAPACITY]; - char tmpAppend[ULOC_FULLNAME_CAPACITY]; UErrorCode tmpStatus = U_ZERO_ERROR; int32_t len, i; - int32_t reslen = 0; - int32_t capacity = sizeof tmpAppend; if (U_FAILURE(*status)) { return; @@ -1945,37 +1942,18 @@ _appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool } if (writeValue) { - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } + sink.Append("-", 1); if (firstValue) { - if (reslen < capacity) { - tmpAppend[reslen++] = *PRIVATEUSE_KEY; - } - - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); - if (reslen < capacity) { - uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); - } - reslen += len; - - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - + sink.Append(PRIVATEUSE_KEY, UPRV_LENGTHOF(PRIVATEUSE_KEY) - 1); + sink.Append("-", 1); + sink.Append(PRIVUSE_VARIANT_PREFIX, UPRV_LENGTHOF(PRIVUSE_VARIANT_PREFIX) - 1); + sink.Append("-", 1); firstValue = false; } len = (int32_t)uprv_strlen(pPriv); - if (reslen < capacity) { - uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); - } - reslen += len; + sink.Append(pPriv, len); } } /* reset private use starting position */ @@ -1985,15 +1963,6 @@ _appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool } p++; } - - if (U_FAILURE(*status)) { - return; - } - } - - if (U_SUCCESS(*status)) { - len = reslen; - sink.Append(tmpAppend, len); } } @@ -2656,53 +2625,18 @@ ulocimp_toLanguageTag(const char* localeID, UBool strict, UErrorCode* status) { icu::CharString canonical; - int32_t reslen; UErrorCode tmpStatus = U_ZERO_ERROR; UBool hadPosix = false; const char* pKeywordStart; /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ - int32_t resultCapacity = static_cast(uprv_strlen(localeID)); - if (resultCapacity > 0) { - char* buffer; - - for (;;) { - buffer = canonical.getAppendBuffer( - /*minCapacity=*/resultCapacity, - /*desiredCapacityHint=*/resultCapacity, - resultCapacity, - tmpStatus); - - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - return; - } - - reslen = - uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus); - - if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - resultCapacity = reslen; - tmpStatus = U_ZERO_ERROR; - } - - if (U_FAILURE(tmpStatus)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - canonical.append(buffer, reslen, tmpStatus); - if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. - } - - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - return; - } + { + icu::CharStringByteSink canonicalSink(&canonical); + ulocimp_canonicalize(localeID, canonicalSink, &tmpStatus); + } + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + return; } /* For handling special case - private use only tag */ diff --git a/yass/third_party/icu/source/common/ulocale.cpp b/yass/third_party/icu/source/common/ulocale.cpp new file mode 100644 index 0000000000..471ef0ce79 --- /dev/null +++ b/yass/third_party/icu/source/common/ulocale.cpp @@ -0,0 +1,99 @@ +// © 2023 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +#include "unicode/errorcode.h" +#include "unicode/stringpiece.h" +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/ulocale.h" +#include "unicode/locid.h" + +#include "charstr.h" +#include "cmemory.h" +#include "ustr_imp.h" + +U_NAMESPACE_USE +#define EXTERNAL(i) (reinterpret_cast(i)) +#define CONST_INTERNAL(e) (reinterpret_cast(e)) +#define INTERNAL(e) (reinterpret_cast(e)) + +ULocale* +ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err) { + CharString str(length < 0 ? StringPiece(localeID) : StringPiece(localeID, length), *err); + if (U_FAILURE(*err)) return nullptr; + return EXTERNAL(icu::Locale::createFromName(str.data()).clone()); +} + +ULocale* +ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err) { + Locale l = icu::Locale::forLanguageTag(length < 0 ? StringPiece(tag) : StringPiece(tag, length), *err); + if (U_FAILURE(*err)) return nullptr; + return EXTERNAL(l.clone()); +} + +void +ulocale_close(ULocale* locale) { + delete INTERNAL(locale); +} + +#define IMPL_ULOCALE_STRING_GETTER(N1, N2) \ +const char* ulocale_get ## N1(const ULocale* locale) { \ + if (locale == nullptr) return nullptr; \ + return CONST_INTERNAL(locale)->get ## N2(); \ +} + +#define IMPL_ULOCALE_STRING_IDENTICAL_GETTER(N) IMPL_ULOCALE_STRING_GETTER(N, N) + +#define IMPL_ULOCALE_GET_KEYWORD_VALUE(N) \ +int32_t ulocale_get ##N ( \ + const ULocale* locale, const char* keyword, int32_t keywordLength, \ + char* valueBuffer, int32_t bufferCapacity, UErrorCode *err) { \ + if (U_FAILURE(*err)) return 0; \ + if (locale == nullptr) { \ + *err = U_ILLEGAL_ARGUMENT_ERROR; \ + return 0; \ + } \ + CheckedArrayByteSink sink(valueBuffer, bufferCapacity); \ + CONST_INTERNAL(locale)->get ## N( \ + keywordLength < 0 ? StringPiece(keyword) : StringPiece(keyword, keywordLength), \ + sink, *err); \ + int32_t reslen = sink.NumberOfBytesAppended(); \ + if (U_FAILURE(*err)) { \ + return reslen; \ + } \ + if (sink.Overflowed()) { \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } else { \ + u_terminateChars(valueBuffer, bufferCapacity, reslen, err); \ + } \ + return reslen; \ +} + +#define IMPL_ULOCALE_GET_KEYWORDS(N) \ +UEnumeration* ulocale_get ## N(const ULocale* locale, UErrorCode *err) { \ + if (U_FAILURE(*err)) return nullptr; \ + if (locale == nullptr) { \ + *err = U_ILLEGAL_ARGUMENT_ERROR; \ + return nullptr; \ + } \ + return uenum_openFromStringEnumeration( \ + CONST_INTERNAL(locale)->create ## N(*err), err); \ +} + +IMPL_ULOCALE_STRING_IDENTICAL_GETTER(Language) +IMPL_ULOCALE_STRING_IDENTICAL_GETTER(Script) +IMPL_ULOCALE_STRING_GETTER(Region, Country) +IMPL_ULOCALE_STRING_IDENTICAL_GETTER(Variant) +IMPL_ULOCALE_STRING_GETTER(LocaleID, Name) +IMPL_ULOCALE_STRING_IDENTICAL_GETTER(BaseName) +IMPL_ULOCALE_GET_KEYWORD_VALUE(KeywordValue) +IMPL_ULOCALE_GET_KEYWORD_VALUE(UnicodeKeywordValue) +IMPL_ULOCALE_GET_KEYWORDS(Keywords) +IMPL_ULOCALE_GET_KEYWORDS(UnicodeKeywords) + +bool ulocale_isBogus(const ULocale* locale) { + if (locale == nullptr) return false; + return CONST_INTERNAL(locale)->isBogus(); +} + +/*eof*/ diff --git a/yass/third_party/icu/source/common/ulocbuilder.cpp b/yass/third_party/icu/source/common/ulocbuilder.cpp new file mode 100644 index 0000000000..a5af73bef4 --- /dev/null +++ b/yass/third_party/icu/source/common/ulocbuilder.cpp @@ -0,0 +1,156 @@ +// © 2023 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include + +#include "unicode/bytestream.h" +#include "unicode/localebuilder.h" +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/umachine.h" +#include "unicode/ulocbuilder.h" +#include "cstring.h" +#include "ustr_imp.h" + +using icu::CheckedArrayByteSink; +using icu::StringPiece; + +#define EXTERNAL(i) (reinterpret_cast(i)) +#define INTERNAL(e) (reinterpret_cast(e)) +#define CONST_INTERNAL(e) (reinterpret_cast(e)) + +ULocaleBuilder* ulocbld_open() { + return EXTERNAL(new icu::LocaleBuilder()); +} + +void ulocbld_close(ULocaleBuilder* builder) { + if (builder == nullptr) return; + delete INTERNAL(builder); +} + +void ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length) { + if (builder == nullptr) return; + icu::Locale l; + if (length < 0 || locale[length] == '\0') { + l = icu::Locale(locale); + } else { + if (length >= ULOC_FULLNAME_CAPACITY) { + l.setToBogus(); + } else { + // locale is not null termined but Locale API require one. + // Create a null termined version in buf. + char buf[ULOC_FULLNAME_CAPACITY]; + uprv_memcpy(buf, locale, length); + buf[length] = '\0'; + l = icu::Locale(buf); + } + } + INTERNAL(builder)->setLocale(l); +} + +void +ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale) { + if (builder == nullptr) return; + INTERNAL(builder)->setLocale(*(reinterpret_cast(locale))); + ulocale_close(locale); +} + +#define STRING_PIECE(s, l) ((l)<0 ? StringPiece(s) : StringPiece((s), (l))) + +#define IMPL_ULOCBLD_SETTER(N) \ +void ulocbld_##N(ULocaleBuilder* bld, const char* s, int32_t l) { \ + if (bld == nullptr) return; \ + INTERNAL(bld)->N(STRING_PIECE(s,l)); \ +} + +IMPL_ULOCBLD_SETTER(setLanguageTag) +IMPL_ULOCBLD_SETTER(setLanguage) +IMPL_ULOCBLD_SETTER(setScript) +IMPL_ULOCBLD_SETTER(setRegion) +IMPL_ULOCBLD_SETTER(setVariant) +IMPL_ULOCBLD_SETTER(addUnicodeLocaleAttribute) +IMPL_ULOCBLD_SETTER(removeUnicodeLocaleAttribute) + +void ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32_t length) { + if (builder == nullptr) return; + INTERNAL(builder)->setExtension(key, STRING_PIECE(value, length)); +} + +void ulocbld_setUnicodeLocaleKeyword( + ULocaleBuilder* builder, const char* key, int32_t keyLength, + const char* type, int32_t typeLength) { + if (builder == nullptr) return; + INTERNAL(builder)->setUnicodeLocaleKeyword( + STRING_PIECE(key, keyLength), STRING_PIECE(type, typeLength)); +} + +void ulocbld_clear(ULocaleBuilder* builder) { + if (builder == nullptr) return; + INTERNAL(builder)->clear(); +} + +void ulocbld_clearExtensions(ULocaleBuilder* builder) { + if (builder == nullptr) return; + INTERNAL(builder)->clearExtensions(); +} + + +ULocale* ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err) { + if (builder == nullptr) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + icu::Locale l = INTERNAL(builder)->build(*err); + if (U_FAILURE(*err)) return nullptr; + icu::Locale* r = l.clone(); + if (r == nullptr) { + *err = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return reinterpret_cast(r); +} + +int32_t ulocbld_buildLocaleID(ULocaleBuilder* builder, + char* buffer, int32_t bufferCapacity, UErrorCode* err) { + if (builder == nullptr) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + icu::Locale l = INTERNAL(builder)->build(*err); + if (U_FAILURE(*err)) return 0; + int32_t length = (int32_t)(uprv_strlen(l.getName())); + if (0 < length && length <= bufferCapacity) { + uprv_memcpy(buffer, l.getName(), length); + } + return u_terminateChars(buffer, bufferCapacity, length, err); +} + +int32_t ulocbld_buildLanguageTag(ULocaleBuilder* builder, + char* buffer, int32_t bufferCapacity, UErrorCode* err) { + if (builder == nullptr) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + icu::Locale l = INTERNAL(builder)->build(*err); + if (U_FAILURE(*err)) return 0; + CheckedArrayByteSink sink(buffer, bufferCapacity); + l.toLanguageTag(sink, *err); + int32_t reslen = sink.NumberOfBytesAppended(); + if (U_FAILURE(*err)) { + return reslen; + } + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(buffer, bufferCapacity, reslen, err); + } + return reslen; +} + +UBool ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode) { + if (builder == nullptr) { + *outErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return true; + } + return CONST_INTERNAL(builder)->copyErrorTo(*outErrorCode); +} diff --git a/yass/third_party/icu/source/common/ulocimp.h b/yass/third_party/icu/source/common/ulocimp.h index 48341054e3..efa0fa72e6 100644 --- a/yass/third_party/icu/source/common/ulocimp.h +++ b/yass/third_party/icu/source/common/ulocimp.h @@ -92,6 +92,11 @@ ulocimp_getKeywordValue(const char* localeID, icu::ByteSink& sink, UErrorCode* status); +U_CAPI void U_EXPORT2 +ulocimp_getParent(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + /** * Writes a well-formed language tag for this locale ID. * @@ -237,6 +242,7 @@ ulocimp_addLikelySubtags(const char* localeID, * * @param localeID The locale to minimize * @param sink The output sink receiving the maximized locale + * @param favorScript favor to keep script if true, region if false. * @param err Error information if minimizing the locale failed. If the length * of the localeID and the null-terminator is greater than the maximum allowed size, * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. @@ -245,6 +251,7 @@ ulocimp_addLikelySubtags(const char* localeID, U_CAPI void U_EXPORT2 ulocimp_minimizeSubtags(const char* localeID, icu::ByteSink& sink, + bool favorScript, UErrorCode* err); U_CAPI const char * U_EXPORT2 @@ -307,72 +314,4 @@ U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* le // Return true if the value is already canonicalized. U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName); -/** - * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY. - * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack, - * and then, if it's not big enough, reallocate it on the heap and try again. - * - * You use it like this: - * UErrorCode err = U_ZERO_ERROR; - * - * PreflightingLocaleIDBuffer tempBuffer; - * do { - * tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err); - * } while (tempBuffer.needToTryAgain(&err)); - * if (U_SUCCESS(err)) { - * uloc_doSomethingWithTheResult(tempBuffer.getBuffer()); - * } - */ -class PreflightingLocaleIDBuffer { -private: - char stackBuffer[ULOC_FULLNAME_CAPACITY]; - char* heapBuffer = nullptr; - int32_t capacity = ULOC_FULLNAME_CAPACITY; - -public: - int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY; - - // No heap allocation. Use only on the stack. - static void* U_EXPORT2 operator new(size_t) noexcept = delete; - static void* U_EXPORT2 operator new[](size_t) noexcept = delete; -#if U_HAVE_PLACEMENT_NEW - static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete; -#endif - - PreflightingLocaleIDBuffer() {} - - ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); } - - char* getBuffer() { - if (heapBuffer == nullptr) { - return stackBuffer; - } else { - return heapBuffer; - } - } - - int32_t getCapacity() { - return capacity; - } - - bool needToTryAgain(UErrorCode* err) { - if (heapBuffer != nullptr) { - return false; - } - - if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) { - int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia - heapBuffer = static_cast(uprv_malloc(newCapacity)); - if (heapBuffer == nullptr) { - *err = U_MEMORY_ALLOCATION_ERROR; - } else { - *err = U_ZERO_ERROR; - capacity = newCapacity; - } - return U_SUCCESS(*err); - } - return false; - } -}; - #endif diff --git a/yass/third_party/icu/source/common/unicode/brkiter.h b/yass/third_party/icu/source/common/unicode/brkiter.h index 108652799e..1b10e6ef11 100644 --- a/yass/third_party/icu/source/common/unicode/brkiter.h +++ b/yass/third_party/icu/source/common/unicode/brkiter.h @@ -649,6 +649,7 @@ private: /** @internal (private) */ char actualLocale[ULOC_FULLNAME_CAPACITY]; char validLocale[ULOC_FULLNAME_CAPACITY]; + char requestLocale[ULOC_FULLNAME_CAPACITY]; }; #ifndef U_HIDE_DEPRECATED_API diff --git a/yass/third_party/icu/source/common/unicode/docmain.h b/yass/third_party/icu/source/common/unicode/docmain.h index 1349f49703..581b2e186d 100644 --- a/yass/third_party/icu/source/common/unicode/docmain.h +++ b/yass/third_party/icu/source/common/unicode/docmain.h @@ -114,7 +114,7 @@ * * * Locales - * uloc.h + * uloc.h, ulocale.h, ulocbuilder.h * icu::Locale, icu::LocaleBuilder, icu::LocaleMatcher * * diff --git a/yass/third_party/icu/source/common/unicode/enumset.h b/yass/third_party/icu/source/common/unicode/enumset.h index 6d7fa72b00..bde8c455c0 100644 --- a/yass/third_party/icu/source/common/unicode/enumset.h +++ b/yass/third_party/icu/source/common/unicode/enumset.h @@ -43,7 +43,7 @@ public: inline int32_t contains(T toCheck) const { return get(toCheck); } inline void set(T toSet, int32_t v) { fBools=(fBools&(~flag(toSet)))|(v?(flag(toSet)):0); } inline int32_t get(T toCheck) const { return (fBools & flag(toCheck))?1:0; } - inline UBool isValidEnum(T toCheck) const { return ((uint32_t)toCheck>=minValue&&(uint32_t)toCheck=minValue&&toCheck& operator=(const EnumSet& other) { fBools = other.fBools; diff --git a/yass/third_party/icu/source/common/unicode/locid.h b/yass/third_party/icu/source/common/unicode/locid.h index a6fbbb7041..f0bdc7ca51 100644 --- a/yass/third_party/icu/source/common/unicode/locid.h +++ b/yass/third_party/icu/source/common/unicode/locid.h @@ -984,7 +984,10 @@ public: static const char* const* U_EXPORT2 getISOCountries(); /** - * Gets a list of all available language codes defined in ISO 639. This is a pointer + * Returns a list of all unique language codes defined in ISO 639. + * They can be 2 or 3 letter codes, as defined by + * + * BCP 47, section 2.2.1. This is a pointer * to an array of pointers to arrays of char. All of these pointers are owned * by ICU-- do not delete them, and do not write through them. The array is * terminated with a null pointer. @@ -1110,6 +1113,15 @@ protected: /* only protected for testing purposes. DO NOT USE. */ * @internal */ void setFromPOSIXID(const char *posixID); + /** + * Minimize the subtags for this Locale, per the algorithm described + * @param favorScript favor to keep script if true, to keep region if false. + * @param status error information if maximizing this Locale failed. + * If this Locale is not well-formed, the error code is + * U_ILLEGAL_ARGUMENT_ERROR. + * @internal + */ + void minimizeSubtags(bool favorScript, UErrorCode& status); #endif /* U_HIDE_INTERNAL_API */ private: diff --git a/yass/third_party/icu/source/common/unicode/normalizer2.h b/yass/third_party/icu/source/common/unicode/normalizer2.h index 972894ec42..6856ff8720 100644 --- a/yass/third_party/icu/source/common/unicode/normalizer2.h +++ b/yass/third_party/icu/source/common/unicode/normalizer2.h @@ -147,7 +147,10 @@ public: getNFKDInstance(UErrorCode &errorCode); /** - * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. + * Returns a Normalizer2 instance for Unicode toNFKC_Casefold() normalization + * which is equivalent to applying the NFKC_Casefold mappings and then NFC. + * See https://www.unicode.org/reports/tr44/#NFKC_Casefold + * * Same as getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, errorCode). * Returns an unmodifiable singleton instance. Do not delete it. * @param errorCode Standard ICU error code. Its input value must @@ -160,6 +163,25 @@ public: static const Normalizer2 * getNFKCCasefoldInstance(UErrorCode &errorCode); +#ifndef U_HIDE_DRAFT_API + /** + * Returns a Normalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization + * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC. + * See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold + * + * Same as getInstance(nullptr, "nfkc_scf", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 74 + */ + static const Normalizer2 * + getNFKCSimpleCasefoldInstance(UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API + /** * Returns a Normalizer2 instance which uses the specified data file * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) @@ -172,7 +194,7 @@ public: * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. * * @param packageName nullptr for ICU built-in data, otherwise application data package name - * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file + * @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file * @param mode normalization mode (compose or decompose etc.) * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns diff --git a/yass/third_party/icu/source/common/unicode/rbbi.h b/yass/third_party/icu/source/common/unicode/rbbi.h index 418b52e41f..045238ac5d 100644 --- a/yass/third_party/icu/source/common/unicode/rbbi.h +++ b/yass/third_party/icu/source/common/unicode/rbbi.h @@ -43,6 +43,69 @@ class RBBIDataWrapper; class UnhandledEngine; class UStack; + +#ifndef U_HIDE_DRAFT_API +/** + * The ExternalBreakEngine class define an abstract interface for the host environment + * to provide a low level facility to break text for unicode text in script that the text boundary + * cannot be handled by upper level rule based logic, for example, for Chinese and Japanese + * word breaking, Thai, Khmer, Burmese, Lao and other Southeast Asian scripts. + * The host environment implement one or more subclass of ExternalBreakEngine and + * register them in the initialization time by calling + * RuleBasedBreakIterator::registerExternalBreakEngine(). ICU adopt and own the engine and will + * delete the registered external engine in proper time during the clean up + * event. + * @internal ICU 74 technology preview + */ +class ExternalBreakEngine : public UObject { + public: + /** + * destructor + * @internal ICU 74 technology preview + */ + virtual ~ExternalBreakEngine() {} + + /** + *

Indicate whether this engine handles a particular character when + * the RuleBasedBreakIterator is used for a particular locale. This method is used + * by the RuleBasedBreakIterator to find a break engine.

+ * @param c A character which begins a run that the engine might handle. + * @param locale The locale. + * @return true if this engine handles the particular character for that locale. + * @internal ICU 74 technology preview + */ + virtual bool isFor(UChar32 c, const char* locale) const = 0; + + /** + *

Indicate whether this engine handles a particular character.This method is + * used by the RuleBasedBreakIterator after it already find a break engine to see which + * characters after the first one can be handled by this break engine.

+ * @param c A character that the engine might handle. + * @return true if this engine handles the particular character. + * @internal ICU 74 technology preview + */ + virtual bool handles(UChar32 c) const = 0; + + /** + *

Divide up a range of text handled by this break engine.

+ * + * @param text A UText representing the text + * @param start The start of the range of known characters + * @param end The end of the range of known characters + * @param foundBreaks Output of C array of int32_t break positions, or + * nullptr + * @param foundBreaksCapacity The capacity of foundBreaks + * @param status Information on any errors encountered. + * @return The number of breaks found + * @internal ICU 74 technology preview + */ + virtual int32_t fillBreaks(UText* text, int32_t start, int32_t end, + int32_t* foundBreaks, int32_t foundBreaksCapacity, + UErrorCode& status) const = 0; +}; +#endif /* U_HIDE_DRAFT_API */ + + /** * * A subclass of BreakIterator whose behavior is specified using a list of rules. @@ -716,9 +779,10 @@ private: * This function returns the appropriate LanguageBreakEngine for a * given character c. * @param c A character in the dictionary set + * @param locale The locale. * @internal (private) */ - const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); + const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c, const char* locale); public: #ifndef U_HIDE_INTERNAL_API @@ -734,8 +798,24 @@ private: */ void dumpTables(); #endif /* U_HIDE_INTERNAL_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Register a new external break engine. The external break engine will be adopted. + * Because ICU may choose to cache break engine internally, this must + * be called at application startup, prior to any calls to + * object methods of RuleBasedBreakIterator to avoid undefined behavior. + * @param toAdopt the ExternalBreakEngine instance to be adopted + * @param status the in/out status code, no special meanings are assigned + * @internal ICU 74 technology preview + */ + static void U_EXPORT2 registerExternalBreakEngine( + ExternalBreakEngine* toAdopt, UErrorCode& status); +#endif /* U_HIDE_DRAFT_API */ + }; + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/yass/third_party/icu/source/common/unicode/uchar.h b/yass/third_party/icu/source/common/unicode/uchar.h index 4f82a9fb58..7470891338 100644 --- a/yass/third_party/icu/source/common/unicode/uchar.h +++ b/yass/third_party/icu/source/common/unicode/uchar.h @@ -60,7 +60,7 @@ U_CDECL_BEGIN * @see u_getUnicodeVersion * @stable ICU 2.0 */ -#define U_UNICODE_VERSION "15.0" +#define U_UNICODE_VERSION "15.1" /** * \file @@ -532,12 +532,33 @@ typedef enum UProperty { * @stable ICU 70 */ UCHAR_RGI_EMOJI=71, +#ifndef U_HIDE_DRAFT_API + /** + * Binary property IDS_Unary_Operator. + * For programmatic determination of Ideographic Description Sequences. + * + * @draft ICU 74 + */ + UCHAR_IDS_UNARY_OPERATOR=72, + /** + * Binary property ID_Compat_Math_Start. + * Used in mathematical identifier profile in UAX #31. + * @draft ICU 74 + */ + UCHAR_ID_COMPAT_MATH_START=73, + /** + * Binary property ID_Compat_Math_Continue. + * Used in mathematical identifier profile in UAX #31. + * @draft ICU 74 + */ + UCHAR_ID_COMPAT_MATH_CONTINUE=74, +#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the last constant for binary Unicode properties. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UCHAR_BINARY_LIMIT=72, + UCHAR_BINARY_LIMIT=75, #endif // U_HIDE_DEPRECATED_API /** Enumerated property Bidi_Class. @@ -1900,6 +1921,11 @@ enum UBlockCode { /** @stable ICU 72 */ UBLOCK_NAG_MUNDARI = 327, /*[1E4D0]*/ + // New block in Unicode 15.1 + + /** @stable ICU 74 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 328, /*[2EBF0]*/ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UBlockCode value. @@ -1907,7 +1933,7 @@ enum UBlockCode { * * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UBLOCK_COUNT = 328, + UBLOCK_COUNT = 329, #endif // U_HIDE_DEPRECATED_API /** @stable ICU 2.0 */ @@ -2439,6 +2465,16 @@ typedef enum ULineBreak { U_LB_E_MODIFIER = 41, /*[EM]*/ /** @stable ICU 58 */ U_LB_ZWJ = 42, /*[ZWJ]*/ + /** @stable ICU 74 */ + U_LB_AKSARA = 43, /*[AK]*/ + /** @stable ICU 74 */ + U_LB_AKSARA_PREBASE = 44, /*[AP]*/ + /** @stable ICU 74 */ + U_LB_AKSARA_START = 45, /*[AS]*/ + /** @stable ICU 74 */ + U_LB_VIRAMA_FINAL = 46, /*[VF]*/ + /** @stable ICU 74 */ + U_LB_VIRAMA = 47, /*[VI]*/ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal ULineBreak value. @@ -2446,7 +2482,7 @@ typedef enum ULineBreak { * * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_LB_COUNT = 43 + U_LB_COUNT = 48 #endif // U_HIDE_DEPRECATED_API } ULineBreak; diff --git a/yass/third_party/icu/source/common/unicode/ulocale.h b/yass/third_party/icu/source/common/unicode/ulocale.h new file mode 100644 index 0000000000..33e92844bc --- /dev/null +++ b/yass/third_party/icu/source/common/unicode/ulocale.h @@ -0,0 +1,229 @@ +// © 2023 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef ULOCALE_H +#define ULOCALE_H + +#include "unicode/localpointer.h" +#include "unicode/uenum.h" +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Locale ID functionality similar to C++ class Locale + */ + +#ifndef U_HIDE_DRAFT_API +/** + * Opaque C service object type for the locale API + * @draft ICU 74 + */ +struct ULocale; + +/** + * C typedef for struct ULocale. + * @draft ICU 74 + */ +typedef struct ULocale ULocale; + +/** + * Constructs an ULocale from the locale ID. + * The created ULocale should be destroyed by calling + * ulocale_close(); + * @param localeID the locale, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the locale; if negative, then the locale need to be + * null terminated. + * @param err the error code + * @return the locale. + * + * @draft ICU 74 + */ +U_CAPI ULocale* U_EXPORT2 +ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err); + +/** + * Constructs an ULocale from the provided IETF BCP 47 language tag. + * The created ULocale should be destroyed by calling + * ulocale_close(); + * @param tag the language tag, defined as IETF BCP 47 language tag, const + * char* pointer (need not be terminated when the length is non-negative) + * @param length the length of the tag; if negative, then the tag need to be + * null terminated. + * @param err the error code + * @return the locale. + * + * @draft ICU 74 + */ +U_CAPI ULocale* U_EXPORT2 +ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err); + +/** + * Close the locale and destroy it's internal states. + * + * @param locale the locale + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocale_close(ULocale* locale); + +/** + * Returns the locale's ISO-639 language code. + * + * @param locale the locale + * @return the language code of the locale. + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getLanguage(const ULocale* locale); + +/** + * Returns the locale's ISO-15924 abbreviation script code. + * + * @param locale the locale + * @return A pointer to the script. + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getScript(const ULocale* locale); + +/** + * Returns the locale's ISO-3166 region code. + * + * @param locale the locale + * @return A pointer to the region. + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getRegion(const ULocale* locale); + +/** + * Returns the locale's variant code. + * + * @param locale the locale + * @return A pointer to the variant. + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getVariant(const ULocale* locale); + +/** + * Returns the programmatic name of the entire locale, with the language, + * country and variant separated by underbars. If a field is missing, up + * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN", + * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO" + * + * @param locale the locale + * @return A pointer to "name". + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getLocaleID(const ULocale* locale); + +/** + * Returns the programmatic name of the entire locale as ulocale_getLocaleID() + * would return, but without keywords. + * + * @param locale the locale + * @return A pointer to "base name". + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getBaseName(const ULocale* locale); + +/** + * Gets the bogus state. Locale object can be bogus if it doesn't exist + * + * @param locale the locale + * @return false if it is a real locale, true if it is a bogus locale + * @draft ICU 74 + */ +U_CAPI bool U_EXPORT2 +ulocale_isBogus(const ULocale* locale); + +/** + * Gets the list of keywords for the specified locale. + * + * @param locale the locale + * @param err the error code + * @return pointer to UEnumeration, or nullptr if there are no keywords. + * Client must call uenum_close() to dispose the returned value. + * @draft ICU 74 + */ +U_CAPI UEnumeration* U_EXPORT2 +ulocale_getKeywords(const ULocale* locale, UErrorCode *err); + +/** + * Gets the list of unicode keywords for the specified locale. + * + * @param locale the locale + * @param err the error code + * @return pointer to UEnumeration, or nullptr if there are no keywords. + * Client must call uenum_close() to dispose the returned value. + * @draft ICU 74 + */ +U_CAPI UEnumeration* U_EXPORT2 +ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err); + +/** + * Gets the value for a keyword. + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * @param locale the locale + * @param keyword the keyword, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param keywordLength the length of the keyword; if negative, then the + * keyword need to be null terminated. + * @param valueBuffer The buffer to receive the value. + * @param valueBufferCapacity The capacity of receiving valueBuffer. + * @param err the error code + * @draft ICU 74 + */ +U_CAPI int32_t U_EXPORT2 +ulocale_getKeywordValue( + const ULocale* locale, const char* keyword, int32_t keywordLength, + char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err); + +/** + * Gets the Unicode value for a Unicode keyword. + * + * This uses Unicode key-value pairs, like "co-phonebk". + * + * @param locale the locale + * @param keyword the Unicode keyword, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param keywordLength the length of the Unicode keyword; if negative, + * then the keyword need to be null terminated. + * @param valueBuffer The buffer to receive the Unicode value. + * @param valueBufferCapacity The capacity of receiving valueBuffer. + * @param err the error code + * @draft ICU 74 + */ +U_CAPI int32_t U_EXPORT2 +ulocale_getUnicodeKeywordValue( + const ULocale* locale, const char* keyword, int32_t keywordLength, + char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalULocalePointer + * "Smart pointer" class, closes a ULocale via ulocale_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 74 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalULocalePointer, ULocale, ulocale_close); + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif /* U_HIDE_DRAFT_API */ + +#endif /*_ULOCALE */ diff --git a/yass/third_party/icu/source/common/unicode/ulocbuilder.h b/yass/third_party/icu/source/common/unicode/ulocbuilder.h new file mode 100644 index 0000000000..f2e98612f0 --- /dev/null +++ b/yass/third_party/icu/source/common/unicode/ulocbuilder.h @@ -0,0 +1,441 @@ +// © 2023 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +#ifndef __ULOCBUILDER_H__ +#define __ULOCBUILDER_H__ + +#include "unicode/localpointer.h" +#include "unicode/ulocale.h" +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Builder API for Locale + */ + +#ifndef U_HIDE_DRAFT_API + +/** + * Opaque C service object type for the locale builder API + * @draft ICU 74 + */ +struct ULocaleBuilder; + +/** + * C typedef for struct ULocaleBuilder. + * @draft ICU 74 + */ +typedef struct ULocaleBuilder ULocaleBuilder; + +/** + * ULocaleBuilder is used to build valid locale id + * string or IETF BCP 47 language tag from values configured by the setters. + * The ULocaleBuilder checks if a value configured by a + * setter satisfies the syntax requirements defined by the Locale + * class. A string of Locale created by a ULocaleBuilder is + * well-formed and can be transformed to a well-formed IETF BCP 47 language tag + * without losing information. + * + *

The following example shows how to create a locale string + * with the ULocaleBuilder. + *

+ *
+ *     UErrorCode err = U_ZERO_ERROR;
+ *     char buffer[ULOC_FULLNAME_CAPACITY];
+ *     ULocaleBuilder* builder = ulocbld_open();
+ *     ulocbld_setLanguage(builder, "sr", -1);
+ *     ulocbld_setScript(builder, "Latn", -1);
+ *     ulocbld_setRegion(builder, "RS", -1);
+ *     int32_t length = ulocbld_buildLocaleID(
+ *         builder, buffer, ULOC_FULLNAME_CAPACITY, &error);
+ *     ulocbld_close(builder);
+ * 
+ *
+ * + *

ULocaleBuilders can be reused; ulocbld_clear() resets all + * fields to their default values. + * + *

ULocaleBuilder tracks errors in an internal UErrorCode. For all setters, + * except ulocbld_setLanguageTag and ulocbld_setLocale, ULocaleBuilder will return immediately + * if the internal UErrorCode is in error state. + * To reset internal state and error code, call clear method. + * The ulocbld_setLanguageTag and setLocale method will first clear the internal + * UErrorCode, then track the error of the validation of the input parameter + * into the internal UErrorCode. + * + * @draft ICU 74 + */ + +/** + * Constructs an empty ULocaleBuilder. The default value of all + * fields, extensions, and private use information is the + * empty string. The created builder should be destroyed by calling + * ulocbld_close(); + * + * @draft ICU 74 + */ +U_CAPI ULocaleBuilder* U_EXPORT2 +ulocbld_open(); + +/** + * Close the builder and destroy it's internal states. + * @param builder the builder + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_close(ULocaleBuilder* builder); + +/** + * Resets the ULocaleBuilder to match the provided + * locale. Existing state is discarded. + * + *

All fields of the locale must be well-formed. + *

This method clears the internal UErrorCode. + * + * @param builder the builder + * @param locale the locale, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the locale; if negative, then the locale need to be + * null terminated, + * + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length); + +/** + * Resets the ULocaleBuilder to match the provided + * ULocale. Existing state is discarded. + * + *

The locale must be not bogus. + *

This method clears the internal UErrorCode. + * + * @param builder the builder. + * @param locale the locale, a ULocale* pointer. The builder adopts the locale + * after the call and the client must not delete it. + * + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale); + +/** + * Resets the ULocaleBuilder to match the provided IETF BCP 47 language tag. + * Discards the existing state. + * The empty string causes the builder to be reset, like {@link #ulocbld_clear}. + * Legacy language tags (marked as “Type: grandfathered” in BCP 47) + * are converted to their canonical form before being processed. + * Otherwise, the language tag must be well-formed, + * or else the ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods + * will later report an U_ILLEGAL_ARGUMENT_ERROR. + * + *

This method clears the internal UErrorCode. + * + * @param builder the builder + * @param tag the language tag, defined as IETF BCP 47 language tag, a + * const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the tag; if negative, then the tag need to be + * null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setLanguageTag(ULocaleBuilder* builder, const char* tag, int32_t length); + +/** + * Sets the language. If language is the empty string, the + * language in this ULocaleBuilder is removed. Otherwise, the + * language must be well-formed, or else the ulocbld_buildLocaleID() + * and ulocbld_buildLanguageTag() methods will + * later report an U_ILLEGAL_ARGUMENT_ERROR. + * + *

The syntax of language value is defined as + * [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag). + * + * @param builder the builder + * @param language the language, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the language; if negative, then the language need to be + * null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setLanguage(ULocaleBuilder* builder, const char* language, int32_t length); + +/** + * Sets the script. If script is the empty string, the script in + * this ULocaleBuilder is removed. + * Otherwise, the script must be well-formed, or else the + * ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods will later + * report an U_ILLEGAL_ARGUMENT_ERROR. + * + *

The script value is a four-letter script code as + * [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag) + * defined by ISO 15924 + * + * @param builder the builder + * @param script the script, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the script; if negative, then the script need to be + * null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setScript(ULocaleBuilder* builder, const char* script, int32_t length); + +/** + * Sets the region. If region is the empty string, the region in this + * ULocaleBuilder is removed. Otherwise, the region + * must be well-formed, or else the ulocbld_buildLocaleID() and + * ulocbld_buildLanguageTag() methods will later report an + * U_ILLEGAL_ARGUMENT_ERROR. + * + *

The region value is defined by + * [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag) + * as a two-letter ISO 3166 code or a three-digit UN M.49 area code. + * + *

The region value in the Locale created by the + * ULocaleBuilder is always normalized to upper case. + * + * @param builder the builder + * @param region the region, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the region; if negative, then the region need to be + * null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setRegion(ULocaleBuilder* builder, const char* region, int32_t length); + +/** + * Sets the variant. If variant is the empty string, the variant in this + * ULocaleBuilder is removed. Otherwise, the variant + * must be well-formed, or else the ulocbld_buildLocaleID() and + * ulocbld_buildLanguageTag() methods will later report an + * U_ILLEGAL_ARGUMENT_ERROR. + * + *

Note: This method checks if variant + * satisfies the + * [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag) + * syntax requirements, and normalizes the value to lowercase letters. However, + * the Locale class does not impose any syntactic + * restriction on variant. To set an ill-formed variant, use a Locale constructor. + * If there are multiple unicode_variant_subtag, the caller must concatenate + * them with '-' as separator (ex: "foobar-fibar"). + * + * @param builder the builder + * @param variant the variant, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the variant; if negative, then the variant need to be + * null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setVariant(ULocaleBuilder* builder, const char* variant, int32_t length); + +/** + * Sets the extension for the given key. If the value is the empty string, + * the extension is removed. Otherwise, the key and + * value must be well-formed, or else the ulocbld_buildLocaleID() + * and ulocbld_buildLanguageTag() methods will + * later report an U_ILLEGAL_ARGUMENT_ERROR. + * + *

Note: The key ('u') is used for the Unicode locale extension. + * Setting a value for this key replaces any existing Unicode locale key/type + * pairs with those defined in the extension. + * + *

Note: The key ('x') is used for the private use code. To be + * well-formed, the value for this key needs only to have subtags of one to + * eight alphanumeric characters, not two to eight as in the general case. + * + * @param builder the builder + * @param key the extension key + * @param value the value, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the value; if negative, then the value need to be + * null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32_t length); + +/** + * Sets the Unicode locale keyword type for the given key. If the type + * StringPiece is constructed with a nullptr, the keyword is removed. + * If the type is the empty string, the keyword is set without type subtags. + * Otherwise, the key and type must be well-formed, or else the + * ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods will later + * report an U_ILLEGAL_ARGUMENT_ERROR. + * + *

Keys and types are converted to lower case. + * + *

Note:Setting the 'u' extension via {@link #ulocbld_setExtension} + * replaces all Unicode locale keywords with those defined in the + * extension. + * + * @param builder the builder + * @param key the Unicode locale key, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param keyLength the length of the key; if negative, then the key need to be + * null terminated, + * @param type the Unicode locale type, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param typeLength the length of the type; if negative, then the type need to + * be null terminated, + * @return This builder. + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setUnicodeLocaleKeyword(ULocaleBuilder* builder, + const char* key, int32_t keyLength, const char* type, int32_t typeLength); + +/** + * Adds a unicode locale attribute, if not already present, otherwise + * has no effect. The attribute must not be empty string and must be + * well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status + * during the ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() calls. + * + * @param builder the builder + * @param attribute the attribute, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param length the length of the attribute; if negative, then the attribute + * need to be null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_addUnicodeLocaleAttribute( + ULocaleBuilder* builder, const char* attribute, int32_t length); + +/** + * Removes a unicode locale attribute, if present, otherwise has no + * effect. The attribute must not be empty string and must be well-formed + * or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the ulocbld_buildLocaleID() + * and ulocbld_buildLanguageTag() calls. + * + *

Attribute comparison for removal is case-insensitive. + * + * @param builder the builder + * @param attribute the attribute, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param length the length of the attribute; if negative, then the attribute + * need to be null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_removeUnicodeLocaleAttribute( + ULocaleBuilder* builder, const char* attribute, int32_t length); + +/** + * Resets the builder to its initial, empty state. + *

This method clears the internal UErrorCode. + * + * @param builder the builder + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_clear(ULocaleBuilder* builder); + +/** + * Resets the extensions to their initial, empty state. + * Language, script, region and variant are unchanged. + * + * @param builder the builder + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_clearExtensions(ULocaleBuilder* builder); + +/** + * Build the LocaleID string from the fields set on this builder. + * If any set methods or during the ulocbld_buildLocaleID() call require memory + * allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status. + * If any of the fields set by the setters are not well-formed, the status + * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will + * not change after the ulocbld_buildLocaleID() call and the caller is + * free to keep using the same builder to build more locales. + * + * @param builder the builder + * @param locale the locale id + * @param localeCapacity the size of the locale buffer to store the locale id + * @param err the error code + * @return the length of the locale id in buffer + * @draft ICU 74 + */ +U_CAPI int32_t U_EXPORT2 +ulocbld_buildLocaleID(ULocaleBuilder* builder, char* locale, + int32_t localeCapacity, UErrorCode* err); + +/** + * Build the ULocale object from the fields set on this builder. + * If any set methods or during the ulocbld_buildULocale() call require memory + * allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status. + * If any of the fields set by the setters are not well-formed, the status + * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will + * not change after the ulocbld_buildULocale() call and the caller is + * free to keep using the same builder to build more locales. + * + * @param builder the builder. + * @param err the error code. + * @return the locale, a ULocale* pointer. The created ULocale must be + * destroyed by calling {@link ulocale_close}. + * @draft ICU 74 + */ +U_CAPI ULocale* U_EXPORT2 +ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err); + +/** + * Build the IETF BCP 47 language tag string from the fields set on this builder. + * If any set methods or during the ulocbld_buildLanguageTag() call require memory + * allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status. + * If any of the fields set by the setters are not well-formed, the status + * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will + * not change after the ulocbld_buildLanguageTag() call and the caller is free + * to keep using the same builder to build more locales. + * + * @param builder the builder + * @param language the language tag + * @param languageCapacity the size of the language buffer to store the language + * tag + * @param err the error code + * @return the length of the language tag in buffer + * @draft ICU 74 + */ +U_CAPI int32_t U_EXPORT2 +ulocbld_buildLanguageTag(ULocaleBuilder* builder, char* language, + int32_t languageCapacity, UErrorCode* err); + +/** + * Sets the UErrorCode if an error occurred while recording sets. + * Preserves older error codes in the outErrorCode. + * + * @param builder the builder + * @param outErrorCode Set to an error code that occurred while setting subtags. + * Unchanged if there is no such error or if outErrorCode + * already contained an error. + * @return true if U_FAILURE(*outErrorCode) + * @draft ICU 74 + */ +U_CAPI UBool U_EXPORT2 +ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalULocaleBuilderPointer + * "Smart pointer" class, closes a ULocaleBuilder via ulocbld_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 74 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleBuilderPointer, ULocaleBuilder, ulocbld_close); + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif /* U_HIDE_DRAFT_API */ + +#endif // __ULOCBUILDER_H__ diff --git a/yass/third_party/icu/source/common/unicode/unorm2.h b/yass/third_party/icu/source/common/unicode/unorm2.h index 24417b7103..3844041f17 100644 --- a/yass/third_party/icu/source/common/unicode/unorm2.h +++ b/yass/third_party/icu/source/common/unicode/unorm2.h @@ -181,7 +181,10 @@ U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getNFKDInstance(UErrorCode *pErrorCode); /** - * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. + * Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization + * which is equivalent to applying the NFKC_Casefold mappings and then NFC. + * See https://www.unicode.org/reports/tr44/#NFKC_Casefold + * * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). * Returns an unmodifiable singleton instance. Do not delete it. * @param pErrorCode Standard ICU error code. Its input value must @@ -194,6 +197,25 @@ unorm2_getNFKDInstance(UErrorCode *pErrorCode); U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); +#ifndef U_HIDE_DRAFT_API +/** + * Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization + * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC. + * See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold + * + * Same as unorm2_getInstance(NULL, "nfkc_scf", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 74 + */ +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode); +#endif // U_HIDE_DRAFT_API + /** * Returns a UNormalizer2 instance which uses the specified data file * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) @@ -206,7 +228,7 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. * * @param packageName NULL for ICU built-in data, otherwise application data package name - * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file + * @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file * @param mode normalization mode (compose or decompose etc.) * @param pErrorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns diff --git a/yass/third_party/icu/source/common/unicode/urename.h b/yass/third_party/icu/source/common/unicode/urename.h index b35df45380..74f2cae510 100644 --- a/yass/third_party/icu/source/common/unicode/urename.h +++ b/yass/third_party/icu/source/common/unicode/urename.h @@ -138,8 +138,8 @@ #define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart) #define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default) #define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default) +#define mixedMeasuresToMicros U_ICU_ENTRY_POINT_RENAME(mixedMeasuresToMicros) #define numSysCleanup U_ICU_ENTRY_POINT_RENAME(numSysCleanup) -#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup) #define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun) #define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun) #define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun) @@ -193,6 +193,7 @@ #define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns) #define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns) #define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns) +#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup) #define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems) #define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource) #define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias) @@ -512,9 +513,6 @@ #define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText) #define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText) #define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap) -#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys) -#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey) -#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys) #define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add) #define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear) #define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField) @@ -532,6 +530,7 @@ #define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference) #define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange) #define ucal_getHostTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getHostTimeZone) +#define ucal_getIanaTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getIanaTimeZoneID) #define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale) #define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit) #define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType) @@ -587,6 +586,7 @@ #define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale) #define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions) #define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle) +#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8) #define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open) #define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator) #define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale) @@ -955,9 +955,16 @@ #define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close) #define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next) #define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open) +#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit) +#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer) +#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io) +#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit) #define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch) #define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32) +#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou) #define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close) +#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode) +#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue) #define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex) #define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength) #define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate) @@ -969,7 +976,11 @@ #define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType) #define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars) #define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric) +#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit) #define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open) +#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou) +#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64) +#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop) #define ufmtval_getString U_ICU_ENTRY_POINT_RENAME(ufmtval_getString) #define ufmtval_nextPosition U_ICU_ENTRY_POINT_RENAME(ufmtval_nextPosition) #define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance) @@ -1133,6 +1144,39 @@ #define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType) #define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey) #define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType) +#define ulocale_close U_ICU_ENTRY_POINT_RENAME(ulocale_close) +#define ulocale_getBaseName U_ICU_ENTRY_POINT_RENAME(ulocale_getBaseName) +#define ulocale_getKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocale_getKeywordValue) +#define ulocale_getKeywords U_ICU_ENTRY_POINT_RENAME(ulocale_getKeywords) +#define ulocale_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocale_getLanguage) +#define ulocale_getLocaleID U_ICU_ENTRY_POINT_RENAME(ulocale_getLocaleID) +#define ulocale_getRegion U_ICU_ENTRY_POINT_RENAME(ulocale_getRegion) +#define ulocale_getScript U_ICU_ENTRY_POINT_RENAME(ulocale_getScript) +#define ulocale_getUnicodeKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocale_getUnicodeKeywordValue) +#define ulocale_getUnicodeKeywords U_ICU_ENTRY_POINT_RENAME(ulocale_getUnicodeKeywords) +#define ulocale_getVariant U_ICU_ENTRY_POINT_RENAME(ulocale_getVariant) +#define ulocale_isBogus U_ICU_ENTRY_POINT_RENAME(ulocale_isBogus) +#define ulocale_openForLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocale_openForLanguageTag) +#define ulocale_openForLocaleID U_ICU_ENTRY_POINT_RENAME(ulocale_openForLocaleID) +#define ulocbld_addUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ulocbld_addUnicodeLocaleAttribute) +#define ulocbld_adoptULocale U_ICU_ENTRY_POINT_RENAME(ulocbld_adoptULocale) +#define ulocbld_buildLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocbld_buildLanguageTag) +#define ulocbld_buildLocaleID U_ICU_ENTRY_POINT_RENAME(ulocbld_buildLocaleID) +#define ulocbld_buildULocale U_ICU_ENTRY_POINT_RENAME(ulocbld_buildULocale) +#define ulocbld_clear U_ICU_ENTRY_POINT_RENAME(ulocbld_clear) +#define ulocbld_clearExtensions U_ICU_ENTRY_POINT_RENAME(ulocbld_clearExtensions) +#define ulocbld_close U_ICU_ENTRY_POINT_RENAME(ulocbld_close) +#define ulocbld_copyErrorTo U_ICU_ENTRY_POINT_RENAME(ulocbld_copyErrorTo) +#define ulocbld_open U_ICU_ENTRY_POINT_RENAME(ulocbld_open) +#define ulocbld_removeUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ulocbld_removeUnicodeLocaleAttribute) +#define ulocbld_setExtension U_ICU_ENTRY_POINT_RENAME(ulocbld_setExtension) +#define ulocbld_setLanguage U_ICU_ENTRY_POINT_RENAME(ulocbld_setLanguage) +#define ulocbld_setLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocbld_setLanguageTag) +#define ulocbld_setLocale U_ICU_ENTRY_POINT_RENAME(ulocbld_setLocale) +#define ulocbld_setRegion U_ICU_ENTRY_POINT_RENAME(ulocbld_setRegion) +#define ulocbld_setScript U_ICU_ENTRY_POINT_RENAME(ulocbld_setScript) +#define ulocbld_setUnicodeLocaleKeyword U_ICU_ENTRY_POINT_RENAME(ulocbld_setUnicodeLocaleKeyword) +#define ulocbld_setVariant U_ICU_ENTRY_POINT_RENAME(ulocbld_setVariant) #define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close) #define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion) #define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter) @@ -1213,6 +1257,7 @@ #define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance) #define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance) #define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance) +#define unorm2_getNFKCSimpleCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCSimpleCasefoldInstance) #define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance) #define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition) #define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter) @@ -1349,6 +1394,7 @@ #define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix) #define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii) #define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic) +#define uprv_currencyLeads U_ICU_ENTRY_POINT_RENAME(uprv_currencyLeads) #define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus) #define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault) #define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding) @@ -1367,6 +1413,7 @@ #define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs) #define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd) #define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd) +#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass) #define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString) #define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare) #define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal) @@ -1763,6 +1810,9 @@ #define usnumf_formatInt64 U_ICU_ENTRY_POINT_RENAME(usnumf_formatInt64) #define usnumf_openForLocale U_ICU_ENTRY_POINT_RENAME(usnumf_openForLocale) #define usnumf_openForLocaleAndGroupingStrategy U_ICU_ENTRY_POINT_RENAME(usnumf_openForLocaleAndGroupingStrategy) +#define uspoof_areBidiConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusable) +#define uspoof_areBidiConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusableUTF8) +#define uspoof_areBidiConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusableUnicodeString) #define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable) #define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8) #define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString) @@ -1778,6 +1828,9 @@ #define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars) #define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales) #define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet) +#define uspoof_getBidiSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeleton) +#define uspoof_getBidiSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeletonUTF8) +#define uspoof_getBidiSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeletonUnicodeString) #define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks) #define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics) #define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel) diff --git a/yass/third_party/icu/source/common/unicode/ures.h b/yass/third_party/icu/source/common/unicode/ures.h index cc25b6e49c..babc01d426 100644 --- a/yass/third_party/icu/source/common/unicode/ures.h +++ b/yass/third_party/icu/source/common/unicode/ures.h @@ -25,6 +25,7 @@ #ifndef URES_H #define URES_H +#include "unicode/char16ptr.h" #include "unicode/utypes.h" #include "unicode/uloc.h" @@ -812,7 +813,7 @@ inline UnicodeString ures_getUnicodeString(const UResourceBundle *resB, UErrorCode* status) { UnicodeString result; int32_t len = 0; - const char16_t *r = ures_getString(resB, &len, status); + const char16_t *r = ConstChar16Ptr(ures_getString(resB, &len, status)); if(U_SUCCESS(*status)) { result.setTo(true, r, len); } else { @@ -837,7 +838,7 @@ inline UnicodeString ures_getNextUnicodeString(UResourceBundle *resB, const char ** key, UErrorCode* status) { UnicodeString result; int32_t len = 0; - const char16_t* r = ures_getNextString(resB, &len, key, status); + const char16_t* r = ConstChar16Ptr(ures_getNextString(resB, &len, key, status)); if(U_SUCCESS(*status)) { result.setTo(true, r, len); } else { @@ -859,7 +860,7 @@ inline UnicodeString ures_getUnicodeStringByIndex(const UResourceBundle *resB, int32_t indexS, UErrorCode* status) { UnicodeString result; int32_t len = 0; - const char16_t* r = ures_getStringByIndex(resB, indexS, &len, status); + const char16_t* r = ConstChar16Ptr(ures_getStringByIndex(resB, indexS, &len, status)); if(U_SUCCESS(*status)) { result.setTo(true, r, len); } else { @@ -882,7 +883,7 @@ inline UnicodeString ures_getUnicodeStringByKey(const UResourceBundle *resB, const char* key, UErrorCode* status) { UnicodeString result; int32_t len = 0; - const char16_t* r = ures_getStringByKey(resB, key, &len, status); + const char16_t* r = ConstChar16Ptr(ures_getStringByKey(resB, key, &len, status)); if(U_SUCCESS(*status)) { result.setTo(true, r, len); } else { diff --git a/yass/third_party/icu/source/common/unicode/uvernum.h b/yass/third_party/icu/source/common/unicode/uvernum.h index f0fc671b4b..1cdf8912f9 100644 --- a/yass/third_party/icu/source/common/unicode/uvernum.h +++ b/yass/third_party/icu/source/common/unicode/uvernum.h @@ -53,13 +53,13 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 73 +#define U_ICU_VERSION_MAJOR_NUM 74 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 1 +#define U_ICU_VERSION_MINOR_NUM 2 /** The current ICU patchlevel version as an integer. * This value will change in the subsequent releases of ICU @@ -79,7 +79,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _73 +#define U_ICU_VERSION_SUFFIX _74 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -132,7 +132,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "73.1" +#define U_ICU_VERSION "74.2" /** * The current ICU library major version number as a string, for library name suffixes. @@ -145,13 +145,13 @@ * * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "73" +#define U_ICU_VERSION_SHORT "74" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "73.1" +#define U_ICU_DATA_VERSION "74.2" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/yass/third_party/icu/source/common/uniquecharstr.h b/yass/third_party/icu/source/common/uniquecharstr.h index 10cc924f7f..e5d3b73418 100644 --- a/yass/third_party/icu/source/common/uniquecharstr.h +++ b/yass/third_party/icu/source/common/uniquecharstr.h @@ -10,6 +10,7 @@ #include "charstr.h" #include "uassert.h" #include "uhash.h" +#include "cmemory.h" U_NAMESPACE_BEGIN @@ -47,22 +48,20 @@ public: } /** - * Adds a string and returns a unique number for it. - * The string's buffer contents must not change, nor move around in memory, + * Adds a NUL-terminated string and returns a unique number for it. + * The string must not change, nor move around in memory, * while this UniqueCharStrings is in use. - * The string contents must be NUL-terminated exactly at s.length(). * - * Best used with read-only-alias UnicodeString objects that point to - * stable storage, such as strings returned by resource bundle functions. + * Best used with string data in a stable storage, such as strings returned + * by resource bundle functions. */ - int32_t add(const UnicodeString &s, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return 0; } + int32_t add(const char16_t*p, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return -1; } if (isFrozen) { errorCode = U_NO_WRITE_PERMISSION; - return 0; + return -1; } // The string points into the resource bundle. - const char16_t *p = s.getBuffer(); int32_t oldIndex = uhash_geti(&map, p); if (oldIndex != 0) { // found duplicate return oldIndex; @@ -71,11 +70,33 @@ public: // The strings object is also terminated with one implicit NUL. strings->append(0, errorCode); int32_t newIndex = strings->length(); - strings->appendInvariantChars(s, errorCode); + strings->appendInvariantChars(p, u_strlen(p), errorCode); uhash_puti(&map, const_cast(p), newIndex, &errorCode); return newIndex; } + /** + * Adds a unicode string by value and returns a unique number for it. + */ + int32_t addByValue(UnicodeString s, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return -1; } + if (isFrozen) { + errorCode = U_NO_WRITE_PERMISSION; + return -1; + } + int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer()); + if (oldIndex != 0) { // found duplicate + return oldIndex; + } + // We need to store the string content of the UnicodeString. + UnicodeString *key = keyStore.create(s); + if (key == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return -1; + } + return add(key->getTerminatedBuffer(), errorCode); + } + void freeze() { isFrozen = true; } /** @@ -90,6 +111,7 @@ public: private: UHashtable map; CharString *strings; + MemoryPool keyStore; bool isFrozen = false; }; diff --git a/yass/third_party/icu/source/common/uprops.cpp b/yass/third_party/icu/source/common/uprops.cpp index 28540186c6..314fa6fb86 100644 --- a/yass/third_party/icu/source/common/uprops.cpp +++ b/yass/third_party/icu/source/common/uprops.cpp @@ -328,6 +328,53 @@ static UBool hasEmojiProperty(const BinaryProperty &/*prop*/, UChar32 c, UProper return EmojiProps::hasBinaryProperty(c, which); } +static UBool isIDSUnaryOperator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + // New in Unicode 15.1 for just two characters. + return 0x2FFE<=c && c<=0x2FFF; +} + +/** Ranges (start/limit pairs) of ID_Compat_Math_Continue (only), from UCD PropList.txt. */ +static constexpr UChar32 ID_COMPAT_MATH_CONTINUE[] = { + 0x00B2, 0x00B3 + 1, + 0x00B9, 0x00B9 + 1, + 0x2070, 0x2070 + 1, + 0x2074, 0x207E + 1, + 0x2080, 0x208E + 1 +}; + +/** ID_Compat_Math_Start characters, from UCD PropList.txt. */ +static constexpr UChar32 ID_COMPAT_MATH_START[] = { + 0x2202, + 0x2207, + 0x221E, + 0x1D6C1, + 0x1D6DB, + 0x1D6FB, + 0x1D715, + 0x1D735, + 0x1D74F, + 0x1D76F, + 0x1D789, + 0x1D7A9, + 0x1D7C3 +}; + +static UBool isIDCompatMathStart(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + if (c < ID_COMPAT_MATH_START[0]) { return false; } // fastpath for common scripts + for (UChar32 startChar : ID_COMPAT_MATH_START) { + if (c == startChar) { return true; } + } + return false; +} + +static UBool isIDCompatMathContinue(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { + for (int32_t i = 0; i < UPRV_LENGTHOF(ID_COMPAT_MATH_CONTINUE); i += 2) { + if (c < ID_COMPAT_MATH_CONTINUE[i]) { return false; } // below range start + if (c < ID_COMPAT_MATH_CONTINUE[i + 1]) { return true; } // below range limit + } + return isIDCompatMathStart(prop, c, UCHAR_ID_COMPAT_MATH_START); +} + static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ /* * column and mask values for binary properties from u_getUnicodeProperties(). @@ -409,6 +456,9 @@ static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI_TAG_SEQUENCE { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI + { UPROPS_SRC_IDSU, 0, isIDSUnaryOperator }, // UCHAR_IDS_UNARY_OPERATOR + { UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathStart }, // UCHAR_ID_COMPAT_MATH_START + { UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathContinue }, // UCHAR_ID_COMPAT_MATH_CONTINUE }; U_CAPI UBool U_EXPORT2 @@ -759,6 +809,19 @@ uprops_getSource(UProperty which) { U_CFUNC void U_EXPORT2 uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return; } + if (src == UPROPS_SRC_ID_COMPAT_MATH) { + // range limits + for (UChar32 c : ID_COMPAT_MATH_CONTINUE) { + sa->add(sa->set, c); + } + // single characters + for (UChar32 c : ID_COMPAT_MATH_START) { + sa->add(sa->set, c); + sa->add(sa->set, c + 1); + } + return; + } if (!ulayout_ensureData(*pErrorCode)) { return; } const UCPTrie *trie; switch (src) { diff --git a/yass/third_party/icu/source/common/uprops.h b/yass/third_party/icu/source/common/uprops.h index 1e06d03519..80347eab57 100644 --- a/yass/third_party/icu/source/common/uprops.h +++ b/yass/third_party/icu/source/common/uprops.h @@ -379,6 +379,8 @@ enum UPropertySource { UPROPS_SRC_INSC, UPROPS_SRC_VO, UPROPS_SRC_EMOJI, + UPROPS_SRC_IDSU, + UPROPS_SRC_ID_COMPAT_MATH, /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */ UPROPS_SRC_COUNT }; diff --git a/yass/third_party/icu/source/common/uresbund.cpp b/yass/third_party/icu/source/common/uresbund.cpp index d02bba8921..5aa1c5fa2f 100644 --- a/yass/third_party/icu/source/common/uresbund.cpp +++ b/yass/third_party/icu/source/common/uresbund.cpp @@ -24,6 +24,7 @@ #include "unicode/ures.h" #include "unicode/ustring.h" #include "unicode/ucnv.h" +#include "bytesinkutil.h" #include "charstr.h" #include "uresimp.h" #include "ustr_imp.h" @@ -2351,7 +2352,66 @@ struct GetAllChildrenSink : public ResourceSink { aliasedValue.setData(aliasRB->getResData()); aliasedValue.setValidLocaleDataEntry(aliasRB->fValidLocaleDataEntry); aliasedValue.setResource(aliasRB->fRes, ResourceTracer(aliasRB)); - dest.put(key, aliasedValue, isRoot, errorCode); + + if (aliasedValue.getType() != URES_TABLE) { + dest.put(key, aliasedValue, isRoot, errorCode); + } else { + // if the resource we're aliasing over to is a table, the sink might iterate over its contents. + // If it does, it'll get only the things defined in the actual alias target, not the things + // the target inherits from its parent resources. So we walk the parent chain for the *alias target*, + // calling dest.put() for each of the parent tables we could be inheriting from. This means + // that dest.put() has to iterate over the children of multiple tables to get all of the inherited + // resource values, but it already has to do that to handle normal vertical inheritance. + UResType aliasedValueType = URES_TABLE; + CharString tablePath; + tablePath.append(aliasRB->fResPath, errorCode); + const char* parentKey = key; // dest.put() changes the key + dest.put(parentKey, aliasedValue, isRoot, errorCode); + UResourceDataEntry* entry = aliasRB->fData; + Resource res = aliasRB->fRes; + while (aliasedValueType == URES_TABLE && entry->fParent != nullptr) { + CharString localPath; + localPath.copyFrom(tablePath, errorCode); + char* localPathAsCharPtr = localPath.data(); + const char* childKey; + entry = entry->fParent; + res = entry->fData.rootRes; + Resource newRes = res_findResource(&entry->fData, res, &localPathAsCharPtr, &childKey); + if (newRes != RES_BOGUS) { + aliasedValue.setData(entry->fData); + // TODO: do I also need to call aliasedValue.setValueLocaleDataEntry() ? + aliasedValue.setResource(newRes, ResourceTracer(aliasRB)); // probably wrong to use aliasRB here + aliasedValueType = aliasedValue.getType(); + if (aliasedValueType == URES_ALIAS) { + // in a few rare cases, when we get to the root resource bundle, the resource in question + // won't be an actual table, but will instead be an alias to a table. That is, we have + // two aliases in the inheritance path. (For some locales, such as Zulu, we see this with + // children of the "fields" resource: "day-narrow" aliases to "day-short", which aliases + // to "day".) When this happens, we need to make sure we follow all the aliases. + ResourceDataValue& rdv2 = static_cast(aliasedValue); + aliasRB = getAliasTargetAsResourceBundle(rdv2.getData(), rdv2.getResource(), nullptr, -1, + rdv2.getValidLocaleDataEntry(), nullptr, 0, + stackTempBundle.getAlias(), &errorCode); + tablePath.clear(); + tablePath.append(aliasRB->fResPath, errorCode); + entry = aliasRB->fData; + res = aliasRB->fRes; + aliasedValue.setData(entry->fData); + // TODO: do I also need to call aliasedValue.setValueLocaleDataEntry() ? + aliasedValue.setResource(res, ResourceTracer(aliasRB)); // probably wrong to use aliasRB here + aliasedValueType = aliasedValue.getType(); + } + if (aliasedValueType == URES_TABLE) { + dest.put(parentKey, aliasedValue, isRoot, errorCode); + } else { + // once we've followed the alias, the resource we're looking at really should + // be a table + errorCode = U_INTERNAL_PROGRAM_ERROR; + return; + } + } + } + } } } else { dest.put(key, value, isRoot, errorCode); @@ -2657,13 +2717,16 @@ ures_openWithType(UResourceBundle *r, const char* path, const char* localeID, UResourceDataEntry *entry; if(openType != URES_OPEN_DIRECT) { /* first "canonicalize" the locale ID */ - char canonLocaleID[ULOC_FULLNAME_CAPACITY]; - uloc_getBaseName(localeID, canonLocaleID, UPRV_LENGTHOF(canonLocaleID), status); - if(U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING) { + CharString canonLocaleID; + { + CharStringByteSink sink(&canonLocaleID); + ulocimp_getBaseName(localeID, sink, status); + } + if(U_FAILURE(*status)) { *status = U_ILLEGAL_ARGUMENT_ERROR; return nullptr; } - entry = entryOpen(path, canonLocaleID, openType, status); + entry = entryOpen(path, canonLocaleID.data(), openType, status); } else { entry = entryOpenDirect(path, localeID, status); } @@ -2974,15 +3037,39 @@ static UBool isLocaleInList(UEnumeration *locEnum, const char *locToSearch, UErr return false; } +static void getParentForFunctionalEquivalent(const char* localeID, + UResourceBundle* res, + UResourceBundle* bund1, + char* parent, + int32_t parentCapacity) { + // Get parent. + // First check for a parent from %%Parent resource (Note that in resource trees + // such as collation, data may have different parents than in parentLocales). + UErrorCode subStatus = U_ZERO_ERROR; + parent[0] = '\0'; + if (res != NULL) { + ures_getByKey(res, "%%Parent", bund1, &subStatus); + if (U_SUCCESS(subStatus)) { + int32_t parentLen = parentCapacity; + ures_getUTF8String(bund1, parent, &parentLen, true, &subStatus); + } + } + + // If none there, use normal truncation parent + if (U_FAILURE(subStatus) || parent[0] == 0) { + subStatus = U_ZERO_ERROR; + uloc_getParent(localeID, parent, parentCapacity, &subStatus); + } +} + U_CAPI int32_t U_EXPORT2 ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, const char *path, const char *resName, const char *keyword, const char *locid, UBool *isAvailable, UBool omitDefault, UErrorCode *status) { - char kwVal[1024] = ""; /* value of keyword 'keyword' */ char defVal[1024] = ""; /* default value for given locale */ char defLoc[1024] = ""; /* default value for given locale */ - char base[1024] = ""; /* base locale */ + CharString base; /* base locale */ char found[1024] = ""; char parent[1024] = ""; char full[1024] = ""; @@ -2991,23 +3078,29 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, UErrorCode subStatus = U_ZERO_ERROR; int32_t length = 0; if(U_FAILURE(*status)) return 0; - uloc_getKeywordValue(locid, keyword, kwVal, 1024-1,&subStatus); - if(!uprv_strcmp(kwVal, DEFAULT_TAG)) { - kwVal[0]=0; + CharString kwVal; + { + CharStringByteSink sink(&kwVal); + ulocimp_getKeywordValue(locid, keyword, sink, &subStatus); + } + if(kwVal == DEFAULT_TAG) { + kwVal.clear(); + } + { + CharStringByteSink sink(&base); + ulocimp_getBaseName(locid, sink, &subStatus); } - uloc_getBaseName(locid, base, 1024-1,&subStatus); #if defined(URES_TREE_DEBUG) fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n", - locid, keyword, kwVal, base, u_errorName(subStatus)); + locid, keyword, kwVal.data(), base.data(), u_errorName(subStatus)); #endif ures_initStackObject(&bund1); ures_initStackObject(&bund2); - - - uprv_strcpy(parent, base); - uprv_strcpy(found, base); - if(isAvailable) { + base.extract(parent, UPRV_LENGTHOF(parent), subStatus); + base.extract(found, UPRV_LENGTHOF(found), subStatus); + + if(isAvailable) { UEnumeration *locEnum = ures_openAvailableLocales(path, &subStatus); *isAvailable = true; if (U_SUCCESS(subStatus)) { @@ -3054,11 +3147,11 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, path?path:"ICUDATA", parent, keyword, defVal, u_errorName(subStatus)); #endif uprv_strcpy(defLoc, parent); - if(kwVal[0]==0) { - uprv_strcpy(kwVal, defVal); + if(kwVal.isEmpty()) { + kwVal.append(defVal, defLen, subStatus); #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> kwVal = %s\n", - path?path:"ICUDATA", parent, keyword, kwVal); + path?path:"ICUDATA", parent, keyword, kwVal.data()); #endif } } @@ -3071,16 +3164,19 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, uprv_strcpy(found, ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus)); } - uloc_getParent(found,parent,sizeof(parent),&subStatus); + if (uprv_strcmp(found, parent) != 0) { + uprv_strcpy(parent, found); + } else { + getParentForFunctionalEquivalent(found,res,&bund1,parent,sizeof(parent)); + } ures_close(res); } while(!defVal[0] && *found && uprv_strcmp(found, "root") != 0 && U_SUCCESS(*status)); /* Now, see if we can find the kwVal collator.. start the search over.. */ - uprv_strcpy(parent, base); - uprv_strcpy(found, base); - + base.extract(parent, UPRV_LENGTHOF(parent), subStatus); + base.extract(found, UPRV_LENGTHOF(found), subStatus); + do { - subStatus = U_ZERO_ERROR; res = ures_open(path, parent, &subStatus); if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) { *isAvailable = false; @@ -3089,7 +3185,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> %s (looking for %s)\n", - path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal); + path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal.data()); #endif if(U_FAILURE(subStatus)) { *status = subStatus; @@ -3099,14 +3195,14 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, /**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, resName, u_errorName(subStatus)); #endif if(subStatus == U_ZERO_ERROR) { - ures_getByKey(&bund1, kwVal, &bund2, &subStatus); + ures_getByKey(&bund1, kwVal.data(), &bund2, &subStatus); #if defined(URES_TREE_DEBUG) -/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal, u_errorName(subStatus)); +/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal.data(), u_errorName(subStatus)); #endif if(subStatus == U_ZERO_ERROR) { #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> full0 %s=%s, %s\n", - path?path:"ICUDATA", parent, keyword, kwVal, u_errorName(subStatus)); + path?path:"ICUDATA", parent, keyword, kwVal.data(), u_errorName(subStatus)); #endif uprv_strcpy(full, parent); if(*full == 0) { @@ -3139,29 +3235,52 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, } else { #if defined(URES_TREE_DEBUG) fprintf(stderr, "err=%s in %s looking for %s\n", - u_errorName(subStatus), parent, kwVal); + u_errorName(subStatus), parent, kwVal.data()); #endif } } } - subStatus = U_ZERO_ERROR; - - uprv_strcpy(found, parent); - uloc_getParent(found,parent,1023,&subStatus); - ures_close(res); - } while(!full[0] && *found && U_SUCCESS(*status)); - - if((full[0]==0) && uprv_strcmp(kwVal, defVal)) { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal, defVal); -#endif - uprv_strcpy(kwVal, defVal); - uprv_strcpy(parent, base); - uprv_strcpy(found, base); - - do { /* search for 'default' named item */ + UBool haveFound = false; + // At least for collations which may be aliased, we need to use the VALID locale + // as the parent instead of just truncating, as long as the VALID locale is not + // root and has a different language than the parent. Use of the VALID locale + // here is similar to the procedure used at the end of the previous do-while loop + // for all resource types. + if (res != NULL && uprv_strcmp(resName, "collations") == 0) { subStatus = U_ZERO_ERROR; + const char *validLoc = ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus); + if (U_SUCCESS(subStatus) && validLoc != NULL && validLoc[0] != 0 && uprv_strcmp(validLoc, "root") != 0) { + char validLang[ULOC_LANG_CAPACITY]; + char parentLang[ULOC_LANG_CAPACITY]; + uloc_getLanguage(validLoc, validLang, ULOC_LANG_CAPACITY, &subStatus); + uloc_getLanguage(parent, parentLang, ULOC_LANG_CAPACITY, &subStatus); + if (U_SUCCESS(subStatus) && uprv_strcmp(validLang, parentLang) != 0) { + // validLoc is not root and has a different language than parent, use it instead + uprv_strcpy(found, validLoc); + haveFound = true; + } + } + subStatus = U_ZERO_ERROR; + } + if (!haveFound) { + uprv_strcpy(found, parent); + } + + getParentForFunctionalEquivalent(found,res,&bund1,parent,1023); + ures_close(res); + subStatus = U_ZERO_ERROR; + } while(!full[0] && *found && U_SUCCESS(*status)); + + if((full[0]==0) && kwVal != defVal) { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal.data(), defVal); +#endif + kwVal.clear().append(defVal, subStatus); + base.extract(parent, UPRV_LENGTHOF(parent), subStatus); + base.extract(found, UPRV_LENGTHOF(found), subStatus); + + do { /* search for 'default' named item */ res = ures_open(path, parent, &subStatus); if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) { *isAvailable = false; @@ -3170,18 +3289,18 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> %s (looking for default %s)\n", - path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal); + path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal.data()); #endif if(U_FAILURE(subStatus)) { *status = subStatus; } else if(subStatus == U_ZERO_ERROR) { ures_getByKey(res,resName,&bund1, &subStatus); if(subStatus == U_ZERO_ERROR) { - ures_getByKey(&bund1, kwVal, &bund2, &subStatus); + ures_getByKey(&bund1, kwVal.data(), &bund2, &subStatus); if(subStatus == U_ZERO_ERROR) { #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> full1 %s=%s, %s\n", path?path:"ICUDATA", - parent, keyword, kwVal, u_errorName(subStatus)); + parent, keyword, kwVal.data(), u_errorName(subStatus)); #endif uprv_strcpy(full, parent); if(*full == 0) { @@ -3215,18 +3334,18 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, } } } - subStatus = U_ZERO_ERROR; uprv_strcpy(found, parent); - uloc_getParent(found,parent,1023,&subStatus); + getParentForFunctionalEquivalent(found,res,&bund1,parent,1023); ures_close(res); + subStatus = U_ZERO_ERROR; } while(!full[0] && *found && U_SUCCESS(*status)); } if(U_SUCCESS(*status)) { if(!full[0]) { #if defined(URES_TREE_DEBUG) - fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal); + fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal.data()); #endif *status = U_MISSING_RESOURCE_ERROR; } else if(omitDefault) { @@ -3235,21 +3354,21 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, #endif if(uprv_strlen(defLoc) <= uprv_strlen(full)) { /* found the keyword in a *child* of where the default tag was present. */ - if(!uprv_strcmp(kwVal, defVal)) { /* if the requested kw is default, */ + if(kwVal == defVal) { /* if the requested kw is default, */ /* and the default is in or in an ancestor of the current locale */ #if defined(URES_TREE_DEBUG) - fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal); + fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal.data()); #endif - kwVal[0]=0; + kwVal.clear(); } } } uprv_strcpy(found, full); - if(kwVal[0]) { + if(!kwVal.isEmpty()) { uprv_strcat(found, "@"); uprv_strcat(found, keyword); uprv_strcat(found, "="); - uprv_strcat(found, kwVal); + uprv_strcat(found, kwVal.data()); } else if(!omitDefault) { uprv_strcat(found, "@"); uprv_strcat(found, keyword); diff --git a/yass/third_party/icu/source/common/ustrcase.cpp b/yass/third_party/icu/source/common/ustrcase.cpp index 537b5ba857..e4aec8a1c7 100644 --- a/yass/third_party/icu/source/common/ustrcase.cpp +++ b/yass/third_party/icu/source/common/ustrcase.cpp @@ -1130,14 +1130,18 @@ int32_t toUpper(uint32_t options, // Adding one only to the final vowel in a longer sequence // (which does not occur in normal writing) would require lookahead. // Set the same flag as for preserving an existing dialytika. - if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && - (upper == 0x399 || upper == 0x3A5)) { - data |= HAS_DIALYTIKA; + if ((data & HAS_VOWEL) != 0 && + (state & (AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT | AFTER_VOWEL_WITH_COMBINING_ACCENT)) != + 0 && + (upper == 0x399 || upper == 0x3A5)) { + data |= (state & AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT) ? HAS_DIALYTIKA + : HAS_COMBINING_DIALYTIKA; } int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. if ((data & HAS_YPOGEGRAMMENI) != 0) { numYpogegrammeni = 1; } + const UBool hasPrecomposedAccent = (data & HAS_ACCENT) != 0; // Skip combining diacritics after this Greek letter. while (nextIndex < srcLength) { uint32_t diacriticData = getDiacriticData(src[nextIndex]); @@ -1152,7 +1156,8 @@ int32_t toUpper(uint32_t options, } } if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { - nextState |= AFTER_VOWEL_WITH_ACCENT; + nextState |= hasPrecomposedAccent ? AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT + : AFTER_VOWEL_WITH_COMBINING_ACCENT; } // Map according to Greek rules. UBool addTonos = false; @@ -1163,7 +1168,7 @@ int32_t toUpper(uint32_t options, !isFollowedByCasedLetter(src, nextIndex, srcLength)) { // Keep disjunctive "or" with (only) a tonos. // We use the same "word boundary" conditions as for the Final_Sigma test. - if (i == nextIndex) { + if (hasPrecomposedAccent) { upper = 0x389; // Preserve the precomposed form. } else { addTonos = true; diff --git a/yass/third_party/icu/source/common/uts46.cpp b/yass/third_party/icu/source/common/uts46.cpp index fce2af4130..28f15f7175 100644 --- a/yass/third_party/icu/source/common/uts46.cpp +++ b/yass/third_party/icu/source/common/uts46.cpp @@ -669,14 +669,6 @@ UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart return length; } -// Some non-ASCII characters are equivalent to sequences with -// non-LDH ASCII characters. To find them: -// grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) -static inline UBool -isNonASCIIDisallowedSTD3Valid(UChar32 c) { - return c==0x2260 || c==0x226E || c==0x226F; -} - // Replace the label in dest with the label string, if the label was modified. // If &label==&dest then the label was modified in-place and labelLength // is the new label length, different from label.length(). @@ -820,10 +812,7 @@ UTS46::processLabel(UnicodeString &dest, } } else { oredChars|=c; - if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) { - info.labelErrors|=UIDNA_ERROR_DISALLOWED; - *s=0xfffd; - } else if(c==0xfffd) { + if(c==0xfffd) { info.labelErrors|=UIDNA_ERROR_DISALLOWED; } } diff --git a/yass/third_party/icu/source/i18n/calendar.cpp b/yass/third_party/icu/source/i18n/calendar.cpp index d87a9baf0e..4b6edc87c9 100644 --- a/yass/third_party/icu/source/i18n/calendar.cpp +++ b/yass/third_party/icu/source/i18n/calendar.cpp @@ -63,6 +63,8 @@ #include "sharedcalendar.h" #include "unifiedcache.h" #include "ulocimp.h" +#include "bytesinkutil.h" +#include "charstr.h" #if !UCONFIG_NO_SERVICE static icu::ICULocaleService* gService = nullptr; @@ -247,46 +249,32 @@ static UBool isStandardSupportedKeyword(const char *keyword, UErrorCode& status) return (calType != CALTYPE_UNKNOWN); } -// only used with service registration. -static void getCalendarKeyword(const UnicodeString &id, char *targetBuffer, int32_t targetBufferSize) { - UnicodeString calendarKeyword = UNICODE_STRING_SIMPLE("calendar="); - int32_t calKeyLen = calendarKeyword.length(); - int32_t keyLen = 0; - - int32_t keywordIdx = id.indexOf((char16_t)0x003D); /* '=' */ - if (id[0] == 0x40/*'@'*/ - && id.compareBetween(1, keywordIdx+1, calendarKeyword, 0, calKeyLen) == 0) - { - keyLen = id.extract(keywordIdx+1, id.length(), targetBuffer, targetBufferSize, US_INV); - } - targetBuffer[keyLen] = 0; -} #endif static ECalType getCalendarTypeForLocale(const char *locid) { UErrorCode status = U_ZERO_ERROR; ECalType calType = CALTYPE_UNKNOWN; - //TODO: ULOC_FULL_NAME is out of date and too small.. - char canonicalName[256]; - // Canonicalize, so that an old-style variant will be transformed to keywords. // e.g ja_JP_TRADITIONAL -> ja_JP@calendar=japanese // NOTE: Since ICU-20187, ja_JP_TRADITIONAL no longer canonicalizes, and // the Gregorian calendar is returned instead. - int32_t canonicalLen = uloc_canonicalize(locid, canonicalName, sizeof(canonicalName) - 1, &status); + CharString canonicalName; + { + CharStringByteSink sink(&canonicalName); + ulocimp_canonicalize(locid, sink, &status); + } if (U_FAILURE(status)) { return CALTYPE_GREGORIAN; } - canonicalName[canonicalLen] = 0; // terminate - char calTypeBuf[32]; - int32_t calTypeBufLen; - - calTypeBufLen = uloc_getKeywordValue(canonicalName, "calendar", calTypeBuf, sizeof(calTypeBuf) - 1, &status); + CharString calTypeBuf; + { + CharStringByteSink sink(&calTypeBuf); + ulocimp_getKeywordValue(canonicalName.data(), "calendar", sink, &status); + } if (U_SUCCESS(status)) { - calTypeBuf[calTypeBufLen] = 0; - calType = getCalendarType(calTypeBuf); + calType = getCalendarType(calTypeBuf.data()); if (calType != CALTYPE_UNKNOWN) { return calType; } @@ -296,7 +284,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) { // when calendar keyword is not available or not supported, read supplementalData // to get the default calendar type for the locale's region char region[ULOC_COUNTRY_CAPACITY]; - (void)ulocimp_getRegionForSupplementalData(canonicalName, true, region, sizeof(region), &status); + (void)ulocimp_getRegionForSupplementalData(canonicalName.data(), true, region, sizeof(region), &status); if (U_FAILURE(status)) { return CALTYPE_GREGORIAN; } @@ -310,16 +298,13 @@ static ECalType getCalendarTypeForLocale(const char *locid) { order = ures_getByKey(rb, "001", nullptr, &status); } - calTypeBuf[0] = 0; + calTypeBuf.clear(); if (U_SUCCESS(status) && order != nullptr) { // the first calendar type is the default for the region int32_t len = 0; const char16_t *uCalType = ures_getStringByIndex(order, 0, &len, &status); - if (len < (int32_t)sizeof(calTypeBuf)) { - u_UCharsToChars(uCalType, calTypeBuf, len); - *(calTypeBuf + len) = 0; // terminate; - calType = getCalendarType(calTypeBuf); - } + calTypeBuf.appendInvariantChars(uCalType, len, status); + calType = getCalendarType(calTypeBuf.data()); } ures_close(order); @@ -458,10 +443,7 @@ protected: lkey->canonicalLocale(canLoc); char keyword[ULOC_FULLNAME_CAPACITY]; - UnicodeString str; - - key.currentID(str); - getCalendarKeyword(str, keyword, (int32_t) sizeof(keyword)); + curLoc.getKeywordValue("calendar", keyword, (int32_t) sizeof(keyword), status); #ifdef U_DEBUG_CALSVC fprintf(stderr, "BasicCalendarFactory::create() - cur %s, can %s\n", (const char*)curLoc.getName(), (const char*)canLoc.getName()); @@ -1186,6 +1168,9 @@ Calendar::setTimeInMillis( double millis, UErrorCode& status ) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } + } else if (uprv_isNaN(millis)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; } fTime = millis; @@ -1206,6 +1191,13 @@ Calendar::setTimeInMillis( double millis, UErrorCode& status ) { int32_t Calendar::get(UCalendarDateFields field, UErrorCode& status) const { + if (U_FAILURE(status)) { + return 0; + } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } // field values are only computed when actually requested; for more on when computation // of various things happens, see the "data flow in Calendar" description at the top // of this file @@ -1218,6 +1210,9 @@ Calendar::get(UCalendarDateFields field, UErrorCode& status) const void Calendar::set(UCalendarDateFields field, int32_t value) { + if (field < 0 || field >= UCAL_FIELD_COUNT) { + return; + } if (fAreFieldsVirtuallySet) { UErrorCode ec = U_ZERO_ERROR; computeFields(ec); @@ -1299,13 +1294,25 @@ Calendar::clear() void Calendar::clear(UCalendarDateFields field) { + if (field < 0 || field >= UCAL_FIELD_COUNT) { + return; + } if (fAreFieldsVirtuallySet) { UErrorCode ec = U_ZERO_ERROR; computeFields(ec); } fFields[field] = 0; fStamp[field] = kUnset; - fIsSet[field] = false; // Remove later + if (field == UCAL_MONTH) { + fFields[UCAL_ORDINAL_MONTH] = 0; + fStamp[UCAL_ORDINAL_MONTH] = kUnset; + fIsSet[UCAL_ORDINAL_MONTH] = false; // Remove later + } + if (field == UCAL_ORDINAL_MONTH) { + fFields[UCAL_MONTH] = 0; + fStamp[UCAL_MONTH] = kUnset; + fIsSet[UCAL_MONTH] = false; // Remove later + } fIsTimeSet = fAreFieldsSet = fAreAllFieldsSet = fAreFieldsVirtuallySet = false; } @@ -1314,6 +1321,9 @@ Calendar::clear(UCalendarDateFields field) UBool Calendar::isSet(UCalendarDateFields field) const { + if (field < 0 || field >= UCAL_FIELD_COUNT) { + return false; + } return fAreFieldsVirtuallySet || (fStamp[field] != kUnset); } @@ -1392,6 +1402,10 @@ void Calendar::pinField(UCalendarDateFields field, UErrorCode& status) { if (U_FAILURE(status)) { return; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } int32_t max = getActualMaximum(field, status); int32_t min = getActualMinimum(field, status); @@ -1727,6 +1741,10 @@ void Calendar::roll(UCalendarDateFields field, int32_t amount, UErrorCode& statu if(U_FAILURE(status)) { return; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } switch (field) { case UCAL_DAY_OF_MONTH: case UCAL_AM_PM: @@ -1835,7 +1853,6 @@ void Calendar::roll(UCalendarDateFields field, int32_t amount, UErrorCode& statu } set(field, newYear); pinField(UCAL_MONTH,status); - pinField(UCAL_ORDINAL_MONTH,status); pinField(UCAL_DAY_OF_MONTH,status); return; } @@ -1844,7 +1861,6 @@ void Calendar::roll(UCalendarDateFields field, int32_t amount, UErrorCode& statu // Rolling the year can involve pinning the DAY_OF_MONTH. set(field, internalGet(field) + amount); pinField(UCAL_MONTH,status); - pinField(UCAL_ORDINAL_MONTH,status); pinField(UCAL_DAY_OF_MONTH,status); return; @@ -1911,6 +1927,10 @@ void Calendar::roll(UCalendarDateFields field, int32_t amount, UErrorCode& statu // Now roll between start and (limit - 1). int32_t gap = limit - start; + if (gap == 0) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } int32_t day_of_month = (internalGet(UCAL_DAY_OF_MONTH) + amount*7 - start) % gap; if (day_of_month < 0) day_of_month += gap; @@ -1969,6 +1989,10 @@ void Calendar::roll(UCalendarDateFields field, int32_t amount, UErrorCode& statu // Now roll between start and (limit - 1). int32_t gap = limit - start; + if (gap == 0) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } int32_t day_of_year = (internalGet(UCAL_DAY_OF_YEAR) + amount*7 - start) % gap; if (day_of_year < 0) day_of_year += gap; @@ -2071,6 +2095,10 @@ void Calendar::add(UCalendarDateFields field, int32_t amount, UErrorCode& status if (U_FAILURE(status)) { return; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } if (amount == 0) { return; // Do nothing! } @@ -2257,7 +2285,13 @@ int32_t Calendar::fieldDifference(UDate when, EDateFields field, UErrorCode& sta } int32_t Calendar::fieldDifference(UDate targetMs, UCalendarDateFields field, UErrorCode& ec) { - if (U_FAILURE(ec)) return 0; + if (U_FAILURE(ec)) { + return 0; + } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } int32_t min = 0; double startMs = getTimeInMillis(ec); // Always add from the start millis. This accommodates @@ -2362,7 +2396,9 @@ void Calendar::adoptTimeZone(TimeZone* zone) { // Do nothing if passed-in zone is nullptr - if (zone == nullptr) return; + if (zone == nullptr) { + return; + } // fZone should always be non-null delete fZone; @@ -2715,6 +2751,10 @@ Calendar::getActualMinimum(UCalendarDateFields field, UErrorCode& status) const if (U_FAILURE(status)) { return 0; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } int32_t fieldValue = getGreatestMinimum(field); int32_t endValue = getMinimum(field); @@ -2790,7 +2830,9 @@ const char* Calendar::getTemporalMonthCode(UErrorCode& status) const { int32_t month = get(UCAL_MONTH, status); - if (U_FAILURE(status)) return nullptr; + if (U_FAILURE(status)) { + return nullptr; + } U_ASSERT(month < 12); U_ASSERT(internalGet(UCAL_IS_LEAP_MONTH) == 0); return gTemporalMonthCodes[month]; @@ -2799,7 +2841,9 @@ Calendar::getTemporalMonthCode(UErrorCode& status) const void Calendar::setTemporalMonthCode(const char* code, UErrorCode& status ) { - if (U_FAILURE(status)) return; + if (U_FAILURE(status)) { + return; + } int32_t len = static_cast(uprv_strlen(code)); if (len == 3 && code[0] == 'M') { for (int m = 0; gTemporalMonthCodes[m] != nullptr; m++) { @@ -2844,6 +2888,10 @@ void Calendar::validateField(UCalendarDateFields field, UErrorCode &status) { if (U_FAILURE(status)) { return; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } int32_t y; switch (field) { case UCAL_DAY_OF_MONTH: @@ -2883,6 +2931,10 @@ void Calendar::validateField(UCalendarDateFields field, int32_t min, int32_t max if (U_FAILURE(status)) { return; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } int32_t value = fFields[field]; if (value < min || value > max) { #if defined (U_DEBUG_CAL) @@ -3646,13 +3698,19 @@ Calendar::getActualMaximum(UCalendarDateFields field, UErrorCode& status) const if (U_FAILURE(status)) { return 0; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } int32_t result; switch (field) { case UCAL_DATE: { - if(U_FAILURE(status)) return 0; Calendar *cal = clone(); - if(!cal) { status = U_MEMORY_ALLOCATION_ERROR; return 0; } + if(!cal) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } cal->setLenient(true); cal->prepareGetActual(field,false,status); result = handleGetMonthLength(cal->get(UCAL_EXTENDED_YEAR, status), cal->get(UCAL_MONTH, status)); @@ -3662,9 +3720,11 @@ Calendar::getActualMaximum(UCalendarDateFields field, UErrorCode& status) const case UCAL_DAY_OF_YEAR: { - if(U_FAILURE(status)) return 0; Calendar *cal = clone(); - if(!cal) { status = U_MEMORY_ALLOCATION_ERROR; return 0; } + if(!cal) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } cal->setLenient(true); cal->prepareGetActual(field,false,status); result = handleGetYearLength(cal->get(UCAL_EXTENDED_YEAR, status)); @@ -3727,6 +3787,10 @@ void Calendar::prepareGetActual(UCalendarDateFields field, UBool isMinimum, UErr if (U_FAILURE(status)) { return; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } set(UCAL_MILLISECONDS_IN_DAY, 0); switch (field) { @@ -3785,6 +3849,10 @@ int32_t Calendar::getActualHelper(UCalendarDateFields field, int32_t startValue, if (U_FAILURE(status)) { return 0; } + if (field < 0 || field >= UCAL_FIELD_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } if (startValue == endValue) { // if we know that the maximum value is always the same, just return it return startValue; @@ -3794,9 +3862,14 @@ int32_t Calendar::getActualHelper(UCalendarDateFields field, int32_t startValue, // clone the calendar so we don't mess with the real one, and set it to // accept anything for the field values - if(U_FAILURE(status)) return startValue; + if(U_FAILURE(status)) { + return startValue; + } Calendar *work = clone(); - if(!work) { status = U_MEMORY_ALLOCATION_ERROR; return startValue; } + if(!work) { + status = U_MEMORY_ALLOCATION_ERROR; + return startValue; + } // need to resolve time here, otherwise, fields set for actual limit // may cause conflict with fields previously set (but not yet resolved). @@ -3850,7 +3923,9 @@ void Calendar::setWeekData(const Locale& desiredLocale, const char *type, UErrorCode& status) { - if (U_FAILURE(status)) return; + if (U_FAILURE(status)) { + return; + } fFirstDayOfWeek = UCAL_SUNDAY; fMinimalDaysInFirstWeek = 1; diff --git a/yass/third_party/icu/source/i18n/chnsecal.cpp b/yass/third_party/icu/source/i18n/chnsecal.cpp index 58685632e0..c7c573bacc 100644 --- a/yass/third_party/icu/source/i18n/chnsecal.cpp +++ b/yass/third_party/icu/source/i18n/chnsecal.cpp @@ -17,6 +17,8 @@ #include "chnsecal.h" +#include + #if !UCONFIG_NO_FORMATTING #include "umutex.h" @@ -383,7 +385,7 @@ void ChineseCalendar::add(UCalendarDateFields field, int32_t amount, UErrorCode& int32_t day = get(UCAL_JULIAN_DAY, status) - kEpochStartAsJulianDay; // Get local day if (U_FAILURE(status)) break; int32_t moon = day - dom + 1; // New moon - offsetMonth(moon, dom, amount); + offsetMonth(moon, dom, amount, status); } break; default: @@ -453,7 +455,7 @@ void ChineseCalendar::roll(UCalendarDateFields field, int32_t amount, UErrorCode } if (newM != m) { - offsetMonth(moon, dom, newM - m); + offsetMonth(moon, dom, newM - m, status); } } break; @@ -653,9 +655,13 @@ UBool ChineseCalendar::isLeapMonthBetween(int32_t newMoon1, int32_t newMoon2) co } #endif - return (newMoon2 >= newMoon1) && - (isLeapMonthBetween(newMoon1, newMoonNear(newMoon2 - SYNODIC_GAP, false)) || - hasNoMajorSolarTerm(newMoon2)); + while (newMoon2 >= newMoon1) { + if (hasNoMajorSolarTerm(newMoon2)) { + return true; + } + newMoon2 = newMoonNear(newMoon2 - SYNODIC_GAP, false); + } + return false; } /** @@ -805,12 +811,21 @@ int32_t ChineseCalendar::newYear(int32_t gyear) const { * @param dom the 1-based day-of-month of the start position * @param delta the number of months to move forward or backward from * the start position + * @param status The status. */ -void ChineseCalendar::offsetMonth(int32_t newMoon, int32_t dom, int32_t delta) { - UErrorCode status = U_ZERO_ERROR; +void ChineseCalendar::offsetMonth(int32_t newMoon, int32_t dom, int32_t delta, + UErrorCode& status) { + if (U_FAILURE(status)) { return; } // Move to the middle of the month before our target month. - newMoon += (int32_t) (CalendarAstronomer::SYNODIC_MONTH * (delta - 0.5)); + double value = newMoon; + value += (CalendarAstronomer::SYNODIC_MONTH * + (static_cast(delta) - 0.5)); + if (value < INT32_MIN || value > INT32_MAX) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + newMoon = static_cast(value); // Search forward to the target month's new moon newMoon = newMoonNear(newMoon, true); @@ -969,10 +984,11 @@ int32_t ChineseCalendar::internalGetMonth() const { // months. UErrorCode status = U_ZERO_ERROR; temp->roll(UCAL_MONTH, internalGet(UCAL_ORDINAL_MONTH), status); - + U_ASSERT(U_SUCCESS(status)); ChineseCalendar *nonConstThis = (ChineseCalendar*)this; // cast away const nonConstThis->internalSet(UCAL_IS_LEAP_MONTH, temp->get(UCAL_IS_LEAP_MONTH, status)); + U_ASSERT(U_SUCCESS(status)); int32_t month = temp->get(UCAL_MONTH, status); U_ASSERT(U_SUCCESS(status)); nonConstThis->internalSet(UCAL_MONTH, month); diff --git a/yass/third_party/icu/source/i18n/chnsecal.h b/yass/third_party/icu/source/i18n/chnsecal.h index 9b5a629fad..e4910f6eb8 100644 --- a/yass/third_party/icu/source/i18n/chnsecal.h +++ b/yass/third_party/icu/source/i18n/chnsecal.h @@ -75,10 +75,6 @@ U_NAMESPACE_BEGIN *

  • Dershowitz and Reingold, Calendrical Calculations, * Cambridge University Press, 1997
  • * - *
  • Helmer Aslaksen's - * - * Chinese Calendar page
  • - * *
  • The * Calendar FAQ
  • * @@ -256,7 +252,7 @@ class U_I18N_API ChineseCalendar : public Calendar { virtual void computeChineseFields(int32_t days, int32_t gyear, int32_t gmonth, UBool setAllFields); virtual int32_t newYear(int32_t gyear) const; - virtual void offsetMonth(int32_t newMoon, int32_t dom, int32_t delta); + virtual void offsetMonth(int32_t newMoon, int32_t dom, int32_t delta, UErrorCode& status); const TimeZone* getChineseCalZoneAstroCalc() const; // UObject stuff diff --git a/yass/third_party/icu/source/i18n/collationbuilder.cpp b/yass/third_party/icu/source/i18n/collationbuilder.cpp index d8fb89738c..3da74cc483 100644 --- a/yass/third_party/icu/source/i18n/collationbuilder.cpp +++ b/yass/third_party/icu/source/i18n/collationbuilder.cpp @@ -1113,12 +1113,23 @@ CollationBuilder::addWithClosure(const UnicodeString &nfdPrefix, const UnicodeSt return ce32; } +// ICU-22517 +// This constant defines a limit for the addOnlyClosure to return +// error, to avoid taking a long time for canonical closure expansion. +// Please let us know if you have a reasonable use case that needed +// for a practical Collation rule that needs to increase this limit. +// This value is needed for compiling a rule with eight Hangul syllables such as +// "&a=b쫊쫊쫊쫊쫊쫊쫊쫊" without error, which should be more than realistic +// usage. +static constexpr int32_t kClosureLoopLimit = 6560; + uint32_t CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeString &nfdString, const int64_t newCEs[], int32_t newCEsLength, uint32_t ce32, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return ce32; } + int32_t loop = 0; // Map from canonically equivalent input to the CEs. (But not from the all-NFD input.) if(nfdPrefix.isEmpty()) { CanonicalIterator stringIter(nfdString, errorCode); @@ -1128,6 +1139,11 @@ CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeSt UnicodeString str = stringIter.next(); if(str.isBogus()) { break; } if(ignoreString(str, errorCode) || str == nfdString) { continue; } + if (loop++ > kClosureLoopLimit) { + // To avoid hang as in ICU-22517, return with error. + errorCode = U_INPUT_TOO_LONG_ERROR; + return ce32; + } ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32, errorCode); if(U_FAILURE(errorCode)) { return ce32; } } @@ -1144,6 +1160,11 @@ CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeSt UnicodeString str = stringIter.next(); if(str.isBogus()) { break; } if(ignoreString(str, errorCode) || (samePrefix && str == nfdString)) { continue; } + if (loop++ > kClosureLoopLimit) { + // To avoid hang as in ICU-22517, return with error. + errorCode = U_INPUT_TOO_LONG_ERROR; + return ce32; + } ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32, errorCode); if(U_FAILURE(errorCode)) { return ce32; } } diff --git a/yass/third_party/icu/source/i18n/collationruleparser.cpp b/yass/third_party/icu/source/i18n/collationruleparser.cpp index 4cc25a1f5c..ba740e8d18 100644 --- a/yass/third_party/icu/source/i18n/collationruleparser.cpp +++ b/yass/third_party/icu/source/i18n/collationruleparser.cpp @@ -24,6 +24,7 @@ #include "unicode/uloc.h" #include "unicode/unistr.h" #include "unicode/utf16.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cmemory.h" #include "collation.h" @@ -34,6 +35,7 @@ #include "cstring.h" #include "patternprops.h" #include "uassert.h" +#include "ulocimp.h" #include "uvectr32.h" U_NAMESPACE_BEGIN @@ -604,19 +606,20 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) { lang.appendInvariantChars(v, errorCode); if(errorCode == U_MEMORY_ALLOCATION_ERROR) { return; } // BCP 47 language tag -> ICU locale ID - char localeID[ULOC_FULLNAME_CAPACITY]; + CharString localeID; int32_t parsedLength; - int32_t length = uloc_forLanguageTag(lang.data(), localeID, ULOC_FULLNAME_CAPACITY, - &parsedLength, &errorCode); - if(U_FAILURE(errorCode) || - parsedLength != lang.length() || length >= ULOC_FULLNAME_CAPACITY) { + { + CharStringByteSink sink(&localeID); + ulocimp_forLanguageTag(lang.data(), -1, sink, &parsedLength, &errorCode); + } + if(U_FAILURE(errorCode) || parsedLength != lang.length()) { errorCode = U_ZERO_ERROR; setParseError("expected language tag in [import langTag]", errorCode); return; } // localeID minus all keywords char baseID[ULOC_FULLNAME_CAPACITY]; - length = uloc_getBaseName(localeID, baseID, ULOC_FULLNAME_CAPACITY, &errorCode); + int32_t length = uloc_getBaseName(localeID.data(), baseID, ULOC_FULLNAME_CAPACITY, &errorCode); if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) { errorCode = U_ZERO_ERROR; setParseError("expected language tag in [import langTag]", errorCode); @@ -629,11 +632,12 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) { uprv_memcpy(baseID, "und", 3); } // @collation=type, or length=0 if not specified - char collationType[ULOC_KEYWORDS_CAPACITY]; - length = uloc_getKeywordValue(localeID, "collation", - collationType, ULOC_KEYWORDS_CAPACITY, - &errorCode); - if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) { + CharString collationType; + { + CharStringByteSink sink(&collationType); + ulocimp_getKeywordValue(localeID.data(), "collation", sink, &errorCode); + } + if(U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; setParseError("expected language tag in [import langTag]", errorCode); return; @@ -642,7 +646,8 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) { setParseError("[import langTag] is not supported", errorCode); } else { UnicodeString importedRules; - importer->getRules(baseID, length > 0 ? collationType : "standard", + importer->getRules(baseID, + !collationType.isEmpty() ? collationType.data() : "standard", importedRules, errorReason, errorCode); if(U_FAILURE(errorCode)) { if(errorReason == nullptr) { diff --git a/yass/third_party/icu/source/i18n/dayperiodrules.cpp b/yass/third_party/icu/source/i18n/dayperiodrules.cpp index 3d9ab5bfac..294390cce2 100644 --- a/yass/third_party/icu/source/i18n/dayperiodrules.cpp +++ b/yass/third_party/icu/source/i18n/dayperiodrules.cpp @@ -14,10 +14,12 @@ #include "dayperiodrules.h" #include "unicode/ures.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cstring.h" #include "ucln_in.h" #include "uhash.h" +#include "ulocimp.h" #include "umutex.h" #include "uresimp.h" @@ -342,7 +344,6 @@ const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCo const char *localeCode = locale.getBaseName(); char name[ULOC_FULLNAME_CAPACITY]; - char parentName[ULOC_FULLNAME_CAPACITY]; if (uprv_strlen(localeCode) < ULOC_FULLNAME_CAPACITY) { uprv_strcpy(name, localeCode); @@ -360,13 +361,14 @@ const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCo while (*name != '\0') { ruleSetNum = uhash_geti(data->localeToRuleSetNumMap, name); if (ruleSetNum == 0) { - // name and parentName can't be the same pointer, so fill in parent then copy to child. - uloc_getParent(name, parentName, ULOC_FULLNAME_CAPACITY, &errorCode); - if (*parentName == '\0') { + CharString parent; + CharStringByteSink sink(&parent); + ulocimp_getParent(name, sink, &errorCode); + if (parent.isEmpty()) { // Saves a lookup in the hash table. break; } - uprv_strcpy(name, parentName); + parent.extract(name, UPRV_LENGTHOF(name), errorCode); } else { break; } diff --git a/yass/third_party/icu/source/i18n/double-conversion-double-to-string.cpp b/yass/third_party/icu/source/i18n/double-conversion-double-to-string.cpp index 5ee6d2b8e8..938484bca8 100644 --- a/yass/third_party/icu/source/i18n/double-conversion-double-to-string.cpp +++ b/yass/third_party/icu/source/i18n/double-conversion-double-to-string.cpp @@ -94,7 +94,14 @@ void DoubleToStringConverter::CreateExponentialRepresentation( StringBuilder* result_builder) const { DOUBLE_CONVERSION_ASSERT(length != 0); result_builder->AddCharacter(decimal_digits[0]); - if (length != 1) { + if (length == 1) { + if ((flags_ & EMIT_TRAILING_DECIMAL_POINT_IN_EXPONENTIAL) != 0) { + result_builder->AddCharacter('.'); + if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT_IN_EXPONENTIAL) != 0) { + result_builder->AddCharacter('0'); + } + } + } else { result_builder->AddCharacter('.'); result_builder->AddSubstring(&decimal_digits[1], length-1); } diff --git a/yass/third_party/icu/source/i18n/double-conversion-double-to-string.h b/yass/third_party/icu/source/i18n/double-conversion-double-to-string.h index 9940052c64..b8cb06a8b2 100644 --- a/yass/third_party/icu/source/i18n/double-conversion-double-to-string.h +++ b/yass/third_party/icu/source/i18n/double-conversion-double-to-string.h @@ -93,7 +93,9 @@ class DoubleToStringConverter { EMIT_TRAILING_DECIMAL_POINT = 2, EMIT_TRAILING_ZERO_AFTER_POINT = 4, UNIQUE_ZERO = 8, - NO_TRAILING_ZERO = 16 + NO_TRAILING_ZERO = 16, + EMIT_TRAILING_DECIMAL_POINT_IN_EXPONENTIAL = 32, + EMIT_TRAILING_ZERO_AFTER_POINT_IN_EXPONENTIAL = 64 }; // Flags should be a bit-or combination of the possible Flags-enum. @@ -112,6 +114,13 @@ class DoubleToStringConverter { // of the result in precision mode. Matches printf's %g. // When EMIT_TRAILING_ZERO_AFTER_POINT is also given, one trailing zero is // preserved. + // - EMIT_TRAILING_DECIMAL_POINT_IN_EXPONENTIAL: when the input number has + // exactly one significant digit and is converted into exponent form then a + // trailing decimal point is appended to the significand in shortest mode + // or in precision mode with one requested digit. + // - EMIT_TRAILING_ZERO_AFTER_POINT_IN_EXPONENTIAL: in addition to a trailing + // decimal point emits a trailing '0'-character. This flag requires the + // EMIT_TRAILING_DECIMAL_POINT_IN_EXPONENTIAL flag. // // Infinity symbol and nan_symbol provide the string representation for these // special values. If the string is nullptr and the special value is encountered @@ -147,6 +156,22 @@ class DoubleToStringConverter { // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. // + // When converting numbers with exactly one significant digit to exponent + // form in shortest mode or in precision mode with one requested digit, the + // EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT flags have + // no effect. Use the EMIT_TRAILING_DECIMAL_POINT_IN_EXPONENTIAL flag to + // append a decimal point in this case and the + // EMIT_TRAILING_ZERO_AFTER_POINT_IN_EXPONENTIAL flag to also append a + // '0'-character in this case. + // Example with decimal_in_shortest_low = 0: + // ToShortest(0.0009) -> "9e-4" + // with EMIT_TRAILING_DECIMAL_POINT_IN_EXPONENTIAL deactivated. + // ToShortest(0.0009) -> "9.e-4" + // with EMIT_TRAILING_DECIMAL_POINT_IN_EXPONENTIAL activated. + // ToShortest(0.0009) -> "9.0e-4" + // with EMIT_TRAILING_DECIMAL_POINT_IN_EXPONENTIAL activated and + // EMIT_TRAILING_ZERO_AFTER_POINT_IN_EXPONENTIAL activated. + // // The min_exponent_width is used for exponential representations. // The converter adds leading '0's to the exponent until the exponent // is at least min_exponent_width digits long. diff --git a/yass/third_party/icu/source/i18n/dtitvinf.cpp b/yass/third_party/icu/source/i18n/dtitvinf.cpp index 3733d04518..c4b6bbcf40 100644 --- a/yass/third_party/icu/source/i18n/dtitvinf.cpp +++ b/yass/third_party/icu/source/i18n/dtitvinf.cpp @@ -23,6 +23,7 @@ #include #endif +#include "bytesinkutil.h" #include "cmemory.h" #include "cstring.h" #include "unicode/msgfmt.h" @@ -35,6 +36,7 @@ #include "uresimp.h" #include "hash.h" #include "gregoimp.h" +#include "ulocimp.h" #include "uresimp.h" @@ -397,17 +399,19 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& status) // Get the correct calendar type const char * calendarTypeToUse = gGregorianTag; // initial default - char calendarType[ULOC_KEYWORDS_CAPACITY]; // to be filled in with the type to use, if all goes well char localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY]; // obtain a locale that always has the calendar key value that should be used (void)ures_getFunctionalEquivalent(localeWithCalendarKey, ULOC_LOCALE_IDENTIFIER_CAPACITY, nullptr, "calendar", "calendar", locName, nullptr, false, &status); localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination // now get the calendar key value from that locale - int32_t calendarTypeLen = uloc_getKeywordValue(localeWithCalendarKey, "calendar", calendarType, - ULOC_KEYWORDS_CAPACITY, &status); - if (U_SUCCESS(status) && calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { - calendarTypeToUse = calendarType; + CharString calendarType; + { + CharStringByteSink sink(&calendarType); + ulocimp_getKeywordValue(localeWithCalendarKey, "calendar", sink, &status); + } + if (U_SUCCESS(status)) { + calendarTypeToUse = calendarType.data(); } status = U_ZERO_ERROR; diff --git a/yass/third_party/icu/source/i18n/dtptngen.cpp b/yass/third_party/icu/source/i18n/dtptngen.cpp index 1f74540fbd..aa23b7cd4a 100644 --- a/yass/third_party/icu/source/i18n/dtptngen.cpp +++ b/yass/third_party/icu/source/i18n/dtptngen.cpp @@ -29,6 +29,7 @@ #include "unicode/ustring.h" #include "unicode/rep.h" #include "unicode/region.h" +#include "bytesinkutil.h" #include "cpputils.h" #include "mutex.h" #include "umutex.h" @@ -37,7 +38,9 @@ #include "locbased.h" #include "hash.h" #include "uhash.h" +#include "ulocimp.h" #include "uresimp.h" +#include "ulocimp.h" #include "dtptngen_impl.h" #include "ucln_in.h" #include "charstr.h" @@ -655,17 +658,9 @@ void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErro if (U_FAILURE(status)) { return; } const char *language = locale.getLanguage(); - const char *country = locale.getCountry(); - - char regionOverride[8]; - int32_t regionOverrideLength = locale.getKeywordValue("rg", regionOverride, sizeof(regionOverride), status); - if (U_SUCCESS(status) && regionOverrideLength > 0) { - country = regionOverride; - if (regionOverrideLength > 2) { - // chop off any subdivision codes that may have been included - regionOverride[2] = '\0'; - } - } + char baseCountry[8]; + ulocimp_getRegionForSupplementalData(locale.getName(), false, baseCountry, 8, &status); + const char* country = baseCountry; Locale maxLocale; // must be here for correct lifetime if (*language == '\0' || *country == '\0') { @@ -910,22 +905,19 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& &localStatus); localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination // now get the calendar key value from that locale - char calendarType[ULOC_KEYWORDS_CAPACITY]; - int32_t calendarTypeLen = uloc_getKeywordValue( - localeWithCalendarKey, - "calendar", - calendarType, - ULOC_KEYWORDS_CAPACITY, - &localStatus); + destination.clear(); + { + CharStringByteSink sink(&destination); + ulocimp_getKeywordValue( + localeWithCalendarKey, + "calendar", + sink, + &localStatus); + } // If the input locale was invalid, don't fail with missing resource error, instead // continue with default of Gregorian. if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) { err = localStatus; - return; - } - if (calendarTypeLen > 0 && calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { - destination.clear().append(calendarType, -1, err); - if (U_FAILURE(err)) { return; } } } } @@ -1031,7 +1023,7 @@ struct DateTimePatternGenerator::AvailableFormatsSink : public ResourceSink { AvailableFormatsSink(DateTimePatternGenerator& _dtpg) : dtpg(_dtpg) {} virtual ~AvailableFormatsSink(); - virtual void put(const char *key, ResourceValue &value, UBool isRoot, + virtual void put(const char *key, ResourceValue &value, UBool /*isRoot*/, UErrorCode &errorCode) override { const UnicodeString formatKey(key, -1, US_INV); if (!dtpg.isAvailableFormatSet(formatKey) ) { @@ -1040,7 +1032,7 @@ struct DateTimePatternGenerator::AvailableFormatsSink : public ResourceSink { // derived from std patterns, but not a previous availableFormats entry: const UnicodeString& formatValue = value.getUnicodeString(errorCode); conflictingPattern.remove(); - dtpg.addPatternWithSkeleton(formatValue, &formatKey, !isRoot, conflictingPattern, errorCode); + dtpg.addPatternWithSkeleton(formatValue, &formatKey, true, conflictingPattern, errorCode); } } }; diff --git a/yass/third_party/icu/source/i18n/ethpccal.h b/yass/third_party/icu/source/i18n/ethpccal.h index 1a5cd4f41a..19d9e0efb7 100644 --- a/yass/third_party/icu/source/i18n/ethpccal.h +++ b/yass/third_party/icu/source/i18n/ethpccal.h @@ -89,7 +89,7 @@ public: /** * Constant for ነሐሴ, the 12th month of the Ethiopic year. */ - NEHASSA, + NEHASSE, /** * Constant for ጳጉሜን, the 13th month of the Ethiopic year. diff --git a/yass/third_party/icu/source/i18n/fmtable.cpp b/yass/third_party/icu/source/i18n/fmtable.cpp index 618868c0a2..65dafc95aa 100644 --- a/yass/third_party/icu/source/i18n/fmtable.cpp +++ b/yass/third_party/icu/source/i18n/fmtable.cpp @@ -56,7 +56,7 @@ using number::impl::DecimalQuantity; // Return true if *a == *b. static inline UBool objectEquals(const UObject* a, const UObject* b) { // LATER: return *a == *b; - return *((const Measure*) a) == *((const Measure*) b); + return *((const Measure*) a) == *b; } // Return a clone of *a. diff --git a/yass/third_party/icu/source/i18n/gender.cpp b/yass/third_party/icu/source/i18n/gender.cpp index 97d161de32..d023b50218 100644 --- a/yass/third_party/icu/source/i18n/gender.cpp +++ b/yass/third_party/icu/source/i18n/gender.cpp @@ -19,15 +19,20 @@ #if !UCONFIG_NO_FORMATTING +#include + #include "unicode/gender.h" #include "unicode/ugender.h" #include "unicode/ures.h" +#include "bytesinkutil.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "mutex.h" #include "uassert.h" #include "ucln_in.h" +#include "ulocimp.h" #include "umutex.h" #include "uhash.h" @@ -148,12 +153,18 @@ const GenderInfo* GenderInfo::loadInstance(const Locale& locale, UErrorCode& sta const char16_t* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &key_status); if (s == nullptr) { key_status = U_ZERO_ERROR; - char parentLocaleName[ULOC_FULLNAME_CAPACITY]; - uprv_strcpy(parentLocaleName, curLocaleName); - while (s == nullptr && uloc_getParent(parentLocaleName, parentLocaleName, ULOC_FULLNAME_CAPACITY, &key_status) > 0) { + CharString parentLocaleName(curLocaleName, key_status); + while (s == nullptr) { + { + CharString tmp; + CharStringByteSink sink(&tmp); + ulocimp_getParent(parentLocaleName.data(), sink, &status); + if (tmp.isEmpty()) break; + parentLocaleName = std::move(tmp); + } key_status = U_ZERO_ERROR; resLen = 0; - s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &key_status); + s = ures_getStringByKey(locRes.getAlias(), parentLocaleName.data(), &resLen, &key_status); key_status = U_ZERO_ERROR; } } diff --git a/yass/third_party/icu/source/i18n/gregocal.cpp b/yass/third_party/icu/source/i18n/gregocal.cpp index 5fd71d496c..ec14abbbf1 100644 --- a/yass/third_party/icu/source/i18n/gregocal.cpp +++ b/yass/third_party/icu/source/i18n/gregocal.cpp @@ -610,14 +610,6 @@ GregorianCalendar::monthLength(int32_t month, int32_t year) const // ------------------------------------- -int32_t -GregorianCalendar::yearLength(int32_t year) const -{ - return isLeapYear(year) ? 366 : 365; -} - -// ------------------------------------- - int32_t GregorianCalendar::yearLength() const { @@ -626,24 +618,6 @@ GregorianCalendar::yearLength() const // ------------------------------------- -/** -* After adjustments such as add(MONTH), add(YEAR), we don't want the -* month to jump around. E.g., we don't want Jan 31 + 1 month to go to Mar -* 3, we want it to go to Feb 28. Adjustments which might run into this -* problem call this method to retain the proper month. -*/ -void -GregorianCalendar::pinDayOfMonth() -{ - int32_t monthLen = monthLength(internalGetMonth()); - int32_t dom = internalGet(UCAL_DATE); - if(dom > monthLen) - set(UCAL_DATE, monthLen); -} - -// ------------------------------------- - - UBool GregorianCalendar::validateFields() const { diff --git a/yass/third_party/icu/source/i18n/hebrwcal.cpp b/yass/third_party/icu/source/i18n/hebrwcal.cpp index efd0d8fd4a..6451c89c59 100644 --- a/yass/third_party/icu/source/i18n/hebrwcal.cpp +++ b/yass/third_party/icu/source/i18n/hebrwcal.cpp @@ -777,7 +777,7 @@ int32_t HebrewCalendar::internalGetMonth() const { HebrewCalendar *nonConstThis = (HebrewCalendar*)this; // cast away const int32_t year = nonConstThis->handleGetExtendedYear(); - return ordinalMonth + ((isLeapYear(year) && (ordinalMonth > ADAR_1)) ? 1: 0); + return ordinalMonth + (((!isLeapYear(year)) && (ordinalMonth > ADAR_1)) ? 1: 0); } return Calendar::internalGetMonth(); } diff --git a/yass/third_party/icu/source/i18n/iso8601cal.cpp b/yass/third_party/icu/source/i18n/iso8601cal.cpp index 1bc81fac15..c3288bc6b5 100644 --- a/yass/third_party/icu/source/i18n/iso8601cal.cpp +++ b/yass/third_party/icu/source/i18n/iso8601cal.cpp @@ -14,7 +14,13 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ISO8601Calendar) ISO8601Calendar::ISO8601Calendar(const Locale& aLocale, UErrorCode& success) : GregorianCalendar(aLocale, success) { - setFirstDayOfWeek(UCAL_MONDAY); + UErrorCode fwStatus = U_ZERO_ERROR; + int32_t fwLength = aLocale.getKeywordValue("fw", nullptr, 0, fwStatus); + // Do not set first day of week for iso8601 to Monday if we have fw keyword + // and let the value set by the Calendar constructor to take care of it. + if (U_SUCCESS(fwStatus) && fwLength == 0) { + setFirstDayOfWeek(UCAL_MONDAY); + } setMinimalDaysInFirstWeek(4); } diff --git a/yass/third_party/icu/source/i18n/measunit.cpp b/yass/third_party/icu/source/i18n/measunit.cpp index 47ae5bcf5d..abb2199770 100644 --- a/yass/third_party/icu/source/i18n/measunit.cpp +++ b/yass/third_party/icu/source/i18n/measunit.cpp @@ -56,11 +56,11 @@ static const int32_t gOffsets[] = { 429, 430, 436, - 446, - 451, - 455, - 457, - 491 + 447, + 452, + 456, + 458, + 492 }; static const int32_t kCurrencyOffset = 5; @@ -532,6 +532,7 @@ static const char * const gSubTypes[] = { "watt", "atmosphere", "bar", + "gasoline-energy-density", "hectopascal", "inch-ofhg", "kilopascal", @@ -1687,70 +1688,78 @@ MeasureUnit MeasureUnit::getBar() { return MeasureUnit(18, 1); } -MeasureUnit *MeasureUnit::createHectopascal(UErrorCode &status) { +MeasureUnit *MeasureUnit::createGasolineEnergyDensity(UErrorCode &status) { return MeasureUnit::create(18, 2, status); } -MeasureUnit MeasureUnit::getHectopascal() { +MeasureUnit MeasureUnit::getGasolineEnergyDensity() { return MeasureUnit(18, 2); } -MeasureUnit *MeasureUnit::createInchHg(UErrorCode &status) { +MeasureUnit *MeasureUnit::createHectopascal(UErrorCode &status) { return MeasureUnit::create(18, 3, status); } -MeasureUnit MeasureUnit::getInchHg() { +MeasureUnit MeasureUnit::getHectopascal() { return MeasureUnit(18, 3); } -MeasureUnit *MeasureUnit::createKilopascal(UErrorCode &status) { +MeasureUnit *MeasureUnit::createInchHg(UErrorCode &status) { return MeasureUnit::create(18, 4, status); } -MeasureUnit MeasureUnit::getKilopascal() { +MeasureUnit MeasureUnit::getInchHg() { return MeasureUnit(18, 4); } -MeasureUnit *MeasureUnit::createMegapascal(UErrorCode &status) { +MeasureUnit *MeasureUnit::createKilopascal(UErrorCode &status) { return MeasureUnit::create(18, 5, status); } -MeasureUnit MeasureUnit::getMegapascal() { +MeasureUnit MeasureUnit::getKilopascal() { return MeasureUnit(18, 5); } -MeasureUnit *MeasureUnit::createMillibar(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMegapascal(UErrorCode &status) { return MeasureUnit::create(18, 6, status); } -MeasureUnit MeasureUnit::getMillibar() { +MeasureUnit MeasureUnit::getMegapascal() { return MeasureUnit(18, 6); } -MeasureUnit *MeasureUnit::createMillimeterOfMercury(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMillibar(UErrorCode &status) { return MeasureUnit::create(18, 7, status); } -MeasureUnit MeasureUnit::getMillimeterOfMercury() { +MeasureUnit MeasureUnit::getMillibar() { return MeasureUnit(18, 7); } -MeasureUnit *MeasureUnit::createPascal(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMillimeterOfMercury(UErrorCode &status) { return MeasureUnit::create(18, 8, status); } -MeasureUnit MeasureUnit::getPascal() { +MeasureUnit MeasureUnit::getMillimeterOfMercury() { return MeasureUnit(18, 8); } -MeasureUnit *MeasureUnit::createPoundPerSquareInch(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPascal(UErrorCode &status) { return MeasureUnit::create(18, 9, status); } -MeasureUnit MeasureUnit::getPoundPerSquareInch() { +MeasureUnit MeasureUnit::getPascal() { return MeasureUnit(18, 9); } +MeasureUnit *MeasureUnit::createPoundPerSquareInch(UErrorCode &status) { + return MeasureUnit::create(18, 10, status); +} + +MeasureUnit MeasureUnit::getPoundPerSquareInch() { + return MeasureUnit(18, 10); +} + MeasureUnit *MeasureUnit::createBeaufort(UErrorCode &status) { return MeasureUnit::create(19, 0, status); } diff --git a/yass/third_party/icu/source/i18n/nfsubs.cpp b/yass/third_party/icu/source/i18n/nfsubs.cpp index 4f3247ce50..5256b8a39b 100644 --- a/yass/third_party/icu/source/i18n/nfsubs.cpp +++ b/yass/third_party/icu/source/i18n/nfsubs.cpp @@ -103,7 +103,25 @@ public: } virtual double transformNumber(double number) const override { - if (getRuleSet()) { + bool doFloor = getRuleSet() != nullptr; + if (!doFloor) { + // This is a HACK that partially addresses ICU-22313. The original code wanted us to do + // floor() on the result if we were passing it to another rule set, but not if we were passing + // it to a DecimalFormat. But the DurationRules rule set has multiplier substitutions where + // we DO want to do the floor() operation. What we REALLY want is to do floor() any time + // the owning rule also has a ModulusSubsitution, but we don't have access to that information + // here, so instead we're doing a floor() any time the DecimalFormat has maxFracDigits equal to + // 0. This seems to work with our existing rule sets, but could be a problem in the future, + // but the "real" fix for DurationRules isn't worth doing, since we're deprecating DurationRules + // anyway. This is enough to keep it from being egregiously wrong, without obvious side + // effects. --rtg 8/16/23 + const DecimalFormat* decimalFormat = getNumberFormat(); + if (decimalFormat == nullptr || decimalFormat->getMaximumFractionDigits() == 0) { + doFloor = true; + } + } + + if (doFloor) { return uprv_floor(number / divisor); } else { return number / divisor; diff --git a/yass/third_party/icu/source/i18n/plurrule.cpp b/yass/third_party/icu/source/i18n/plurrule.cpp index 9c37b09e25..839d14147c 100644 --- a/yass/third_party/icu/source/i18n/plurrule.cpp +++ b/yass/third_party/icu/source/i18n/plurrule.cpp @@ -12,6 +12,8 @@ #include #include +#include + #include "unicode/utypes.h" #include "unicode/localpointer.h" #include "unicode/plurrule.h" @@ -20,6 +22,7 @@ #include "unicode/numfmt.h" #include "unicode/decimfmt.h" #include "unicode/numberrangeformatter.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cmemory.h" #include "cstring.h" @@ -40,6 +43,7 @@ #include "util.h" #include "pluralranges.h" #include "numrange_impl.h" +#include "ulocimp.h" #if !UCONFIG_NO_FORMATTING @@ -827,14 +831,19 @@ PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorC if (s == nullptr) { // Check parent locales. UErrorCode status = U_ZERO_ERROR; - char parentLocaleName[ULOC_FULLNAME_CAPACITY]; const char *curLocaleName2=locale.getBaseName(); - uprv_strcpy(parentLocaleName, curLocaleName2); + CharString parentLocaleName(curLocaleName2, status); - while (uloc_getParent(parentLocaleName, parentLocaleName, - ULOC_FULLNAME_CAPACITY, &status) > 0) { + for (;;) { + { + CharString tmp; + CharStringByteSink sink(&tmp); + ulocimp_getParent(parentLocaleName.data(), sink, &status); + if (tmp.isEmpty()) break; + parentLocaleName = std::move(tmp); + } resLen=0; - s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); + s = ures_getStringByKey(locRes.getAlias(), parentLocaleName.data(), &resLen, &status); if (s != nullptr) { errCode = U_ZERO_ERROR; break; diff --git a/yass/third_party/icu/source/i18n/reldatefmt.cpp b/yass/third_party/icu/source/i18n/reldatefmt.cpp index 24d22a4b4b..ebdf6c9716 100644 --- a/yass/third_party/icu/source/i18n/reldatefmt.cpp +++ b/yass/third_party/icu/source/i18n/reldatefmt.cpp @@ -12,7 +12,7 @@ #include "unicode/reldatefmt.h" -#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION +#if !UCONFIG_NO_FORMATTING #include #include @@ -701,7 +701,7 @@ static UBool getDateTimePattern( return getStringByIndex(topLevel.getAlias(), dateTimeFormatOffset, result, status); } -template<> +template<> const RelativeDateTimeCacheData *LocaleCacheKey::createObject(const void * /*unused*/, UErrorCode &status) const { const char *localeId = fLoc.getName(); LocalUResourceBundlePointer topLevel(ures_open(nullptr, localeId, &status)); @@ -761,6 +761,7 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter(UErrorCode& status) : fStyle(UDAT_STYLE_LONG), fContext(UDISPCTX_CAPITALIZATION_NONE), fOptBreakIterator(nullptr) { + (void)fOptBreakIterator; // suppress unused field warning init(nullptr, nullptr, status); } @@ -804,16 +805,25 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter( if (U_FAILURE(status)) { return; } + if (styl < 0 || UDAT_STYLE_COUNT <= styl) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } if ((capitalizationContext >> 8) != UDISPCTX_TYPE_CAPITALIZATION) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) { +#if !UCONFIG_NO_BREAK_ITERATION BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status); if (U_FAILURE(status)) { return; } init(nfToAdopt, bi, status); +#else + status = U_UNSUPPORTED_ERROR; + return; +#endif // !UCONFIG_NO_BREAK_ITERATION } else { init(nfToAdopt, nullptr, status); } @@ -832,9 +842,11 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter( fCache->addRef(); fNumberFormat->addRef(); fPluralRules->addRef(); +#if !UCONFIG_NO_BREAK_ITERATION if (fOptBreakIterator != nullptr) { fOptBreakIterator->addRef(); } +#endif // !UCONFIG_NO_BREAK_ITERATION } RelativeDateTimeFormatter& RelativeDateTimeFormatter::operator=( @@ -843,7 +855,9 @@ RelativeDateTimeFormatter& RelativeDateTimeFormatter::operator=( SharedObject::copyPtr(other.fCache, fCache); SharedObject::copyPtr(other.fNumberFormat, fNumberFormat); SharedObject::copyPtr(other.fPluralRules, fPluralRules); +#if !UCONFIG_NO_BREAK_ITERATION SharedObject::copyPtr(other.fOptBreakIterator, fOptBreakIterator); +#endif // !UCONFIG_NO_BREAK_ITERATION fStyle = other.fStyle; fContext = other.fContext; fLocale = other.fLocale; @@ -861,9 +875,11 @@ RelativeDateTimeFormatter::~RelativeDateTimeFormatter() { if (fPluralRules != nullptr) { fPluralRules->removeRef(); } +#if !UCONFIG_NO_BREAK_ITERATION if (fOptBreakIterator != nullptr) { fOptBreakIterator->removeRef(); } +#endif // !UCONFIG_NO_BREAK_ITERATION } const NumberFormat& RelativeDateTimeFormatter::getNumberFormat() const { @@ -1015,6 +1031,10 @@ void RelativeDateTimeFormatter::formatNumericImpl( if (U_FAILURE(status)) { return; } + if (unit < 0 || UDAT_REL_UNIT_COUNT <= unit) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } UDateDirection direction = UDAT_DIRECTION_NEXT; if (std::signbit(offset)) { // needed to handle -0.0 direction = UDAT_DIRECTION_LAST; @@ -1083,7 +1103,9 @@ void RelativeDateTimeFormatter::formatAbsoluteImpl( if (U_FAILURE(status)) { return; } - if (unit == UDAT_ABSOLUTE_NOW && direction != UDAT_DIRECTION_PLAIN) { + if ((unit < 0 || UDAT_ABSOLUTE_UNIT_COUNT <= unit) || + (direction < 0 || UDAT_DIRECTION_COUNT <= direction) || + (unit == UDAT_ABSOLUTE_NOW && direction != UDAT_DIRECTION_PLAIN)) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } @@ -1191,6 +1213,7 @@ UnicodeString& RelativeDateTimeFormatter::combineDateAndTime( } UnicodeString& RelativeDateTimeFormatter::adjustForContext(UnicodeString &str) const { +#if !UCONFIG_NO_BREAK_ITERATION if (fOptBreakIterator == nullptr || str.length() == 0 || !u_islower(str.char32At(0))) { return str; @@ -1204,25 +1227,36 @@ UnicodeString& RelativeDateTimeFormatter::adjustForContext(UnicodeString &str) c fOptBreakIterator->get(), fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); +#endif // !UCONFIG_NO_BREAK_ITERATION return str; } UBool RelativeDateTimeFormatter::checkNoAdjustForContext(UErrorCode& status) const { +#if !UCONFIG_NO_BREAK_ITERATION // This is unsupported because it's hard to keep fields in sync with title // casing. The code could be written and tested if there is demand. if (fOptBreakIterator != nullptr) { status = U_UNSUPPORTED_ERROR; return false; } +#else + (void)status; // suppress unused argument warning +#endif // !UCONFIG_NO_BREAK_ITERATION return true; } void RelativeDateTimeFormatter::init( NumberFormat *nfToAdopt, +#if !UCONFIG_NO_BREAK_ITERATION BreakIterator *biToAdopt, +#else + std::nullptr_t, +#endif // !UCONFIG_NO_BREAK_ITERATION UErrorCode &status) { LocalPointer nf(nfToAdopt); +#if !UCONFIG_NO_BREAK_ITERATION LocalPointer bi(biToAdopt); +#endif // !UCONFIG_NO_BREAK_ITERATION UnifiedCache::getByLocale(fLocale, fCache, status); if (U_FAILURE(status)) { return; @@ -1251,6 +1285,7 @@ void RelativeDateTimeFormatter::init( nf.orphan(); SharedObject::copyPtr(shared, fNumberFormat); } +#if !UCONFIG_NO_BREAK_ITERATION if (bi.isNull()) { SharedObject::clearPtr(fOptBreakIterator); } else { @@ -1262,6 +1297,7 @@ void RelativeDateTimeFormatter::init( bi.orphan(); SharedObject::copyPtr(shared, fOptBreakIterator); } +#endif // !UCONFIG_NO_BREAK_ITERATION } U_NAMESPACE_END diff --git a/yass/third_party/icu/source/i18n/reldtfmt.cpp b/yass/third_party/icu/source/i18n/reldtfmt.cpp index 0c836a0b79..7015c13089 100644 --- a/yass/third_party/icu/source/i18n/reldtfmt.cpp +++ b/yass/third_party/icu/source/i18n/reldtfmt.cpp @@ -78,6 +78,14 @@ RelativeDateFormat::RelativeDateFormat( UDateFormatStyle timeStyle, UDateFormatS if(U_FAILURE(status) ) { return; } + if (dateStyle != UDAT_FULL_RELATIVE && + dateStyle != UDAT_LONG_RELATIVE && + dateStyle != UDAT_MEDIUM_RELATIVE && + dateStyle != UDAT_SHORT_RELATIVE && + dateStyle != UDAT_RELATIVE) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } if (timeStyle < UDAT_NONE || timeStyle > UDAT_SHORT) { // don't support other time styles (e.g. relative styles), for now diff --git a/yass/third_party/icu/source/i18n/rulebasedcollator.cpp b/yass/third_party/icu/source/i18n/rulebasedcollator.cpp index e9482628d9..cf4cfc87f2 100644 --- a/yass/third_party/icu/source/i18n/rulebasedcollator.cpp +++ b/yass/third_party/icu/source/i18n/rulebasedcollator.cpp @@ -32,6 +32,7 @@ #include "unicode/utf8.h" #include "unicode/uversion.h" #include "bocsu.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cmemory.h" #include "collation.h" @@ -50,6 +51,7 @@ #include "ucol_imp.h" #include "uhash.h" #include "uitercollationiterator.h" +#include "ulocimp.h" #include "ustr_imp.h" #include "utf16collationiterator.h" #include "utf8collationiterator.h" @@ -1579,8 +1581,12 @@ RuleBasedCollator::internalGetShortDefinitionString(const char *locale, appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode); } // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default. - length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode); - appendSubtag(result, 'K', subtag, length, errorCode); + { + CharString collation; + CharStringByteSink sink(&collation); + ulocimp_getKeywordValue(resultLocale, "collation", sink, &errorCode); + appendSubtag(result, 'K', collation.data(), collation.length(), errorCode); + } length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode); if (length == 0) { appendSubtag(result, 'L', "root", 4, errorCode); diff --git a/yass/third_party/icu/source/i18n/smpdtfmt.cpp b/yass/third_party/icu/source/i18n/smpdtfmt.cpp index a87fb8d5e1..628f7febe0 100644 --- a/yass/third_party/icu/source/i18n/smpdtfmt.cpp +++ b/yass/third_party/icu/source/i18n/smpdtfmt.cpp @@ -721,19 +721,28 @@ void SimpleDateFormat::construct(EStyle timeStyle, UnicodeString timePattern; if (timeStyle >= kFull && timeStyle <= kShort) { + bool hasRgOrHcSubtag = false; + // also use DTPG if the locale has the "rg" or "hc" ("hours") subtag-- even if the overriding region + // or hour cycle is the same as the one we get by default, we go through the DateTimePatternGenerator + UErrorCode dummyErr1 = U_ZERO_ERROR, dummyErr2 = U_ZERO_ERROR; + if (locale.getKeywordValue("rg", nullptr, 0, dummyErr1) > 0 || locale.getKeywordValue("hours", nullptr, 0, dummyErr2) > 0) { + hasRgOrHcSubtag = true; + } + const char* baseLocID = locale.getBaseName(); if (baseLocID[0]!=0 && uprv_strcmp(baseLocID,"und")!=0) { UErrorCode useStatus = U_ZERO_ERROR; Locale baseLoc(baseLocID); Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus)); - if (U_SUCCESS(useStatus) && validLoc!=baseLoc) { - bool useDTPG = false; + if (hasRgOrHcSubtag || (U_SUCCESS(useStatus) && validLoc!=baseLoc)) { + bool useDTPG = hasRgOrHcSubtag; const char* baseReg = baseLoc.getCountry(); // empty string if no region if ((baseReg[0]!=0 && uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0) || uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) { // use DTPG if // * baseLoc has a region and validLoc does not have the same one (or has none), OR // * validLoc has a different language code than baseLoc + // * the original locale has the rg or hc subtag useDTPG = true; } if (useDTPG) { diff --git a/yass/third_party/icu/source/i18n/timezone.cpp b/yass/third_party/icu/source/i18n/timezone.cpp index 75479267d7..02cbc78cef 100644 --- a/yass/third_party/icu/source/i18n/timezone.cpp +++ b/yass/third_party/icu/source/i18n/timezone.cpp @@ -1591,6 +1591,22 @@ TimeZone::getCanonicalID(const UnicodeString& id, UnicodeString& canonicalID, UB return canonicalID; } +UnicodeString& +TimeZone::getIanaID(const UnicodeString& id, UnicodeString& ianaID, UErrorCode& status) +{ + ianaID.remove(); + if (U_FAILURE(status)) { + return ianaID; + } + if (id.compare(ConstChar16Ptr(UNKNOWN_ZONE_ID), UNKNOWN_ZONE_ID_LENGTH) == 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + ianaID.setToBogus(); + } else { + ZoneMeta::getIanaID(id, ianaID, status); + } + return ianaID; +} + UnicodeString& TimeZone::getWindowsID(const UnicodeString& id, UnicodeString& winid, UErrorCode& status) { winid.remove(); diff --git a/yass/third_party/icu/source/i18n/tmutfmt.cpp b/yass/third_party/icu/source/i18n/tmutfmt.cpp index 87b509ea35..dc48b2ee2a 100644 --- a/yass/third_party/icu/source/i18n/tmutfmt.cpp +++ b/yass/third_party/icu/source/i18n/tmutfmt.cpp @@ -11,14 +11,18 @@ #if !UCONFIG_NO_FORMATTING +#include + #include "unicode/decimfmt.h" #include "unicode/localpointer.h" #include "plurrule_impl.h" #include "uvector.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "hash.h" +#include "ulocimp.h" #include "uresimp.h" #include "ureslocs.h" #include "unicode/msgfmt.h" @@ -556,14 +560,17 @@ TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, return; } UErrorCode status = U_ZERO_ERROR; - char parentLocale[ULOC_FULLNAME_CAPACITY]; - uprv_strcpy(parentLocale, localeName); - int32_t locNameLen; + CharString parentLocale(localeName, status); U_ASSERT(countToPatterns != nullptr); - while ((locNameLen = uloc_getParent(parentLocale, parentLocale, - ULOC_FULLNAME_CAPACITY, &status)) >= 0){ + for (;;) { + { + CharString tmp; + CharStringByteSink sink(&tmp); + ulocimp_getParent(parentLocale.data(), sink, &status); + parentLocale = std::move(tmp); + } // look for pattern for srcPluralCount in locale tree - LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_UNIT, parentLocale, &status)); + LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_UNIT, parentLocale.data(), &status)); LocalUResourceBundlePointer unitsRes(ures_getByKey(rb.getAlias(), key, nullptr, &status)); const char* timeUnitName = getTimeUnitName(srcTimeUnitField, status); LocalUResourceBundlePointer countsToPatternRB(ures_getByKey(unitsRes.getAlias(), timeUnitName, nullptr, &status)); @@ -594,14 +601,14 @@ TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, return; } status = U_ZERO_ERROR; - if (locNameLen == 0) { + if (parentLocale.isEmpty()) { break; } } // if no unitsShort resource was found even after fallback to root locale // then search the units resource fallback from the current level to root - if ( locNameLen == 0 && uprv_strcmp(key, gShortUnitsTag) == 0) { + if ( parentLocale.isEmpty() && uprv_strcmp(key, gShortUnitsTag) == 0) { #ifdef TMUTFMT_DEBUG std::cout << "loop into searchInLocaleChain since Short-Long-Alternative \n"; #endif diff --git a/yass/third_party/icu/source/i18n/transreg.cpp b/yass/third_party/icu/source/i18n/transreg.cpp index 46c68eb74d..3441076334 100644 --- a/yass/third_party/icu/source/i18n/transreg.cpp +++ b/yass/third_party/icu/source/i18n/transreg.cpp @@ -11,6 +11,7 @@ */ #include "unicode/utypes.h" +#include "unicode/rep.h" #if !UCONFIG_NO_TRANSLITERATION @@ -531,7 +532,7 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) : registry(true, status), specDAG(true, SPECDAG_INIT_SIZE, status), variantList(VARIANT_LIST_INIT_SIZE, status), - availableIDs(AVAILABLE_IDS_INIT_SIZE, status) + availableIDs(true, AVAILABLE_IDS_INIT_SIZE, status) { registry.setValueDeleter(deleteEntry); variantList.setDeleter(uprv_deleteUObject); @@ -540,8 +541,6 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) : if (emptyString != nullptr) { variantList.adoptElement(emptyString, status); } - availableIDs.setDeleter(uprv_deleteUObject); - availableIDs.setComparer(uhash_compareCaselessUnicodeString); specDAG.setValueDeleter(uhash_deleteHashtable); } @@ -714,7 +713,7 @@ void TransliteratorRegistry::remove(const UnicodeString& ID) { TransliteratorIDParser::STVtoID(source, target, variant, id); registry.remove(id); removeSTV(source, target, variant); - availableIDs.removeElement((void*) &id); + availableIDs.remove(id); } //---------------------------------------------------------------------- @@ -728,7 +727,7 @@ void TransliteratorRegistry::remove(const UnicodeString& ID) { * i from 0 to countAvailableIDs() - 1. */ int32_t TransliteratorRegistry::countAvailableIDs() const { - return availableIDs.size(); + return availableIDs.count(); } /** @@ -738,10 +737,27 @@ int32_t TransliteratorRegistry::countAvailableIDs() const { * range, the result of getAvailableID(0) is returned. */ const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const { - if (index < 0 || index >= availableIDs.size()) { + if (index < 0 || index >= availableIDs.count()) { index = 0; } - return *(const UnicodeString*) availableIDs[index]; + + int32_t pos = UHASH_FIRST; + const UHashElement *e = nullptr; + while (index-- >= 0) { + e = availableIDs.nextElement(pos); + if (e == nullptr) { + break; + } + } + + if (e != nullptr) { + return *(UnicodeString*) e->key.pointer; + } + + // If the code reaches here, the hash table was likely modified during iteration. + // Return an statically initialized empty string due to reference return type. + static UnicodeString empty; + return empty; } StringEnumeration* TransliteratorRegistry::getAvailableIDs() const { @@ -852,14 +868,14 @@ UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index, //---------------------------------------------------------------------- TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) : - index(0), reg(_reg) { + pos(UHASH_FIRST), size(_reg.availableIDs.count()), reg(_reg) { } TransliteratorRegistry::Enumeration::~Enumeration() { } int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const { - return reg.availableIDs.size(); + return size; } const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) { @@ -875,22 +891,27 @@ const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& stat if (U_FAILURE(status)) { return nullptr; } - int32_t n = reg.availableIDs.size(); - if (index > n) { + int32_t n = reg.availableIDs.count(); + if (n != size) { status = U_ENUM_OUT_OF_SYNC_ERROR; - } - // index == n is okay -- this means we've reached the end - if (index < n) { - // Copy the string! This avoids lifetime problems. - unistr = *(const UnicodeString*)reg.availableIDs[index++]; - return &unistr; - } else { return nullptr; } + + const UHashElement* element = reg.availableIDs.nextElement(pos); + if (element == nullptr) { + // If the code reaches this point, it means that it's out of sync + // or the caller keeps asking for snext(). + return nullptr; + } + + // Copy the string! This avoids lifetime problems. + unistr = *(const UnicodeString*) element->key.pointer; + return &unistr; } void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) { - index = 0; + pos = UHASH_FIRST; + size = reg.availableIDs.count(); } UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration) @@ -945,18 +966,12 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID, registry.put(ID, adopted, status); if (visible) { registerSTV(source, target, variant); - if (!availableIDs.contains((void*) &ID)) { - UnicodeString *newID = ID.clone(); - // Check to make sure newID was created. - if (newID != nullptr) { - // NUL-terminate the ID string - newID->getTerminatedBuffer(); - availableIDs.adoptElement(newID, status); - } + if (!availableIDs.containsKey(ID)) { + availableIDs.puti(ID, /* unused value */ 1, status); } } else { removeSTV(source, target, variant); - availableIDs.removeElement((void*) &ID); + availableIDs.remove(ID); } } diff --git a/yass/third_party/icu/source/i18n/transreg.h b/yass/third_party/icu/source/i18n/transreg.h index da9c2ee8b9..3862be881d 100644 --- a/yass/third_party/icu/source/i18n/transreg.h +++ b/yass/third_party/icu/source/i18n/transreg.h @@ -423,7 +423,8 @@ class TransliteratorRegistry : public UMemory { static UClassID U_EXPORT2 getStaticClassID(); virtual UClassID getDynamicClassID() const override; private: - int32_t index; + int32_t pos; + int32_t size; const TransliteratorRegistry& reg; }; friend class Enumeration; @@ -452,7 +453,7 @@ class TransliteratorRegistry : public UMemory { /** * Vector of public full IDs. */ - UVector availableIDs; + Hashtable availableIDs; TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class diff --git a/yass/third_party/icu/source/i18n/ucal.cpp b/yass/third_party/icu/source/i18n/ucal.cpp index 18d9cf4ec7..dfc8e4f251 100644 --- a/yass/third_party/icu/source/i18n/ucal.cpp +++ b/yass/third_party/icu/source/i18n/ucal.cpp @@ -24,6 +24,7 @@ #include "unicode/localpointer.h" #include "cmemory.h" #include "cstring.h" +#include "iso8601cal.h" #include "ustrenum.h" #include "uenumimp.h" #include "ulist.h" @@ -307,7 +308,8 @@ ucal_setGregorianChange(UCalendar *cal, UDate date, UErrorCode *pErrorCode) { *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return; } - if(typeid(*cpp_cal) != typeid(GregorianCalendar)) { + if(typeid(*cpp_cal) != typeid(GregorianCalendar) && + typeid(*cpp_cal) != typeid(ISO8601Calendar)) { *pErrorCode = U_UNSUPPORTED_ERROR; return; } @@ -329,7 +331,8 @@ ucal_getGregorianChange(const UCalendar *cal, UErrorCode *pErrorCode) { *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return (UDate)0; } - if(typeid(*cpp_cal) != typeid(GregorianCalendar)) { + if(typeid(*cpp_cal) != typeid(GregorianCalendar) && + typeid(*cpp_cal) != typeid(ISO8601Calendar)) { *pErrorCode = U_UNSUPPORTED_ERROR; return (UDate)0; } @@ -625,6 +628,16 @@ ucal_getCanonicalTimeZoneID(const char16_t* id, int32_t len, return reslen; } +U_DRAFT int32_t U_EXPORT2 +ucal_getIanaTimeZoneID(const char16_t* id, int32_t len, + char16_t* result, int32_t resultCapacity, UErrorCode* status) +{ + UnicodeString ianaID; + TimeZone::getIanaID(UnicodeString(id, len), ianaID, *status); + return ianaID.extract(result, resultCapacity, *status); +} + + U_CAPI const char * U_EXPORT2 ucal_getType(const UCalendar *cal, UErrorCode* status) { diff --git a/yass/third_party/icu/source/i18n/ucol_res.cpp b/yass/third_party/icu/source/i18n/ucol_res.cpp index 9bf28db264..4ec6582f89 100644 --- a/yass/third_party/icu/source/i18n/ucol_res.cpp +++ b/yass/third_party/icu/source/i18n/ucol_res.cpp @@ -174,10 +174,20 @@ CollationLoader::CollationLoader(const CollationCacheEntry *re, const Locale &re defaultType[0] = 0; if(U_FAILURE(errorCode)) { return; } + if (locale.isBogus()) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } // Canonicalize the locale ID: Ignore all irrelevant keywords. const char *baseName = locale.getBaseName(); if(uprv_strcmp(locale.getName(), baseName) != 0) { locale = Locale(baseName); + // Due to ICU-22416, we may have bogus locale constructed from + // a string of getBaseName(). + if (locale.isBogus()) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } // Fetch the collation type from the locale ID. int32_t typeLength = requested.getKeywordValue("collation", diff --git a/yass/third_party/icu/source/i18n/ucol_sit.cpp b/yass/third_party/icu/source/i18n/ucol_sit.cpp index a740286d79..2e5bce2cba 100644 --- a/yass/third_party/icu/source/i18n/ucol_sit.cpp +++ b/yass/third_party/icu/source/i18n/ucol_sit.cpp @@ -20,6 +20,8 @@ #include "unicode/utf16.h" #include "utracimp.h" #include "ucol_imp.h" +#include "ulocimp.h" +#include "bytesinkutil.h" #include "cmemory.h" #include "cstring.h" #include "uresimp.h" @@ -86,7 +88,6 @@ static const char providerKeyword[] = "@sp="; static const int32_t locElementCount = UCOL_SIT_LOCELEMENT_MAX+1; static const int32_t locElementCapacity = 32; static const int32_t loc3066Capacity = 256; -static const int32_t internalBufferSize = 512; /* structure containing specification of a collator. Initialized * from a short string. Also used to construct a short string from a @@ -450,38 +451,37 @@ ucol_prepareShortStringOpen( const char *definition, ucol_sit_readSpecs(&s, definition, parseError, status); ucol_sit_calculateWholeLocale(&s, *status); - char buffer[internalBufferSize]; - uprv_memset(buffer, 0, internalBufferSize); - uloc_canonicalize(s.locale.data(), buffer, internalBufferSize, status); + CharString buffer; + { + CharStringByteSink sink(&buffer); + ulocimp_canonicalize(s.locale.data(), sink, status); + } - UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer, status); + UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer.data(), status); /* we try to find stuff from keyword */ UResourceBundle *collations = ures_getByKey(b, "collations", nullptr, status); UResourceBundle *collElem = nullptr; - char keyBuffer[256]; - // if there is a keyword, we pick it up and try to get elements - int32_t keyLen = uloc_getKeywordValue(buffer, "collation", keyBuffer, sizeof(keyBuffer), status); - // Treat too long a value as no keyword. - if(keyLen >= (int32_t)sizeof(keyBuffer)) { - keyLen = 0; - *status = U_ZERO_ERROR; + CharString keyBuffer; + { + // if there is a keyword, we pick it up and try to get elements + CharStringByteSink sink(&keyBuffer); + ulocimp_getKeywordValue(buffer.data(), "collation", sink, status); } - if(keyLen == 0) { + if(keyBuffer.isEmpty()) { // no keyword // we try to find the default setting, which will give us the keyword value UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", nullptr, status); if(U_SUCCESS(*status)) { int32_t defaultKeyLen = 0; const char16_t *defaultKey = ures_getString(defaultColl, &defaultKeyLen, status); - u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen); - keyBuffer[defaultKeyLen] = 0; + keyBuffer.appendInvariantChars(defaultKey, defaultKeyLen, *status); } else { *status = U_INTERNAL_PROGRAM_ERROR; return; } ures_close(defaultColl); } - collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status); + collElem = ures_getByKeyWithFallback(collations, keyBuffer.data(), collElem, status); ures_close(collElem); ures_close(collations); ures_close(b); @@ -520,14 +520,16 @@ ucol_openFromShortString( const char *definition, string = ucol_sit_readSpecs(&s, definition, parseError, status); ucol_sit_calculateWholeLocale(&s, *status); - char buffer[internalBufferSize]; - uprv_memset(buffer, 0, internalBufferSize); #ifdef UCOL_TRACE_SIT fprintf(stderr, "DEF %s, DATA %s, ERR %s\n", definition, s.locale.data(), u_errorName(*status)); #endif - uloc_canonicalize(s.locale.data(), buffer, internalBufferSize, status); + CharString buffer; + { + CharStringByteSink sink(&buffer); + ulocimp_canonicalize(s.locale.data(), sink, status); + } - UCollator *result = ucol_open(buffer, status); + UCollator *result = ucol_open(buffer.data(), status); int32_t i = 0; for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { diff --git a/yass/third_party/icu/source/i18n/ulocdata.cpp b/yass/third_party/icu/source/i18n/ulocdata.cpp index 8e0687c5f6..0aab629581 100644 --- a/yass/third_party/icu/source/i18n/ulocdata.cpp +++ b/yass/third_party/icu/source/i18n/ulocdata.cpp @@ -68,13 +68,19 @@ ulocdata_open(const char *localeID, UErrorCode *status) uld->noSubstitute = false; uld->bundle = ures_open(nullptr, localeID, status); - uld->langBundle = ures_open(U_ICUDATA_LANG, localeID, status); if (U_FAILURE(*status)) { uprv_free(uld); return nullptr; } + // ICU-22149: not all functions require lang data, so fail gracefully if it is not present + UErrorCode oldStatus = *status; + uld->langBundle = ures_open(U_ICUDATA_LANG, localeID, status); + if (*status == U_MISSING_RESOURCE_ERROR) { + *status = oldStatus; + } + return uld; } @@ -288,6 +294,11 @@ ulocdata_getLocaleDisplayPattern(ULocaleData *uld, if (U_FAILURE(*status)) return 0; + if (uld->langBundle == nullptr) { + *status = U_MISSING_RESOURCE_ERROR; + return 0; + } + patternBundle = ures_getByKey(uld->langBundle, "localeDisplayPattern", nullptr, &localStatus); if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { @@ -340,6 +351,11 @@ ulocdata_getLocaleSeparator(ULocaleData *uld, if (U_FAILURE(*status)) return 0; + if (uld->langBundle == nullptr) { + *status = U_MISSING_RESOURCE_ERROR; + return 0; + } + separatorBundle = ures_getByKey(uld->langBundle, "localeDisplayPattern", nullptr, &localStatus); if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { diff --git a/yass/third_party/icu/source/i18n/unicode/calendar.h b/yass/third_party/icu/source/i18n/unicode/calendar.h index aa83866ac9..26781f8a05 100644 --- a/yass/third_party/icu/source/i18n/unicode/calendar.h +++ b/yass/third_party/icu/source/i18n/unicode/calendar.h @@ -1237,7 +1237,8 @@ public: /** * Clears the value in the given time field, both making it unset and assigning it a * value of zero. This field value will be determined during the next resolving of - * time into time fields. + * time into time fields. Clearing UCAL_ORDINAL_MONTH or UCAL_MONTH will + * clear both fields. * * @param field The time field to be cleared. * @stable ICU 2.6. diff --git a/yass/third_party/icu/source/i18n/unicode/displayoptions.h b/yass/third_party/icu/source/i18n/unicode/displayoptions.h index 7bc763bbac..9cc822626b 100644 --- a/yass/third_party/icu/source/i18n/unicode/displayoptions.h +++ b/yass/third_party/icu/source/i18n/unicode/displayoptions.h @@ -22,8 +22,6 @@ U_NAMESPACE_BEGIN -#ifndef U_HIDE_DRAFT_API - /** * Represents all the display options that are supported by CLDR such as grammatical case, noun * class, ... etc. It currently supports enums, but may be extended in the future to have other @@ -40,14 +38,14 @@ U_NAMESPACE_BEGIN * .build(); * ``` * - * @draft ICU 72 + * @stable ICU 72 */ class U_I18N_API DisplayOptions { public: /** * Responsible for building `DisplayOptions`. * - * @draft ICU 72 + * @stable ICU 72 */ class U_I18N_API Builder { public: @@ -56,7 +54,7 @@ public: * * @param grammaticalCase The grammatical case. * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ Builder &setGrammaticalCase(UDisplayOptionsGrammaticalCase grammaticalCase) { this->grammaticalCase = grammaticalCase; @@ -68,7 +66,7 @@ public: * * @param nounClass The noun class. * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ Builder &setNounClass(UDisplayOptionsNounClass nounClass) { this->nounClass = nounClass; @@ -80,7 +78,7 @@ public: * * @param pluralCategory The plural category. * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ Builder &setPluralCategory(UDisplayOptionsPluralCategory pluralCategory) { this->pluralCategory = pluralCategory; @@ -92,7 +90,7 @@ public: * * @param capitalization The capitalization. * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ Builder &setCapitalization(UDisplayOptionsCapitalization capitalization) { this->capitalization = capitalization; @@ -104,7 +102,7 @@ public: * * @param nameStyle The name style. * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ Builder &setNameStyle(UDisplayOptionsNameStyle nameStyle) { this->nameStyle = nameStyle; @@ -116,7 +114,7 @@ public: * * @param displayLength The display length. * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ Builder &setDisplayLength(UDisplayOptionsDisplayLength displayLength) { this->displayLength = displayLength; @@ -128,7 +126,7 @@ public: * * @param substituteHandling The substitute handling. * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ Builder &setSubstituteHandling(UDisplayOptionsSubstituteHandling substituteHandling) { this->substituteHandling = substituteHandling; @@ -139,7 +137,7 @@ public: * Builds the display options. * * @return DisplayOptions - * @draft ICU 72 + * @stable ICU 72 */ DisplayOptions build() { return DisplayOptions(*this); } @@ -162,21 +160,21 @@ public: * Creates a builder with the `UNDEFINED` values for all the parameters. * * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ static Builder builder(); /** * Creates a builder with the same parameters from this object. * * @return Builder - * @draft ICU 72 + * @stable ICU 72 */ Builder copyToBuilder() const; /** * Gets the grammatical case. * * @return UDisplayOptionsGrammaticalCase - * @draft ICU 72 + * @stable ICU 72 */ UDisplayOptionsGrammaticalCase getGrammaticalCase() const { return grammaticalCase; } @@ -184,7 +182,7 @@ public: * Gets the noun class. * * @return UDisplayOptionsNounClass - * @draft ICU 72 + * @stable ICU 72 */ UDisplayOptionsNounClass getNounClass() const { return nounClass; } @@ -192,7 +190,7 @@ public: * Gets the plural category. * * @return UDisplayOptionsPluralCategory - * @draft ICU 72 + * @stable ICU 72 */ UDisplayOptionsPluralCategory getPluralCategory() const { return pluralCategory; } @@ -200,7 +198,7 @@ public: * Gets the capitalization. * * @return UDisplayOptionsCapitalization - * @draft ICU 72 + * @stable ICU 72 */ UDisplayOptionsCapitalization getCapitalization() const { return capitalization; } @@ -208,7 +206,7 @@ public: * Gets the dialect handling. * * @return UDisplayOptionsNameStyle - * @draft ICU 72 + * @stable ICU 72 */ UDisplayOptionsNameStyle getNameStyle() const { return nameStyle; } @@ -216,7 +214,7 @@ public: * Gets the display length. * * @return UDisplayOptionsDisplayLength - * @draft ICU 72 + * @stable ICU 72 */ UDisplayOptionsDisplayLength getDisplayLength() const { return displayLength; } @@ -224,7 +222,7 @@ public: * Gets the substitute handling. * * @return UDisplayOptionsSubstituteHandling - * @draft ICU 72 + * @stable ICU 72 */ UDisplayOptionsSubstituteHandling getSubstituteHandling() const { return substituteHandling; } @@ -232,7 +230,7 @@ public: * Copies the DisplayOptions. * * @param other The options to copy. - * @draft ICU 72 + * @stable ICU 72 */ DisplayOptions &operator=(const DisplayOptions &other) = default; @@ -240,7 +238,7 @@ public: * Moves the DisplayOptions. * * @param other The options to move from. - * @draft ICU 72 + * @stable ICU 72 */ DisplayOptions &operator=(DisplayOptions &&other) noexcept = default; @@ -248,7 +246,7 @@ public: * Copies the DisplayOptions. * * @param other The options to copy. - * @draft ICU 72 + * @stable ICU 72 */ DisplayOptions(const DisplayOptions &other) = default; @@ -263,8 +261,6 @@ private: UDisplayOptionsSubstituteHandling substituteHandling; }; -#endif // U_HIDE_DRAFT_API - U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/yass/third_party/icu/source/i18n/unicode/formattednumber.h b/yass/third_party/icu/source/i18n/unicode/formattednumber.h index 198c9d8478..83178ea40e 100644 --- a/yass/third_party/icu/source/i18n/unicode/formattednumber.h +++ b/yass/third_party/icu/source/i18n/unicode/formattednumber.h @@ -140,19 +140,15 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { */ MeasureUnit getOutputUnit(UErrorCode& status) const; -#ifndef U_HIDE_DRAFT_API - /** * Gets the noun class of the formatted output. Returns `UNDEFINED` when the noun class * is not supported yet. * * @return UDisplayOptionsNounClass - * @draft ICU 72 + * @stable ICU 72 */ UDisplayOptionsNounClass getNounClass(UErrorCode &status) const; -#endif // U_HIDE_DRAFT_API - #ifndef U_HIDE_INTERNAL_API /** diff --git a/yass/third_party/icu/source/i18n/unicode/gregocal.h b/yass/third_party/icu/source/i18n/unicode/gregocal.h index b85deb5ecf..ab5a9c1571 100644 --- a/yass/third_party/icu/source/i18n/unicode/gregocal.h +++ b/yass/third_party/icu/source/i18n/unicode/gregocal.h @@ -536,14 +536,6 @@ public: virtual int32_t monthLength(int32_t month, int32_t year) const; #ifndef U_HIDE_INTERNAL_API - /** - * return the length of the given year. - * @param year the given year. - * @return the length of the given year. - * @internal - */ - int32_t yearLength(int32_t year) const; - /** * return the length of the year field. * @return the length of the year field @@ -551,14 +543,6 @@ public: */ int32_t yearLength(void) const; - /** - * After adjustments such as add(MONTH), add(YEAR), we don't want the - * month to jump around. E.g., we don't want Jan 31 + 1 month to go to Mar - * 3, we want it to go to Feb 28. Adjustments which might run into this - * problem call this method to retain the proper month. - * @internal - */ - void pinDayOfMonth(void); #endif /* U_HIDE_INTERNAL_API */ /** diff --git a/yass/third_party/icu/source/i18n/unicode/measunit.h b/yass/third_party/icu/source/i18n/unicode/measunit.h index f7acc6990b..8f35835782 100644 --- a/yass/third_party/icu/source/i18n/unicode/measunit.h +++ b/yass/third_party/icu/source/i18n/unicode/measunit.h @@ -371,7 +371,8 @@ class U_I18N_API MeasureUnit: public UObject { /** * Default constructor. - * Populates the instance with the base dimensionless unit. + * Populates the instance with the base dimensionless unit, which means that there will be + * no unit on the formatted number. * @stable ICU 3.0 */ MeasureUnit(); @@ -1527,23 +1528,21 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit getNanosecond(); -#ifndef U_HIDE_DRAFT_API /** * Returns by pointer, unit of duration: quarter. * Caller owns returned value and must free it. * Also see {@link #getQuarter()}. * @param status ICU error code. - * @draft ICU 72 + * @stable ICU 72 */ static MeasureUnit *createQuarter(UErrorCode &status); /** * Returns by value, unit of duration: quarter. * Also see {@link #createQuarter()}. - * @draft ICU 72 + * @stable ICU 72 */ static MeasureUnit getQuarter(); -#endif /* U_HIDE_DRAFT_API */ /** * Returns by pointer, unit of duration: second. @@ -2737,23 +2736,21 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit getTon(); -#ifndef U_HIDE_DRAFT_API /** * Returns by pointer, unit of mass: tonne. * Caller owns returned value and must free it. * Also see {@link #getTonne()}. * @param status ICU error code. - * @draft ICU 72 + * @stable ICU 72 */ static MeasureUnit *createTonne(UErrorCode &status); /** * Returns by value, unit of mass: tonne. * Also see {@link #createTonne()}. - * @draft ICU 72 + * @stable ICU 72 */ static MeasureUnit getTonne(); -#endif /* U_HIDE_DRAFT_API */ /** * Returns by pointer, unit of power: gigawatt. @@ -2883,6 +2880,24 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit getBar(); +#ifndef U_HIDE_DRAFT_API + /** + * Returns by pointer, unit of pressure: gasoline-energy-density. + * Caller owns returned value and must free it. + * Also see {@link #getGasolineEnergyDensity()}. + * @param status ICU error code. + * @draft ICU 74 + */ + static MeasureUnit *createGasolineEnergyDensity(UErrorCode &status); + + /** + * Returns by value, unit of pressure: gasoline-energy-density. + * Also see {@link #createGasolineEnergyDensity()}. + * @draft ICU 74 + */ + static MeasureUnit getGasolineEnergyDensity(); +#endif /* U_HIDE_DRAFT_API */ + /** * Returns by pointer, unit of pressure: hectopascal. * Caller owns returned value and must free it. diff --git a/yass/third_party/icu/source/i18n/unicode/measure.h b/yass/third_party/icu/source/i18n/unicode/measure.h index 31f931d7d4..fbef17c8f0 100644 --- a/yass/third_party/icu/source/i18n/unicode/measure.h +++ b/yass/third_party/icu/source/i18n/unicode/measure.h @@ -89,6 +89,16 @@ class U_I18N_API Measure: public UObject { */ bool operator==(const UObject& other) const; +#ifndef U_HIDE_DRAFT_API + /** + * Inequality operator. Returns true if this object is not equal to the other object. + * @param other the object to compare with + * @return true if the objects are not equal + * @draft ICU 74 + */ + inline bool operator!=(const UObject& other) const { return !operator==(other); } +#endif // U_HIDE_DRAFT_API + /** * Return a reference to the numeric value of this object. The * numeric value may be of any numeric type supported by diff --git a/yass/third_party/icu/source/i18n/unicode/numberformatter.h b/yass/third_party/icu/source/i18n/unicode/numberformatter.h index caf98e3dfa..069324a9ef 100644 --- a/yass/third_party/icu/source/i18n/unicode/numberformatter.h +++ b/yass/third_party/icu/source/i18n/unicode/numberformatter.h @@ -80,6 +80,9 @@ * This API is based on the fluent design pattern popularized by libraries such as Google's Guava. For * extensive details on the design of this API, read the design doc. * + *

    + * Note: To format monetary/currency values, specify the currency in the `.unit()` function. + * * @author Shane Carr */ @@ -2257,14 +2260,13 @@ class U_I18N_API NumberFormatterSettings { */ Derived usage(StringPiece usage) &&; -#ifndef U_HIDE_DRAFT_API /** * Specifies the DisplayOptions. For example, UDisplayOptionsGrammaticalCase specifies * the desired case for a unit formatter's output (e.g. accusative, dative, genitive). * * @param displayOptions * @return The fluent chain. - * @draft ICU 72 + * @stable ICU 72 */ Derived displayOptions(const DisplayOptions &displayOptions) const &; @@ -2273,10 +2275,9 @@ class U_I18N_API NumberFormatterSettings { * * @param displayOptions * @return The fluent chain. - * @draft ICU 72 + * @stable ICU 72 */ Derived displayOptions(const DisplayOptions &displayOptions) &&; -#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_INTERNAL_API /** diff --git a/yass/third_party/icu/source/i18n/unicode/rbnf.h b/yass/third_party/icu/source/i18n/unicode/rbnf.h index 0336ac1f81..f9a1c8f93a 100644 --- a/yass/third_party/icu/source/i18n/unicode/rbnf.h +++ b/yass/third_party/icu/source/i18n/unicode/rbnf.h @@ -64,18 +64,20 @@ enum URBNFRuleSetTag { * @stable ICU 2.2 */ URBNF_ORDINAL, +#ifndef U_HIDE_DEPRECATED_API /** * Requests predefined ruleset for formatting a value as a duration in hours, minutes, and seconds. - * @stable ICU 2.2 + * @deprecated ICU 74 Use MeasureFormat instead. */ URBNF_DURATION, +#endif // U_HIDE_DERECATED_API /** * Requests predefined ruleset for various non-place-value numbering systems. * WARNING: The same resource contains rule sets for a variety of different numbering systems. * You need to call setDefaultRuleSet() on the formatter to choose the actual numbering system. * @stable ICU 2.2 */ - URBNF_NUMBERING_SYSTEM, + URBNF_NUMBERING_SYSTEM = 3, #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal URBNFRuleSetTag value. diff --git a/yass/third_party/icu/source/i18n/unicode/reldatefmt.h b/yass/third_party/icu/source/i18n/unicode/reldatefmt.h index 4123468c65..5dc4905b12 100644 --- a/yass/third_party/icu/source/i18n/unicode/reldatefmt.h +++ b/yass/third_party/icu/source/i18n/unicode/reldatefmt.h @@ -248,8 +248,6 @@ typedef enum UDateDirection { #endif // U_HIDE_DEPRECATED_API } UDateDirection; -#if !UCONFIG_NO_BREAK_ITERATION - U_NAMESPACE_BEGIN class BreakIterator; @@ -560,7 +558,7 @@ public: * * This method returns a String. To get more information about the * formatting result, use formatNumericToValue(). - * + * * @param offset The signed offset for the specified unit. This * will be formatted according to this object's * NumberFormat object. @@ -586,7 +584,7 @@ public: * * This method returns a FormattedRelativeDateTime, which exposes more * information than the String returned by formatNumeric(). - * + * * @param offset The signed offset for the specified unit. This * will be formatted according to this object's * NumberFormat object. @@ -696,11 +694,19 @@ private: const SharedPluralRules *fPluralRules; UDateRelativeDateTimeFormatterStyle fStyle; UDisplayContext fContext; +#if !UCONFIG_NO_BREAK_ITERATION const SharedBreakIterator *fOptBreakIterator; +#else + std::nullptr_t fOptBreakIterator = nullptr; +#endif // !UCONFIG_NO_BREAK_ITERATION Locale fLocale; void init( NumberFormat *nfToAdopt, +#if !UCONFIG_NO_BREAK_ITERATION BreakIterator *brkIter, +#else + std::nullptr_t, +#endif // !UCONFIG_NO_BREAK_ITERATION UErrorCode &status); UnicodeString& adjustForContext(UnicodeString &) const; UBool checkNoAdjustForContext(UErrorCode& status) const; @@ -743,7 +749,6 @@ private: U_NAMESPACE_END -#endif /* !UCONFIG_NO_BREAK_ITERATION */ #endif /* !UCONFIG_NO_FORMATTING */ #endif /* U_SHOW_CPLUSPLUS_API */ diff --git a/yass/third_party/icu/source/i18n/unicode/timezone.h b/yass/third_party/icu/source/i18n/unicode/timezone.h index a2c0552c18..e512ef7f92 100644 --- a/yass/third_party/icu/source/i18n/unicode/timezone.h +++ b/yass/third_party/icu/source/i18n/unicode/timezone.h @@ -444,6 +444,37 @@ public: static UnicodeString& U_EXPORT2 getCanonicalID(const UnicodeString& id, UnicodeString& canonicalID, UBool& isSystemID, UErrorCode& status); + +#ifndef U_HIDE_DRAFT_API + /** + * Returns the preferred time zone ID in the IANA time zone database for the given time zone ID. + * There are two types of preferred IDs. The first type is the one defined in zone.tab file, + * such as "America/Los_Angeles". The second types is the one defined for zones not associated + * with a specific region, but not defined with "Link" syntax such as "Etc/GMT+10". + * + *

    Note: For most of valid time zone IDs, this method returns an ID same as getCanonicalID(). + * getCanonicalID() is based on canonical time zone IDs defined in Unicode CLDR. + * These canonical time zone IDs in CLDR were based on very old version of the time zone database. + * In the IANA time zone database, some IDs were updated since then. This API returns a newer + * time zone ID. For example, CLDR defines "Asia/Calcutta" as the canonical time zone ID. This + * method returns "Asia/Kolkata" instead. + *

    "Etc/Unknown" is a special time zone ID defined by CLDR. There are no corresponding zones + * in the IANA time zone database. Therefore, this API returns U_ILLEGAL_ARGUMENT_ERROR when the + * input ID is "Etc/Unknown". + * + * @param id The input time zone ID. + * @param ianaID Receives the preferred time zone ID in the IANA time zone database. When + * the given time zone ID is not a known time zone ID, this method sets an + * invalid (bogus) string. + * @param status Receives the status. When the given time zone ID is not a known time zone + * ID, U_ILLEGAL_ARGUMENT_ERROR is set. + * @return A reference to the result. + * @draft ICU 74 + */ + static UnicodeString& U_EXPORT2 getIanaID(const UnicodeString&id, UnicodeString& ianaID, + UErrorCode& status); +#endif // U_HIDE_DRAFT_API + /** * Converts a system time zone ID to an equivalent Windows time zone ID. For example, * Windows time zone ID "Pacific Standard Time" is returned for input "America/Los_Angeles". diff --git a/yass/third_party/icu/source/i18n/unicode/ucal.h b/yass/third_party/icu/source/i18n/unicode/ucal.h index f428dfcc61..f351b6c9db 100644 --- a/yass/third_party/icu/source/i18n/unicode/ucal.h +++ b/yass/third_party/icu/source/i18n/unicode/ucal.h @@ -1393,6 +1393,38 @@ ucal_getTZDataVersion(UErrorCode* status); U_CAPI int32_t U_EXPORT2 ucal_getCanonicalTimeZoneID(const UChar* id, int32_t len, UChar* result, int32_t resultCapacity, UBool *isSystemID, UErrorCode* status); + +#ifndef U_HIDE_DRAFT_API +/** + * Returns the preferred time zone ID in the IANA time zone database for the given time zone ID. + * There are two types of preferred IDs. The first type is the one defined in zone.tab file, + * such as "America/Los_Angeles". The second types is the one defined for zones not associated + * with a specific region, but not defined with "Link" syntax such as "Etc/GMT+10". + * + *

    Note: For most of valid time zone IDs, this method returns an ID same as ucal_getCanonicalTimeZoneID(). + * ucal_getCanonicalTimeZoneID() is based on canonical time zone IDs defined in Unicode CLDR. + * These canonical time zone IDs in CLDR were based on very old version of the time zone database. + * In the IANA time zone database, some IDs were updated since then. This API returns a newer + * time zone ID. For example, CLDR defines "Asia/Calcutta" as the canonical time zone ID. This + * method returns "Asia/Kolkata" instead. + *

    "Etc/Unknown" is a special time zone ID defined by CLDR. There are no corresponding zones + * in the IANA time zone database. Therefore, this API returns U_ILLEGAL_ARGUMENT_ERROR when the + * input ID is "Etc/Unknown". + * + * @param id The input time zone ID. + * @param len The length of the input time zone ID. + * @param result The buffer receives the preferred time zone ID in the IANA time zone database. + * @param resultCapacity The capacity of the result buffer. + * @param status Receives the status. When the given time zone ID is not a known system time zone + * ID, U_ILLEGAL_ARGUMENT_ERROR is set. + * @return The result string length, not including the terminating null. + * @draft ICU 74 + */ +U_CAPI int32_t U_EXPORT2 +ucal_getIanaTimeZoneID(const UChar* id, int32_t len, + UChar* result, int32_t resultCapacity, UErrorCode* status); +#endif // U_HIDE_DRAFT_API + /** * Get the resource keyword value string designating the calendar type for the UCalendar. * @param cal The UCalendar to query. diff --git a/yass/third_party/icu/source/i18n/unicode/ucol.h b/yass/third_party/icu/source/i18n/unicode/ucol.h index 2496331221..e4ec2f7418 100644 --- a/yass/third_party/icu/source/i18n/unicode/ucol.h +++ b/yass/third_party/icu/source/i18n/unicode/ucol.h @@ -251,42 +251,52 @@ typedef enum { */ UCOL_FRENCH_COLLATION, /** Attribute for handling variable elements. - * Acceptable values are UCOL_NON_IGNORABLE (default) - * which treats all the codepoints with non-ignorable + * Acceptable values are UCOL_NON_IGNORABLE + * which treats all the codepoints with non-ignorable * primary weights in the same way, - * and UCOL_SHIFTED which causes codepoints with primary + * and UCOL_SHIFTED which causes codepoints with primary * weights that are equal or below the variable top value - * to be ignored on primary level and moved to the quaternary - * level. + * to be ignored on primary level and moved to the quaternary + * level. The default setting in a Collator object depends on the + * locale data loaded from the resources. For most locales, the + * default is UCOL_NON_IGNORABLE, but for others, such as "th", + * the default could be UCOL_SHIFTED. * @stable ICU 2.0 */ - UCOL_ALTERNATE_HANDLING, + UCOL_ALTERNATE_HANDLING, /** Controls the ordering of upper and lower case letters. - * Acceptable values are UCOL_OFF (default), which orders + * Acceptable values are UCOL_OFF, which orders * upper and lower case letters in accordance to their tertiary - * weights, UCOL_UPPER_FIRST which forces upper case letters to - * sort before lower case letters, and UCOL_LOWER_FIRST which does - * the opposite. + * weights, UCOL_UPPER_FIRST which forces upper case letters to + * sort before lower case letters, and UCOL_LOWER_FIRST which does + * the opposite. The default setting in a Collator object depends on the + * locale data loaded from the resources. For most locales, the + * default is UCOL_OFF, but for others, such as "da" or "mt", + * the default could be UCOL_UPPER. * @stable ICU 2.0 */ - UCOL_CASE_FIRST, + UCOL_CASE_FIRST, /** Controls whether an extra case level (positioned before the third - * level) is generated or not. Acceptable values are UCOL_OFF (default), + * level) is generated or not. Acceptable values are UCOL_OFF, * when case level is not generated, and UCOL_ON which causes the case * level to be generated. Contents of the case level are affected by - * the value of UCOL_CASE_FIRST attribute. A simple way to ignore + * the value of UCOL_CASE_FIRST attribute. A simple way to ignore * accent differences in a string is to set the strength to UCOL_PRIMARY - * and enable case level. + * and enable case level. The default setting in a Collator object depends + * on the locale data loaded from the resources. * @stable ICU 2.0 */ UCOL_CASE_LEVEL, /** Controls whether the normalization check and necessary normalizations - * are performed. When set to UCOL_OFF (default) no normalization check - * is performed. The correctness of the result is guaranteed only if the + * are performed. When set to UCOL_OFF no normalization check + * is performed. The correctness of the result is guaranteed only if the * input data is in so-called FCD form (see users manual for more info). * When set to UCOL_ON, an incremental check is performed to see whether * the input data is in the FCD form. If the data is not in the FCD form, - * incremental NFD normalization is performed. + * incremental NFD normalization is performed. The default setting in a + * Collator object depends on the locale data loaded from the resources. + * For many locales, the default is UCOL_OFF, but for others, such as "hi" + * "vi', or "bn", * the default could be UCOL_ON. * @stable ICU 2.0 */ UCOL_NORMALIZATION_MODE, diff --git a/yass/third_party/icu/source/i18n/unicode/udisplayoptions.h b/yass/third_party/icu/source/i18n/unicode/udisplayoptions.h index 1ecdf1d8e9..490bc01cf3 100644 --- a/yass/third_party/icu/source/i18n/unicode/udisplayoptions.h +++ b/yass/third_party/icu/source/i18n/unicode/udisplayoptions.h @@ -18,47 +18,45 @@ #include "unicode/uversion.h" -#ifndef U_HIDE_DRAFT_API - /** * Represents all the grammatical cases that are supported by CLDR. * - * @draft ICU 72 + * @stable ICU 72 */ typedef enum UDisplayOptionsGrammaticalCase { /** * A possible setting for GrammaticalCase. * The grammatical case context to be used is unknown (this is the default value). - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_UNDEFINED = 0, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_ABLATIVE = 1, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_ACCUSATIVE = 2, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_COMITATIVE = 3, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_DATIVE = 4, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_ERGATIVE = 5, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_GENITIVE = 6, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_INSTRUMENTAL = 7, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_LOCATIVE = 8, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_LOCATIVE_COPULATIVE = 9, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_NOMINATIVE = 10, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_OBLIQUE = 11, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_PREPOSITIONAL = 12, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_SOCIATIVE = 13, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_GRAMMATICAL_CASE_VOCATIVE = 14, } UDisplayOptionsGrammaticalCase; @@ -66,7 +64,7 @@ typedef enum UDisplayOptionsGrammaticalCase { * @param grammaticalCase The grammatical case. * @return the lowercase CLDR keyword string for the grammatical case. * - * @draft ICU 72 + * @stable ICU 72 */ U_CAPI const char * U_EXPORT2 udispopt_getGrammaticalCaseIdentifier(UDisplayOptionsGrammaticalCase grammaticalCase); @@ -75,7 +73,7 @@ udispopt_getGrammaticalCaseIdentifier(UDisplayOptionsGrammaticalCase grammatical * @param identifier in lower case such as "dative" or "nominative" * @return the plural category corresponding to the identifier, or `UDISPOPT_GRAMMATICAL_CASE_UNDEFINED` * - * @draft ICU 72 + * @stable ICU 72 */ U_CAPI UDisplayOptionsGrammaticalCase U_EXPORT2 udispopt_fromGrammaticalCaseIdentifier(const char *identifier); @@ -84,7 +82,7 @@ udispopt_fromGrammaticalCaseIdentifier(const char *identifier); * Standard CLDR plural form/category constants. * See https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules * - * @draft ICU 72 + * @stable ICU 72 */ typedef enum UDisplayOptionsPluralCategory { @@ -92,20 +90,20 @@ typedef enum UDisplayOptionsPluralCategory { * A possible setting for PluralCategory. * The plural category case context to be used is unknown (this is the default value). * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_PLURAL_CATEGORY_UNDEFINED = 0, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_PLURAL_CATEGORY_ZERO = 1, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_PLURAL_CATEGORY_ONE = 2, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_PLURAL_CATEGORY_TWO = 3, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_PLURAL_CATEGORY_FEW = 4, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_PLURAL_CATEGORY_MANY = 5, - /** @draft ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_PLURAL_CATEGORY_OTHER = 6, } UDisplayOptionsPluralCategory; @@ -113,7 +111,7 @@ typedef enum UDisplayOptionsPluralCategory { * @param pluralCategory The plural category. * @return the lowercase CLDR identifier string for the plural category. * - * @draft ICU 72 + * @stable ICU 72 */ U_CAPI const char * U_EXPORT2 udispopt_getPluralCategoryIdentifier(UDisplayOptionsPluralCategory pluralCategory); @@ -123,7 +121,7 @@ udispopt_getPluralCategoryIdentifier(UDisplayOptionsPluralCategory pluralCategor * @return the plural category corresponding to the identifier (plural keyword), * or `UDISPOPT_PLURAL_CATEGORY_UNDEFINED` * - * @draft ICU 72 + * @stable ICU 72 */ U_CAPI UDisplayOptionsPluralCategory U_EXPORT2 udispopt_fromPluralCategoryIdentifier(const char *identifier); @@ -131,31 +129,31 @@ udispopt_fromPluralCategoryIdentifier(const char *identifier); /** * Represents all the grammatical noun classes that are supported by CLDR. * - * @draft ICU 72. + * @stable ICU 72. */ typedef enum UDisplayOptionsNounClass { /** * A possible setting for NounClass. * The noun class case context to be used is unknown (this is the default value). * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_NOUN_CLASS_UNDEFINED = 0, - /** ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_NOUN_CLASS_OTHER = 1, - /** ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_NOUN_CLASS_NEUTER = 2, - /** ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_NOUN_CLASS_FEMININE = 3, - /** ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_NOUN_CLASS_MASCULINE = 4, - /** ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_NOUN_CLASS_ANIMATE = 5, - /** ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_NOUN_CLASS_INANIMATE = 6, - /** ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_NOUN_CLASS_PERSONAL = 7, - /** ICU 72 */ + /** @stable ICU 72 */ UDISPOPT_NOUN_CLASS_COMMON = 8, } UDisplayOptionsNounClass; @@ -163,7 +161,7 @@ typedef enum UDisplayOptionsNounClass { * @param nounClass The noun class. * @return the lowercase CLDR keyword string for the noun class. * - * @draft ICU 72 + * @stable ICU 72 */ U_CAPI const char * U_EXPORT2 udispopt_getNounClassIdentifier(UDisplayOptionsNounClass nounClass); @@ -172,7 +170,7 @@ udispopt_getNounClassIdentifier(UDisplayOptionsNounClass nounClass); * @param identifier in lower case such as "feminine" or "masculine" * @return the plural category corresponding to the identifier, or `UDISPOPT_NOUN_CLASS_UNDEFINED` * - * @draft ICU 72 + * @stable ICU 72 */ U_CAPI UDisplayOptionsNounClass U_EXPORT2 udispopt_fromNounClassIdentifier(const char *identifier); @@ -180,14 +178,14 @@ udispopt_fromNounClassIdentifier(const char *identifier); /** * Represents all the capitalization options. * - * @draft ICU 72 + * @stable ICU 72 */ typedef enum UDisplayOptionsCapitalization { /** * A possible setting for Capitalization. * The capitalization context to be used is unknown (this is the default value). * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_CAPITALIZATION_UNDEFINED = 0, @@ -195,7 +193,7 @@ typedef enum UDisplayOptionsCapitalization { * The capitalization context if a date, date symbol or display name is to be * formatted with capitalization appropriate for the beginning of a sentence. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_CAPITALIZATION_BEGINNING_OF_SENTENCE = 1, @@ -203,7 +201,7 @@ typedef enum UDisplayOptionsCapitalization { * The capitalization context if a date, date symbol or display name is to be * formatted with capitalization appropriate for the middle of a sentence. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_CAPITALIZATION_MIDDLE_OF_SENTENCE = 2, @@ -212,7 +210,7 @@ typedef enum UDisplayOptionsCapitalization { * formatted with capitalization appropriate for stand-alone usage such as an * isolated name on a calendar page. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_CAPITALIZATION_STANDALONE = 3, @@ -220,7 +218,7 @@ typedef enum UDisplayOptionsCapitalization { * The capitalization context if a date, date symbol or display name is to be * formatted with capitalization appropriate for a user-interface list or menu item. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_CAPITALIZATION_UI_LIST_OR_MENU = 4, } UDisplayOptionsCapitalization; @@ -228,14 +226,14 @@ typedef enum UDisplayOptionsCapitalization { /** * Represents all the dialect handlings. * - * @draft ICU 72 + * @stable ICU 72 */ typedef enum UDisplayOptionsNameStyle { /** * A possible setting for NameStyle. * The NameStyle context to be used is unknown (this is the default value). * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_NAME_STYLE_UNDEFINED = 0, @@ -243,7 +241,7 @@ typedef enum UDisplayOptionsNameStyle { * Use standard names when generating a locale name, * e.g. en_GB displays as 'English (United Kingdom)'. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_NAME_STYLE_STANDARD_NAMES = 1, @@ -251,7 +249,7 @@ typedef enum UDisplayOptionsNameStyle { * Use dialect names, when generating a locale name, * e.g. en_GB displays as 'British English'. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_NAME_STYLE_DIALECT_NAMES = 2, } UDisplayOptionsNameStyle; @@ -259,14 +257,14 @@ typedef enum UDisplayOptionsNameStyle { /** * Represents all the display lengths. * - * @draft ICU 72 + * @stable ICU 72 */ typedef enum UDisplayOptionsDisplayLength { /** * A possible setting for DisplayLength. * The DisplayLength context to be used is unknown (this is the default value). * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_DISPLAY_LENGTH_UNDEFINED = 0, @@ -274,7 +272,7 @@ typedef enum UDisplayOptionsDisplayLength { * Uses full names when generating a locale name, * e.g. "United States" for US. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_DISPLAY_LENGTH_FULL = 1, @@ -282,7 +280,7 @@ typedef enum UDisplayOptionsDisplayLength { * Use short names when generating a locale name, * e.g. "U.S." for US. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_DISPLAY_LENGTH_SHORT = 2, } UDisplayOptionsDisplayLength; @@ -290,7 +288,7 @@ typedef enum UDisplayOptionsDisplayLength { /** * Represents all the substitute handling. * - * @draft ICU 72 + * @stable ICU 72 */ typedef enum UDisplayOptionsSubstituteHandling { @@ -298,7 +296,7 @@ typedef enum UDisplayOptionsSubstituteHandling { * A possible setting for SubstituteHandling. * The SubstituteHandling context to be used is unknown (this is the default value). * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_SUBSTITUTE_HANDLING_UNDEFINED = 0, @@ -306,20 +304,18 @@ typedef enum UDisplayOptionsSubstituteHandling { * Returns a fallback value (e.g., the input code) when no data is available. * This is the default behaviour. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_SUBSTITUTE_HANDLING_SUBSTITUTE = 1, /** * Returns a null value when no data is available. * - * @draft ICU 72 + * @stable ICU 72 */ UDISPOPT_SUBSTITUTE_HANDLING_NO_SUBSTITUTE = 2, } UDisplayOptionsSubstituteHandling; -#endif // U_HIDE_DRAFT_API - #endif /* #if !UCONFIG_NO_FORMATTING */ #endif // __UDISPLAYOPTIONS_H__ diff --git a/yass/third_party/icu/source/i18n/unicode/unum.h b/yass/third_party/icu/source/i18n/unicode/unum.h index 172ed08964..2dd2badc1b 100644 --- a/yass/third_party/icu/source/i18n/unicode/unum.h +++ b/yass/third_party/icu/source/i18n/unicode/unum.h @@ -1107,7 +1107,6 @@ typedef enum UNumberFormatAttribute { } UNumberFormatAttribute; -#ifndef U_HIDE_DRAFT_API /** * Returns true if the formatter supports the specified attribute and false if not. * @param fmt The formatter to query. @@ -1120,12 +1119,11 @@ typedef enum UNumberFormatAttribute { * @see unum_setDoubleAttribute * @see unum_getTextAttribute * @see unum_setTextAttribute -* @draft ICU 72 +* @stable ICU 72 */ U_CAPI bool U_EXPORT2 unum_hasAttribute(const UNumberFormat* fmt, UNumberFormatAttribute attr); -#endif // U_HIDE_DRAFT_API /** * Get a numeric attribute associated with a UNumberFormat. diff --git a/yass/third_party/icu/source/i18n/unicode/ureldatefmt.h b/yass/third_party/icu/source/i18n/unicode/ureldatefmt.h index 3c44890043..0882360d14 100644 --- a/yass/third_party/icu/source/i18n/unicode/ureldatefmt.h +++ b/yass/third_party/icu/source/i18n/unicode/ureldatefmt.h @@ -12,7 +12,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION +#if !UCONFIG_NO_FORMATTING #include "unicode/unum.h" #include "unicode/udisplaycontext.h" @@ -505,6 +505,6 @@ ureldatefmt_combineDateAndTime( const URelativeDateTimeFormatter* reldatefmt, int32_t resultCapacity, UErrorCode* status ); -#endif /* !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION */ +#endif /* !UCONFIG_NO_FORMATTING */ #endif diff --git a/yass/third_party/icu/source/i18n/unicode/uspoof.h b/yass/third_party/icu/source/i18n/unicode/uspoof.h index 442655bb54..20d29d62b2 100644 --- a/yass/third_party/icu/source/i18n/unicode/uspoof.h +++ b/yass/third_party/icu/source/i18n/unicode/uspoof.h @@ -19,6 +19,7 @@ #ifndef USPOOF_H #define USPOOF_H +#include "unicode/ubidi.h" #include "unicode/utypes.h" #include "unicode/uset.h" #include "unicode/parseerr.h" @@ -83,6 +84,25 @@ * the instance should be created once (e.g., upon application startup), and the efficient * {@link uspoof_areConfusable} method can be used at runtime. * + * If the paragraph direction used to display the strings is known, the bidi function should be used instead: + * + * \code{.c} + * UErrorCode status = U_ZERO_ERROR; + * // These strings look identical when rendered in a left-to-right context. + * // They look distinct in a right-to-left context. + * UChar* str1 = (UChar*) u"A1\u05D0"; // A1א + * UChar* str2 = (UChar*) u"A\u05D01"; // Aא1 + * + * USpoofChecker* sc = uspoof_open(&status); + * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status); + * + * int32_t bitmask = uspoof_areBidiConfusable(sc, UBIDI_LTR, str1, -1, str2, -1, &status); + * UBool result = bitmask != 0; + * // areBidiConfusable: 1 (status: U_ZERO_ERROR) + * printf("areBidiConfusable: %d (status: %s)\n", result, u_errorName(status)); + * uspoof_close(sc); + * \endcode + * *

    * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers. It will automatically call * {@link uspoof_close} when the object goes out of scope: @@ -339,6 +359,51 @@ * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple * scripts. * + *

    Advanced bidirectional usage

    + * If the paragraph direction with which the identifiers will be displayed is not known, there are + * multiple options for confusable detection depending on the circumstances. + * + *

    + * In some circumstances, the only concern is confusion between identifiers displayed with the same + * paragraph direction. + * + *

    + * An example is the case where identifiers are usernames prefixed with the @ symbol. + * That symbol will appear to the left in a left-to-right context, and to the right in a + * right-to-left context, so that an identifier displayed in a left-to-right context can never be + * confused with an identifier displayed in a right-to-left context: + *

      + *
    • + * The usernames "A1א" (A one aleph) and "Aא1" (A aleph 1) + * would be considered confusable, since they both appear as \@A1א in a left-to-right context, and the + * usernames "אA_1" (aleph A underscore one) and "א1_A" (aleph one underscore A) would be considered + * confusable, since they both appear as A_1א@ in a right-to-left context. + *
    • + *
    • + * The username "Mark_" would not be considered confusable with the username "_Mark", + * even though the latter would appear as Mark_@ in a right-to-left context, and the + * former as \@Mark_ in a left-to-right context. + *
    • + *
    + *

    + * In that case, the caller should check for both LTR-confusability and RTL-confusability: + * + * \code{.cpp} + * bool confusableInEitherDirection = + * uspoof_areBidiConfusableUnicodeString(sc, UBIDI_LTR, id1, id2, &status) || + * uspoof_areBidiConfusableUnicodeString(sc, UBIDI_RTL, id1, id2, &status); + * \endcode + * + * If the bidiSkeleton is used, the LTR and RTL skeleta should be kept separately and compared, LTR + * with LTR and RTL with RTL. + * + *

    + * In cases where confusability between the visual appearances of an identifier displayed in a + * left-to-right context with another identifier displayed in a right-to-left context is a concern, + * the LTR skeleton of one can be compared with the RTL skeleton of the other. However, this + * very broad definition of confusability may have unexpected results; for instance, it treats the + * ASCII identifiers "Mark_" and "_Mark" as confusable. + * *

    Additional Information

    * * A USpoofChecker instance may be used repeatedly to perform checks on any number of identifiers. @@ -519,7 +584,7 @@ typedef enum USpoofChecks { /** - * Constants from UAX #39 for use in {@link uspoof_setRestrictionLevel}, and + * Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and * for returned identifier restriction levels in check results. * * @stable ICU 51 @@ -633,8 +698,8 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng /** * Open a Spoof Checker from the source form of the spoof data. * The input corresponds to the Unicode data file confusables.txt - * as described in Unicode UAX #39. The syntax of the source data - * is as described in UAX #39 for this file, and the content of + * as described in Unicode Technical Standard #39. The syntax of the source data + * is as described in UTS #39 for this file, and the content of * this file is acceptable input. * * The character encoding of the (char *) input text is UTF-8. @@ -1111,7 +1176,7 @@ uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode * /** - * Check the whether two specified strings are visually confusable. + * Check whether two specified strings are visually confusable. * * If the strings are confusable, the return value will be nonzero, as long as * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks(). @@ -1159,7 +1224,58 @@ uspoof_areConfusable(const USpoofChecker *sc, const UChar *id2, int32_t length2, UErrorCode *status); - +#ifndef U_HIDE_DRAFT_API +/** + * Check whether two specified strings are visually confusable when + * displayed in a context with the given paragraph direction. + * + * If the strings are confusable, the return value will be nonzero, as long as + * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks(). + * + * The bits in the return value correspond to flags for each of the classes of + * confusables applicable to the two input strings. According to UTS 39 + * section 4, the possible flags are: + * + *
      + *
    • {@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}
    • + *
    • {@link USPOOF_MIXED_SCRIPT_CONFUSABLE}
    • + *
    • {@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}
    • + *
    + * + * If one or more of the above flags were not listed in uspoof_setChecks(), this + * function will never report that class of confusable. The check + * {@link USPOOF_CONFUSABLE} enables all three flags. + * + * + * @param sc The USpoofChecker + * @param direction The paragraph direction with which the identifiers are + * displayed. Must be either UBIDI_LTR or UBIDI_RTL. + * @param id1 The first of the two identifiers to be compared for + * confusability. The strings are in UTF-16 format. + * @param length1 the length of the first identifier, expressed in + * 16 bit UTF-16 code units, or -1 if the string is + * nul terminated. + * @param id2 The second of the two identifiers to be compared for + * confusability. The identifiers are in UTF-16 format. + * @param length2 The length of the second identifiers, expressed in + * 16 bit UTF-16 code units, or -1 if the string is + * nul terminated. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Confusability of the identifiers is not reported here, + * but through this function's return value. + * @return An integer value with bit(s) set corresponding to + * the type of confusability found, as defined by + * enum USpoofChecks. Zero is returned if the identifiers + * are not confusable. + * + * @draft ICU 74 + */ +U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction, + const UChar *id1, int32_t length1, + const UChar *id2, int32_t length2, + UErrorCode *status); +#endif /* U_HIDE_DRAFT_API */ /** * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format. @@ -1192,14 +1308,45 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc, const char *id2, int32_t length2, UErrorCode *status); - - +#ifndef U_HIDE_DRAFT_API +/** + * A version of {@link uspoof_areBidiConfusable} accepting strings in UTF-8 format. + * + * @param sc The USpoofChecker + * @param direction The paragraph direction with which the identifiers are + * displayed. Must be either UBIDI_LTR or UBIDI_RTL. + * @param id1 The first of the two identifiers to be compared for + * confusability. The strings are in UTF-8 format. + * @param length1 the length of the first identifiers, in bytes, or -1 + * if the string is nul terminated. + * @param id2 The second of the two identifiers to be compared for + * confusability. The strings are in UTF-8 format. + * @param length2 The length of the second string in bytes, or -1 + * if the string is nul terminated. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Confusability of the strings is not reported here, + * but through this function's return value. + * @return An integer value with bit(s) set corresponding to + * the type of confusability found, as defined by + * enum USpoofChecks. Zero is returned if the strings + * are not confusable. + * + * @draft ICU 74 + * + * @see uspoof_areBidiConfusable + */ +U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction, + const char *id1, int32_t length1, + const char *id2, int32_t length2, + UErrorCode *status); +#endif /* U_HIDE_DRAFT_API */ /** * Get the "skeleton" for an identifier. * Skeletons are a transformation of the input identifier; * Two identifiers are confusable if their skeletons are identical. - * See Unicode UAX #39 for additional information. + * See Unicode Technical Standard #39 for additional information. * * Using skeletons directly makes it possible to quickly check * whether an identifier is confusable with any of some large @@ -1233,11 +1380,50 @@ uspoof_getSkeleton(const USpoofChecker *sc, UChar *dest, int32_t destCapacity, UErrorCode *status); +#ifndef U_HIDE_DRAFT_API +/** + * Get the "bidiSkeleton" for an identifier and a direction. + * Skeletons are a transformation of the input identifier; + * Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical; + * they are RTL-confusable if their RTL bidiSkeletons are identical. + * See Unicode Technical Standard #39 for additional information: + * https://www.unicode.org/reports/tr39/#Confusable_Detection. + * + * Using skeletons directly makes it possible to quickly check + * whether an identifier is confusable with any of some large + * set of existing identifiers, by creating an efficiently + * searchable collection of the skeletons. + * + * @param sc The USpoofChecker. + * @param direction The context direction with which the identifier will be + * displayed. Must be either UBIDI_LTR or UBIDI_RTL. + * @param id The input identifier whose skeleton will be computed. + * @param length The length of the input identifier, expressed in 16 bit + * UTF-16 code units, or -1 if the string is zero terminated. + * @param dest The output buffer, to receive the skeleton string. + * @param destCapacity The length of the output buffer, in 16 bit units. + * The destCapacity may be zero, in which case the function will + * return the actual length of the skeleton. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * @return The length of the skeleton string. The returned length + * is always that of the complete skeleton, even when the + * supplied buffer is too small (or of zero length) + * + * @draft ICU 74 + * @see uspoof_areBidiConfusable + */ +U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, + UBiDiDirection direction, + const UChar *id, int32_t length, + UChar *dest, int32_t destCapacity, UErrorCode *status); +#endif /* U_HIDE_DRAFT_API */ + /** * Get the "skeleton" for an identifier. * Skeletons are a transformation of the input identifier; * Two identifiers are confusable if their skeletons are identical. - * See Unicode UAX #39 for additional information. + * See Unicode Technical Standard #39 for additional information. * * Using skeletons directly makes it possible to quickly check * whether an identifier is confusable with any of some large @@ -1273,6 +1459,46 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc, char *dest, int32_t destCapacity, UErrorCode *status); +#ifndef U_HIDE_DRAFT_API +/** + * Get the "bidiSkeleton" for an identifier and a direction. + * Skeletons are a transformation of the input identifier; + * Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical; + * they are RTL-confusable if their RTL bidiSkeletons are identical. + * See Unicode Technical Standard #39 for additional information: + * https://www.unicode.org/reports/tr39/#Confusable_Detection. + * + * Using skeletons directly makes it possible to quickly check + * whether an identifier is confusable with any of some large + * set of existing identifiers, by creating an efficiently + * searchable collection of the skeletons. + * + * @param sc The USpoofChecker + * @param direction The context direction with which the identifier will be + * displayed. Must be either UBIDI_LTR or UBIDI_RTL. + * @param id The UTF-8 format identifier whose skeleton will be computed. + * @param length The length of the input string, in bytes, + * or -1 if the string is zero terminated. + * @param dest The output buffer, to receive the skeleton string. + * @param destCapacity The length of the output buffer, in bytes. + * The destCapacity may be zero, in which case the function will + * return the actual length of the skeleton. + * @param status The error code, set if an error occurred while attempting to + * perform the check. Possible Errors include U_INVALID_CHAR_FOUND + * for invalid UTF-8 sequences, and + * U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small + * to hold the complete skeleton. + * @return The length of the skeleton string, in bytes. The returned length + * is always that of the complete skeleton, even when the + * supplied buffer is too small (or of zero length) + * + * @draft ICU 74 + */ +U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction, + const char *id, int32_t length, char *dest, + int32_t destCapacity, UErrorCode *status); +#endif /* U_HIDE_DRAFT_API */ + /** * Get the set of Candidate Characters for Inclusion in Identifiers, as defined * in http://unicode.org/Public/security/latest/xidmodifications.txt @@ -1510,11 +1736,42 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &s2, UErrorCode *status); +#ifndef U_HIDE_DRAFT_API +/** + * A version of {@link uspoof_areBidiConfusable} accepting UnicodeStrings. + * + * @param sc The USpoofChecker + * @param direction The paragraph direction with which the identifiers are + * displayed. Must be either UBIDI_LTR or UBIDI_RTL. + * @param s1 The first of the two identifiers to be compared for + * confusability. The strings are in UTF-8 format. + * @param s2 The second of the two identifiers to be compared for + * confusability. The strings are in UTF-8 format. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Confusability of the identifiers is not reported here, + * but through this function's return value. + * @return An integer value with bit(s) set corresponding to + * the type of confusability found, as defined by + * enum USpoofChecks. Zero is returned if the identifiers + * are not confusable. + * + * @draft ICU 74 + * + * @see uspoof_areBidiConfusable + */ +U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc, + UBiDiDirection direction, + const icu::UnicodeString &s1, + const icu::UnicodeString &s2, + UErrorCode *status); +#endif /* U_HIDE_DRAFT_API */ + /** * Get the "skeleton" for an identifier. * Skeletons are a transformation of the input identifier; * Two identifiers are confusable if their skeletons are identical. - * See Unicode UAX #39 for additional information. + * See Unicode Technical Standard #39 for additional information. * * Using skeletons directly makes it possible to quickly check * whether an identifier is confusable with any of some large @@ -1540,6 +1797,36 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, icu::UnicodeString &dest, UErrorCode *status); +#ifndef U_HIDE_DRAFT_API +/** + * Get the "bidiSkeleton" for an identifier and a direction. + * Skeletons are a transformation of the input identifier; + * Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical; + * they are RTL-confusable if their RTL bidiSkeletons are identical. + * See Unicode Technical Standard #39 for additional information. + * https://www.unicode.org/reports/tr39/#Confusable_Detection. + * + * Using skeletons directly makes it possible to quickly check + * whether an identifier is confusable with any of some large + * set of existing identifiers, by creating an efficiently + * searchable collection of the skeletons. + * + * @param sc The USpoofChecker. + * @param direction The context direction with which the identifier will be + * displayed. Must be either UBIDI_LTR or UBIDI_RTL. + * @param id The input identifier whose bidiSkeleton will be computed. + * @param dest The output identifier, to receive the skeleton string. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * @return A reference to the destination (skeleton) string. + * + * @draft ICU 74 + */ +U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString( + const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id, + icu::UnicodeString &dest, UErrorCode *status); +#endif /* U_HIDE_DRAFT_API */ + /** * Get the set of Candidate Characters for Inclusion in Identifiers, as defined * in http://unicode.org/Public/security/latest/xidmodifications.txt diff --git a/yass/third_party/icu/source/i18n/units_converter.cpp b/yass/third_party/icu/source/i18n/units_converter.cpp index b89f495121..6758c129e5 100644 --- a/yass/third_party/icu/source/i18n/units_converter.cpp +++ b/yass/third_party/icu/source/i18n/units_converter.cpp @@ -372,11 +372,11 @@ void U_I18N_API addSingleFactorConstant(StringPiece baseStr, int32_t power, Sign factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; } else if (baseStr == "in3_to_m3") { factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; - factor.factorDen *= 12 * 12 * 12; + factor.factorDen *= std::pow(12 * 12 * 12, power * signum); } else if (baseStr == "gal_to_m3") { - factor.factorNum *= 231; factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; - factor.factorDen *= 12 * 12 * 12; + factor.factorNum *= std::pow(231, power * signum); + factor.factorDen *= std::pow(12 * 12 * 12, power * signum); } else if (baseStr == "gal_imp_to_m3") { factor.constantExponents[CONSTANT_GAL_IMP2M3] += power * signum; } else if (baseStr == "G") { @@ -397,6 +397,14 @@ void U_I18N_API addSingleFactorConstant(StringPiece baseStr, int32_t power, Sign factor.constantExponents[CONSTANT_SEC_PER_JULIAN_YEAR] += power * signum; } else if (baseStr == "speed_of_light_meters_per_second") { factor.constantExponents[CONSTANT_SPEED_OF_LIGHT_METERS_PER_SECOND] += power * signum; + } else if (baseStr == "sho_to_m3") { + factor.constantExponents[CONSTANT_SHO_TO_M3] += power * signum; + } else if (baseStr == "tsubo_to_m2") { + factor.constantExponents[CONSTANT_TSUBO_TO_M2] += power * signum; + } else if (baseStr == "shaku_to_m") { + factor.constantExponents[CONSTANT_SHAKU_TO_M] += power * signum; + } else if (baseStr == "AMU") { + factor.constantExponents[CONSTANT_AMU] += power * signum; } else { if (signum == Signum::NEGATIVE) { factor.factorDen *= std::pow(strToDouble(baseStr, status), power); @@ -535,7 +543,7 @@ void UnitsConverter::init(const ConversionRates &ratesInfo, UErrorCode &status) loadConversionRate(conversionRate_, conversionRate_.source, conversionRate_.target, unitsState, ratesInfo, status); - + } int32_t UnitsConverter::compareTwoUnits(const MeasureUnitImpl &firstUnit, diff --git a/yass/third_party/icu/source/i18n/units_converter.h b/yass/third_party/icu/source/i18n/units_converter.h index fd1d6ec422..8dc40cd102 100644 --- a/yass/third_party/icu/source/i18n/units_converter.h +++ b/yass/third_party/icu/source/i18n/units_converter.h @@ -33,6 +33,10 @@ enum Constants { CONSTANT_METERS_PER_AU, CONSTANT_SEC_PER_JULIAN_YEAR, CONSTANT_SPEED_OF_LIGHT_METERS_PER_SECOND, + CONSTANT_SHO_TO_M3, // https://en.wikipedia.org/wiki/Japanese_units_of_measurement + CONSTANT_TSUBO_TO_M2, // https://en.wikipedia.org/wiki/Japanese_units_of_measurement + CONSTANT_SHAKU_TO_M, // https://en.wikipedia.org/wiki/Japanese_units_of_measurement + CONSTANT_AMU, // Atomic Mass Unit https://www.nist.gov/pml/special-publication-811/nist-guide-si-chapter-5-units-outside-si#table7 // Must be the last element. CONSTANTS_COUNT @@ -55,6 +59,10 @@ static const double constantsValues[CONSTANTS_COUNT] = { 149597870700, // CONSTANT_METERS_PER_AU 31557600, // CONSTANT_SEC_PER_JULIAN_YEAR 299792458, // CONSTANT_SPEED_OF_LIGHT_METERS_PER_SECOND + 2401.0 / (1331.0 * 1000.0), + 400.0 / 121.0, + 4.0 / 121.0, + 1.66053878283E-27, // CONSTANT_AMU }; typedef enum Signum { @@ -214,7 +222,7 @@ class U_I18N_API UnitsConverter : public UMemory { /** * Initialises the object. - */ + */ void init(const ConversionRates &ratesInfo, UErrorCode &status); }; diff --git a/yass/third_party/icu/source/i18n/units_data.cpp b/yass/third_party/icu/source/i18n/units_data.cpp index 5fa17567a1..b1e069660f 100644 --- a/yass/third_party/icu/source/i18n/units_data.cpp +++ b/yass/third_party/icu/source/i18n/units_data.cpp @@ -7,9 +7,11 @@ #include "bytesinkutil.h" #include "cstring.h" +#include "measunit_impl.h" #include "number_decimalquantity.h" #include "resource.h" #include "uassert.h" +#include "ulocimp.h" #include "unicode/locid.h" #include "unicode/unistr.h" #include "unicode/ures.h" @@ -78,6 +80,7 @@ class ConversionRateDataSink : public ResourceSink { UnicodeString baseUnit = ICU_Utility::makeBogusString(); UnicodeString factor = ICU_Utility::makeBogusString(); UnicodeString offset = ICU_Utility::makeBogusString(); + UnicodeString systems = ICU_Utility::makeBogusString(); for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) { if (uprv_strcmp(key, "target") == 0) { baseUnit = value.getUnicodeString(status); @@ -85,6 +88,8 @@ class ConversionRateDataSink : public ResourceSink { factor = value.getUnicodeString(status); } else if (uprv_strcmp(key, "offset") == 0) { offset = value.getUnicodeString(status); + } else if (uprv_strcmp(key, "systems") == 0) { + systems = value.getUnicodeString(status); } } if (U_FAILURE(status)) { return; } @@ -103,6 +108,7 @@ class ConversionRateDataSink : public ResourceSink { cr->sourceUnit.append(srcUnit, status); cr->baseUnit.appendInvariantChars(baseUnit, status); cr->factor.appendInvariantChars(factor, status); + cr->systems.appendInvariantChars(systems, status); trimSpaces(cr->factor, status); if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status); } @@ -409,14 +415,19 @@ MaybeStackVector MaybeStackVector result; // TODO: remove this once all the categories are allowed. + // WARNING: when this is removed please make sure to keep the "fahrenhe" => "fahrenheit" mapping UErrorCode internalMuStatus = U_ZERO_ERROR; if (category.compare("temperature") == 0) { CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus); if (U_SUCCESS(internalMuStatus)) { + // The value for -u-mu- is `fahrenhe`, but CLDR and everything else uses `fahrenheit` + if (localeUnitCharString == "fahrenhe") { + localeUnitCharString = CharString("fahrenheit", status); + } // TODO: use the unit category as Java especially when all the categories are allowed.. - if (localeUnitCharString == "celsius" // - || localeUnitCharString == "fahrenheit" // - || localeUnitCharString == "kelvin" // + if (localeUnitCharString == "celsius" + || localeUnitCharString == "fahrenheit" + || localeUnitCharString == "kelvin" ) { UnitPreference unitPref; unitPref.unit.append(localeUnitCharString, status); @@ -426,46 +437,16 @@ MaybeStackVector } } - CharString region(locale.getCountry(), status); - + char regionBuf[8]; + ulocimp_getRegionForSupplementalData(locale.getName(), false, regionBuf, 8, &status); + CharString region(regionBuf, status); + // Check the locale system tag, e.g `ms=metric`. UErrorCode internalMeasureTagStatus = U_ZERO_ERROR; CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus); bool isLocaleSystem = false; - if (U_SUCCESS(internalMeasureTagStatus)) { - if (localeSystem == "metric") { - region.clear(); - region.append("001", status); - isLocaleSystem = true; - } else if (localeSystem == "ussystem") { - region.clear(); - region.append("US", status); - isLocaleSystem = true; - } else if (localeSystem == "uksystem") { - region.clear(); - region.append("GB", status); - isLocaleSystem = true; - } - } - - // Check the region tag, e.g. `rg=uszzz`. - if (!isLocaleSystem) { - UErrorCode internalRgTagStatus = U_ZERO_ERROR; - CharString localeRegion = getKeyWordValue(locale, "rg", internalRgTagStatus); - if (U_SUCCESS(internalRgTagStatus) && localeRegion.length() >= 3) { - if (localeRegion == "default") { - region.clear(); - region.append(localeRegion, status); - } else if (localeRegion[0] >= '0' && localeRegion[0] <= '9') { - region.clear(); - region.append(localeRegion.data(), 3, status); - } else { - // Take the first two character and capitalize them. - region.clear(); - region.append(uprv_toupper(localeRegion[0]), status); - region.append(uprv_toupper(localeRegion[1]), status); - } - } + if (U_SUCCESS(internalMeasureTagStatus) && (localeSystem == "metric" || localeSystem == "ussystem" || localeSystem == "uksystem")) { + isLocaleSystem = true; } int32_t idx = @@ -476,6 +457,48 @@ MaybeStackVector U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`. const UnitPreferenceMetadata *m = metadata_[idx]; + + if (isLocaleSystem) { + // if the locale ID specifies a measurment system, check if ALL of the units we got back + // are members of that system (or are "metric_adjacent", which we consider to match all + // the systems) + bool unitsMatchSystem = true; + ConversionRates rates(status); + for (int32_t i = 0; unitsMatchSystem && i < m->prefsCount; i++) { + const UnitPreference& unitPref = *(unitPrefs_[i + m->prefsOffset]); + MeasureUnitImpl measureUnit = MeasureUnitImpl::forIdentifier(unitPref.unit.data(), status); + for (int32_t j = 0; unitsMatchSystem && j < measureUnit.singleUnits.length(); j++) { + const SingleUnitImpl* singleUnit = measureUnit.singleUnits[j]; + const ConversionRateInfo* rateInfo = rates.extractConversionInfo(singleUnit->getSimpleUnitID(), status); + CharString systems(rateInfo->systems, status); + if (!systems.contains("metric_adjacent")) { // "metric-adjacent" is considered to match all the locale systems + if (!systems.contains(localeSystem.data())) { + unitsMatchSystem = false; + } + } + } + } + + // if any of the units we got back above don't match the mearurement system the locale ID asked for, + // throw out the region and just load the units for the base region for the requested measurement system + if (!unitsMatchSystem) { + region.clear(); + if (localeSystem == "ussystem") { + region.append("US", status); + } else if (localeSystem == "uksystem") { + region.append("GB", status); + } else { + region.append("001", status); + } + idx = getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status); + if (U_FAILURE(status)) { + return result; + } + + m = metadata_[idx]; + } + } + for (int32_t i = 0; i < m->prefsCount; i++) { result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset])); } diff --git a/yass/third_party/icu/source/i18n/units_data.h b/yass/third_party/icu/source/i18n/units_data.h index 118458ecca..ad5033513c 100644 --- a/yass/third_party/icu/source/i18n/units_data.h +++ b/yass/third_party/icu/source/i18n/units_data.h @@ -41,6 +41,7 @@ class U_I18N_API ConversionRateInfo : public UMemory { CharString baseUnit; CharString factor; CharString offset; + CharString systems; }; } // namespace units diff --git a/yass/third_party/icu/source/i18n/unum.cpp b/yass/third_party/icu/source/i18n/unum.cpp index 11a7bcba57..eb6c86252b 100644 --- a/yass/third_party/icu/source/i18n/unum.cpp +++ b/yass/third_party/icu/source/i18n/unum.cpp @@ -28,17 +28,19 @@ #include "unicode/dcfmtsym.h" #include "unicode/curramt.h" #include "unicode/localpointer.h" +#include "unicode/measfmt.h" #include "unicode/udisplaycontext.h" #include "uassert.h" #include "cpputils.h" #include "cstring.h" +#include "putilimp.h" U_NAMESPACE_USE U_CAPI UNumberFormat* U_EXPORT2 -unum_open( UNumberFormatStyle style, +unum_open( UNumberFormatStyle style, const char16_t* pattern, int32_t patternLength, const char* locale, @@ -671,6 +673,7 @@ unum_getTextAttribute(const UNumberFormat* fmt, const NumberFormat* nf = reinterpret_cast(fmt); const DecimalFormat* df = dynamic_cast(nf); + const RuleBasedNumberFormat* rbnf = nullptr; // cast is below for performance if (df != nullptr) { switch(tag) { case UNUM_POSITIVE_PREFIX: @@ -701,8 +704,7 @@ unum_getTextAttribute(const UNumberFormat* fmt, *status = U_UNSUPPORTED_ERROR; return -1; } - } else { - const RuleBasedNumberFormat* rbnf = dynamic_cast(nf); + } else if ((rbnf = dynamic_cast(nf)) != nullptr) { U_ASSERT(rbnf != nullptr); if (tag == UNUM_DEFAULT_RULESET) { res = rbnf->getDefaultRuleSetName(); @@ -716,6 +718,9 @@ unum_getTextAttribute(const UNumberFormat* fmt, *status = U_UNSUPPORTED_ERROR; return -1; } + } else { + *status = U_UNSUPPORTED_ERROR; + return -1; } return res.extract(result, resultLength, *status); @@ -794,15 +799,16 @@ unum_toPattern( const UNumberFormat* fmt, const NumberFormat* nf = reinterpret_cast(fmt); const DecimalFormat* df = dynamic_cast(nf); + const RuleBasedNumberFormat* rbnf = nullptr; // cast is below for performance if (df != nullptr) { if(isPatternLocalized) df->toLocalizedPattern(pat); else df->toPattern(pat); + } else if ((rbnf = dynamic_cast(nf)) != nullptr) { + pat = rbnf->getRules(); } else { - const RuleBasedNumberFormat* rbnf = dynamic_cast(nf); - U_ASSERT(rbnf != nullptr); - pat = rbnf->getRules(); + // leave `pat` empty } return pat.extract(result, resultLength, *status); } diff --git a/yass/third_party/icu/source/i18n/uspoof.cpp b/yass/third_party/icu/source/i18n/uspoof.cpp index 271caeafff..47dac8418e 100644 --- a/yass/third_party/icu/source/i18n/uspoof.cpp +++ b/yass/third_party/icu/source/i18n/uspoof.cpp @@ -15,6 +15,7 @@ * * Unicode Spoof Detection */ +#include "unicode/ubidi.h" #include "unicode/utypes.h" #include "unicode/normalizer2.h" #include "unicode/uspoof.h" @@ -141,8 +142,8 @@ void U_CALLCONV initializeStatics(UErrorCode &status) { u"\\U0001DF00-\\U0001DF1E\\U0001DF25-\\U0001DF2A\\U0001E08F\\U0001E7E0-" u"\\U0001E7E6\\U0001E7E8-\\U0001E7EB\\U0001E7ED\\U0001E7EE\\U0001E7F0-" u"\\U0001E7FE\\U00020000-\\U0002A6DF\\U0002A700-\\U0002B739\\U0002B740-" - u"\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0\\U00030000-" - u"\\U0003134A\\U00031350-\\U000323AF]"; + u"\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0\\U0002EBF0-" + u"\\U0002EE5D\\U00030000-\\U0003134A\\U00031350-\\U000323AF]"; gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat), status); if (gRecommendedSet == nullptr) { @@ -538,6 +539,90 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc, return result; } +U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction, + const char16_t *id1, int32_t length1, + const char16_t *id2, int32_t length2, + UErrorCode *status) { + UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor + UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor + if (id1Str.isBogus() || id2Str.isBogus()) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status); +} + +U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction, + const char *id1, int32_t length1, const char *id2, + int32_t length2, UErrorCode *status) { + if (length1 < -1 || length2 < -1) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString id1Str = UnicodeString::fromUTF8( + StringPiece(id1, length1 >= 0 ? length1 : static_cast(uprv_strlen(id1)))); + UnicodeString id2Str = UnicodeString::fromUTF8( + StringPiece(id2, length2 >= 0 ? length2 : static_cast(uprv_strlen(id2)))); + return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status); +} + +U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc, + UBiDiDirection direction, + const icu::UnicodeString &id1, + const icu::UnicodeString &id2, + UErrorCode *status) { + const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); + if (U_FAILURE(*status)) { + return 0; + } + // + // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable, + // and for definitions of the types (single, whole, mixed-script) of confusables. + + // We only care about a few of the check flags. Ignore the others. + // If no tests relevant to this function have been specified, return an error. + // TODO: is this really the right thing to do? It's probably an error on the caller's part, + // but logically we would just return 0 (no error). + if ((This->fChecks & USPOOF_CONFUSABLE) == 0) { + *status = U_INVALID_STATE_ERROR; + return 0; + } + + // Compute the skeletons and check for confusability. + UnicodeString id1Skeleton; + uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status); + UnicodeString id2Skeleton; + uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status); + if (U_FAILURE(*status)) { + return 0; + } + if (id1Skeleton != id2Skeleton) { + return 0; + } + + // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate + // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets + // of id1 and id2. + ScriptSet id1RSS; + This->getResolvedScriptSet(id1, id1RSS, *status); + ScriptSet id2RSS; + This->getResolvedScriptSet(id2, id2RSS, *status); + + // Turn on all applicable flags + uint32_t result = 0; + if (id1RSS.intersects(id2RSS)) { + result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; + } else { + result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; + if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) { + result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; + } + } + + // Turn off flags that the user doesn't want + return result & This->fChecks; +} + U_CAPI int32_t U_EXPORT2 uspoof_checkUnicodeString(const USpoofChecker *sc, @@ -697,6 +782,60 @@ uspoof_getSkeleton(const USpoofChecker *sc, return destStr.length(); } +U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction, + const UChar *id, int32_t length, UChar *dest, + int32_t destCapacity, UErrorCode *status) { + UnicodeString idStr((length == -1), id, length); // Aliasing constructor + if (idStr.isBogus()) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString destStr; + uspoof_getBidiSkeletonUnicodeString(sc, direction, idStr, destStr, status); + return destStr.extract(dest, destCapacity, *status); +} + + + +U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc, + UBiDiDirection direction, + const UnicodeString &id, + UnicodeString &dest, + UErrorCode *status) { + dest.remove(); + if (direction != UBIDI_LTR && direction != UBIDI_RTL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return dest; + } + UBiDi *bidi = ubidi_open(); + ubidi_setPara(bidi, id.getBuffer(), id.length(), direction, + /*embeddingLevels*/ nullptr, status); + if (U_FAILURE(*status)) { + ubidi_close(bidi); + return dest; + } + UnicodeString reordered; + int32_t const size = ubidi_getProcessedLength(bidi); + UChar* const reorderedBuffer = reordered.getBuffer(size); + if (reorderedBuffer == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + ubidi_close(bidi); + return dest; + } + ubidi_writeReordered(bidi, reorderedBuffer, size, + UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status); + reordered.releaseBuffer(size); + ubidi_close(bidi); + + if (U_FAILURE(*status)) { + return dest; + } + + // The type parameter is deprecated since ICU 58; any number may be passed. + constexpr uint32_t deprecatedType = 58; + return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status); +} + U_I18N_API UnicodeString & U_EXPORT2 @@ -721,19 +860,17 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, for (inputIndex=0; inputIndex < normalizedLen; ) { UChar32 c = nfdId.char32At(inputIndex); inputIndex += U16_LENGTH(c); - This->fSpoofData->confusableLookup(c, skelStr); + if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) { + This->fSpoofData->confusableLookup(c, skelStr); + } } gNfdNormalizer->normalize(skelStr, dest, *status); return dest; } - -U_CAPI int32_t U_EXPORT2 -uspoof_getSkeletonUTF8(const USpoofChecker *sc, - uint32_t type, - const char *id, int32_t length, - char *dest, int32_t destCapacity, +U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, + int32_t length, char *dest, int32_t destCapacity, UErrorCode *status) { SpoofImpl::validateThis(sc, *status); if (U_FAILURE(*status)) { @@ -744,7 +881,8 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc, return 0; } - UnicodeString srcStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast(uprv_strlen(id)))); + UnicodeString srcStr = UnicodeString::fromUTF8( + StringPiece(id, length >= 0 ? length : static_cast(uprv_strlen(id)))); UnicodeString destStr; uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status); if (U_FAILURE(*status)) { @@ -752,8 +890,28 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc, } int32_t lengthInUTF8 = 0; - u_strToUTF8(dest, destCapacity, &lengthInUTF8, - destStr.getBuffer(), destStr.length(), status); + u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status); + return lengthInUTF8; +} + +U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction, + const char *id, int32_t length, char *dest, + int32_t destCapacity, UErrorCode *status) { + if (length < -1) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + UnicodeString srcStr = UnicodeString::fromUTF8( + StringPiece(id, length >= 0 ? length : static_cast(uprv_strlen(id)))); + UnicodeString destStr; + uspoof_getBidiSkeletonUnicodeString(sc, direction, srcStr, destStr, status); + if (U_FAILURE(*status)) { + return 0; + } + + int32_t lengthInUTF8 = 0; + u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status); return lengthInUTF8; } diff --git a/yass/third_party/icu/source/i18n/windtfmt.cpp b/yass/third_party/icu/source/i18n/windtfmt.cpp index 2118f92dce..6d705b299d 100644 --- a/yass/third_party/icu/source/i18n/windtfmt.cpp +++ b/yass/third_party/icu/source/i18n/windtfmt.cpp @@ -30,7 +30,10 @@ #include "unicode/timezone.h" #include "unicode/utmscale.h" +#include "bytesinkutil.h" +#include "charstr.h" #include "cmemory.h" +#include "ulocimp.h" #include "uresimp.h" #include "windtfmt.h" #include "wintzimpl.h" @@ -99,10 +102,13 @@ UnicodeString* Win32DateFormat::getTimeDateFormat(const Calendar *cal, const Loc static UErrorCode GetEquivalentWindowsLocaleName(const Locale& locale, UnicodeString** buffer) { UErrorCode status = U_ZERO_ERROR; - char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; // Convert from names like "en_CA" and "de_DE@collation=phonebook" to "en-CA" and "de-DE-u-co-phonebk". - (void)uloc_toLanguageTag(locale.getName(), asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), false, &status); + CharString asciiBCP47Tag; + { + CharStringByteSink sink(&asciiBCP47Tag); + ulocimp_toLanguageTag(locale.getName(), sink, false, &status); + } if (U_SUCCESS(status)) { diff --git a/yass/third_party/icu/source/i18n/winnmfmt.cpp b/yass/third_party/icu/source/i18n/winnmfmt.cpp index 6efa9a63d8..bf0bd20095 100644 --- a/yass/third_party/icu/source/i18n/winnmfmt.cpp +++ b/yass/third_party/icu/source/i18n/winnmfmt.cpp @@ -24,8 +24,11 @@ #include "unicode/locid.h" #include "unicode/ustring.h" +#include "bytesinkutil.h" +#include "charstr.h" #include "cmemory.h" #include "uassert.h" +#include "ulocimp.h" #include "locmap.h" #ifndef WIN32_LEAN_AND_MEAN @@ -144,10 +147,13 @@ static void freeCurrencyFormat(CURRENCYFMTW *fmt) static UErrorCode GetEquivalentWindowsLocaleName(const Locale& locale, UnicodeString** buffer) { UErrorCode status = U_ZERO_ERROR; - char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; // Convert from names like "en_CA" and "de_DE@collation=phonebook" to "en-CA" and "de-DE-u-co-phonebk". - (void) uloc_toLanguageTag(locale.getName(), asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), false, &status); + CharString asciiBCP47Tag; + { + CharStringByteSink sink(&asciiBCP47Tag); + ulocimp_toLanguageTag(locale.getName(), sink, false, &status); + } if (U_SUCCESS(status)) { diff --git a/yass/third_party/icu/source/i18n/zonemeta.cpp b/yass/third_party/icu/source/i18n/zonemeta.cpp index 42051e2f41..78d2391479 100644 --- a/yass/third_party/icu/source/i18n/zonemeta.cpp +++ b/yass/third_party/icu/source/i18n/zonemeta.cpp @@ -120,6 +120,7 @@ static const char gKeyTypeData[] = "keyTypeData"; static const char gTypeAliasTag[] = "typeAlias"; static const char gTypeMapTag[] = "typeMap"; static const char gTimezoneTag[] = "timezone"; +static const char gIanaMapTag[] = "ianaMap"; static const char gPrimaryZonesTag[] = "primaryZones"; @@ -389,6 +390,35 @@ ZoneMeta::getCanonicalCLDRID(const TimeZone& tz) { return getCanonicalCLDRID(tz.getID(tzID), status); } +UnicodeString& U_EXPORT2 +ZoneMeta::getIanaID(const UnicodeString& tzid, UnicodeString& ianaID, UErrorCode& status) { + // First, get CLDR canonical ID + const char16_t *canonicalID = getCanonicalCLDRID(tzid, status); + if (U_FAILURE(status) || canonicalID == nullptr) { + ianaID.setToBogus(); + return ianaID; + } + // Find IANA mapping if any. + UErrorCode tmpStatus = U_ZERO_ERROR; + UnicodeString tmpKey(canonicalID); + tmpKey.findAndReplace(UnicodeString("/"), UnicodeString(":")); + char keyBuf[ZID_KEY_MAX + 1]; + /* int32_t keyLen = */ tmpKey.extract(0, tmpKey.length(), keyBuf, sizeof(keyBuf), US_INV); + + StackUResourceBundle r; + ures_openDirectFillIn(r.getAlias(), nullptr, gKeyTypeData, &tmpStatus); + ures_getByKey(r.getAlias(), gIanaMapTag, r.getAlias(), &tmpStatus); + ures_getByKey(r.getAlias(), gTimezoneTag, r.getAlias(), &tmpStatus); + int32_t tmpLen = 0; + const char16_t* tmpIana = ures_getStringByKey(r.getAlias(), keyBuf, &tmpLen, &tmpStatus); + if (U_SUCCESS(tmpStatus)) { + ianaID.setTo(true, tmpIana, -1); + } else { + ianaID.setTo(true, canonicalID, -1); + } + return ianaID; +} + static void U_CALLCONV countryInfoVectorsInit(UErrorCode &status) { // Create empty vectors // No deleters for these UVectors, it's a reference to a resource bundle string. diff --git a/yass/third_party/icu/source/i18n/zonemeta.h b/yass/third_party/icu/source/i18n/zonemeta.h index 8c5840c265..dec0a65c3c 100644 --- a/yass/third_party/icu/source/i18n/zonemeta.h +++ b/yass/third_party/icu/source/i18n/zonemeta.h @@ -54,6 +54,17 @@ public: */ static const char16_t* U_EXPORT2 getCanonicalCLDRID(const TimeZone& tz); + /** + * Returns primary IANA zone ID for the input zone ID, which might be the id itself. + * If the given system tzid is not known, U_ILLEGAL_ARGUMENT_ERROR is set in the status. + * + * @param tzid Zone ID + * @param ianaID Output IANA ID + * @param status Receives the status + * @return A primary IANA zone ID equivalent to the input zone ID. + */ + static UnicodeString& U_EXPORT2 getIanaID(const UnicodeString& tzid, UnicodeString& ianaID, UErrorCode& status); + /** * Return the canonical country code for this tzid. If we have none, or if the time zone * is not associated with a country, return bogus string. diff --git a/yass/third_party/icu/source/stubdata/stubdata.cpp b/yass/third_party/icu/source/stubdata/stubdata.cpp index 0fa2b170eb..32ceb1850b 100644 --- a/yass/third_party/icu/source/stubdata/stubdata.cpp +++ b/yass/third_party/icu/source/stubdata/stubdata.cpp @@ -1,3 +1,4 @@ + // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /****************************************************************************** @@ -17,36 +18,40 @@ * * The stub data library (for which this file is the source) is sufficient * for running the data building tools. +* */ - -#include "stubdata.h" - -extern "C" U_EXPORT const ICU_Data_Header U_ICUDATA_ENTRY_POINT alignas(16) = { - 32, /* headerSize */ - 0xda, /* magic1, (see struct MappedData in udata.c) */ - 0x27, /* magic2 */ - { /*UDataInfo */ - sizeof(UDataInfo), /* size */ - 0, /* reserved */ - +#include "ucmndata.h" +extern "C" U_EXPORT const ICU_Data_Header U_ICUDATA_ENTRY_POINT = { + { /* DataHeader */ + { /* MappedData */ + 32, /* headerSize */ + 0xda, /* magic1, (see struct MappedData in udata.c) */ + 0x27, /* magic2 */ + }, + { /*UDataInfo */ + sizeof(UDataInfo), /* size */ + 0, /* reserved */ #if U_IS_BIG_ENDIAN - 1, + 1, #else - 0, + 0, #endif - - U_CHARSET_FAMILY, - sizeof(char16_t), - 0, /* reserved */ - {0x54, 0x6f, 0x43, 0x50}, /* data format identifier: "ToCP" */ - {1, 0, 0, 0}, /* format version major, minor, milli, micro */ - {0, 0, 0, 0} /* dataVersion */ + U_CHARSET_FAMILY, + sizeof(UChar), + 0, /* reserved */ + { /* data format identifier */ + 0x54, 0x6f, 0x43, 0x50}, /* "ToCP" */ + {1, 0, 0, 0}, /* format version major, minor, milli, micro */ + {0, 0, 0, 0} /* dataVersion */ + }, }, - { 's', 't', 'u', 'b', 'd', 'a', 't', 'a' }, /* Padding[8] */ + {0,0,0,0,0,0,0,0}, /* Padding[8] */ 0, /* count */ 0, /* Reserved */ { /* TOC structure */ - 0 , 0 /* name and data entries. Count says there are none, */ +/* { */ + 0 , 0 , 0, 0 /* name and data entries. Count says there are none, */ /* but put one in just in case. */ +/* } */ } -}; +}; \ No newline at end of file diff --git a/yass/third_party/libc++/trunk/CMakeLists.txt b/yass/third_party/libc++/trunk/CMakeLists.txt index d392e95077..e565c47c76 100644 --- a/yass/third_party/libc++/trunk/CMakeLists.txt +++ b/yass/third_party/libc++/trunk/CMakeLists.txt @@ -229,8 +229,6 @@ option(LIBCXX_USE_COMPILER_RT "Use compiler-rt instead of libgcc" OFF) # ABI Library options --------------------------------------------------------- if (MSVC) set(LIBCXX_DEFAULT_ABI_LIBRARY "vcruntime") -elseif (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") - set(LIBCXX_DEFAULT_ABI_LIBRARY "libcxxrt") else() set(LIBCXX_DEFAULT_ABI_LIBRARY "libcxxabi") endif() diff --git a/yass/third_party/libc++/trunk/benchmarks/CMakeLists.txt b/yass/third_party/libc++/trunk/benchmarks/CMakeLists.txt index b436e96f17..387e013afe 100644 --- a/yass/third_party/libc++/trunk/benchmarks/CMakeLists.txt +++ b/yass/third_party/libc++/trunk/benchmarks/CMakeLists.txt @@ -176,12 +176,14 @@ set(BENCHMARK_TESTS algorithms/count.bench.cpp algorithms/equal.bench.cpp algorithms/find.bench.cpp + algorithms/fill.bench.cpp algorithms/for_each.bench.cpp algorithms/lower_bound.bench.cpp algorithms/make_heap.bench.cpp algorithms/make_heap_then_sort_heap.bench.cpp algorithms/min.bench.cpp algorithms/min_max_element.bench.cpp + algorithms/mismatch.bench.cpp algorithms/pop_heap.bench.cpp algorithms/pstl.stable_sort.bench.cpp algorithms/push_heap.bench.cpp diff --git a/yass/third_party/libc++/trunk/benchmarks/algorithms/fill.bench.cpp b/yass/third_party/libc++/trunk/benchmarks/algorithms/fill.bench.cpp new file mode 100644 index 0000000000..40f37425c3 --- /dev/null +++ b/yass/third_party/libc++/trunk/benchmarks/algorithms/fill.bench.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +static void bm_fill_n(benchmark::State& state) { + std::vector vec1(state.range()); + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(std::fill_n(vec1.begin(), vec1.size(), false)); + } +} +BENCHMARK(bm_fill_n)->DenseRange(1, 8)->Range(16, 1 << 20); + +static void bm_ranges_fill_n(benchmark::State& state) { + std::vector vec1(state.range()); + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(std::ranges::fill_n(vec1.begin(), vec1.size(), false)); + } +} +BENCHMARK(bm_ranges_fill_n)->DenseRange(1, 8)->Range(16, 1 << 20); + +static void bm_fill(benchmark::State& state) { + std::vector vec1(state.range()); + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + std::fill(vec1.begin(), vec1.end(), false); + } +} +BENCHMARK(bm_fill)->DenseRange(1, 8)->Range(16, 1 << 20); + +static void bm_ranges_fill(benchmark::State& state) { + std::vector vec1(state.range()); + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(std::ranges::fill(vec1, false)); + } +} +BENCHMARK(bm_ranges_fill)->DenseRange(1, 8)->Range(16, 1 << 20); + +BENCHMARK_MAIN(); diff --git a/yass/third_party/libc++/trunk/benchmarks/algorithms/mismatch.bench.cpp b/yass/third_party/libc++/trunk/benchmarks/algorithms/mismatch.bench.cpp new file mode 100644 index 0000000000..9274932a76 --- /dev/null +++ b/yass/third_party/libc++/trunk/benchmarks/algorithms/mismatch.bench.cpp @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +// TODO: Look into benchmarking aligned and unaligned memory explicitly +// (currently things happen to be aligned because they are malloced that way) +template +static void bm_mismatch(benchmark::State& state) { + std::vector vec1(state.range(), '1'); + std::vector vec2(state.range(), '1'); + std::mt19937_64 rng(std::random_device{}()); + + vec1.back() = '2'; + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(std::mismatch(vec1.begin(), vec1.end(), vec2.begin())); + } +} +BENCHMARK(bm_mismatch)->DenseRange(1, 8)->Range(16, 1 << 20); +BENCHMARK(bm_mismatch)->DenseRange(1, 8)->Range(16, 1 << 20); +BENCHMARK(bm_mismatch)->DenseRange(1, 8)->Range(16, 1 << 20); + +BENCHMARK_MAIN(); diff --git a/yass/third_party/libc++/trunk/docs/Contributing.rst b/yass/third_party/libc++/trunk/docs/Contributing.rst index 596d86ef22..90aabc9c4f 100644 --- a/yass/third_party/libc++/trunk/docs/Contributing.rst +++ b/yass/third_party/libc++/trunk/docs/Contributing.rst @@ -156,7 +156,7 @@ sure you don't forget anything: - Did you add all new named declarations to the ``std`` module? - If you added a header: - - Did you add it to ``include/module.modulemap.in``? + - Did you add it to ``include/module.modulemap``? - Did you add it to ``include/CMakeLists.txt``? - If it's a public header, did you update ``utils/libcxx/header_information.py``? diff --git a/yass/third_party/libc++/trunk/docs/FeatureTestMacroTable.rst b/yass/third_party/libc++/trunk/docs/FeatureTestMacroTable.rst index 60e0aea976..b213f430aa 100644 --- a/yass/third_party/libc++/trunk/docs/FeatureTestMacroTable.rst +++ b/yass/third_party/libc++/trunk/docs/FeatureTestMacroTable.rst @@ -442,7 +442,7 @@ Status --------------------------------------------------- ----------------- ``__cpp_lib_span_initializer_list`` ``202311L`` --------------------------------------------------- ----------------- - ``__cpp_lib_sstream_from_string_view`` *unimplemented* + ``__cpp_lib_sstream_from_string_view`` ``202306L`` --------------------------------------------------- ----------------- ``__cpp_lib_submdspan`` *unimplemented* --------------------------------------------------- ----------------- diff --git a/yass/third_party/libc++/trunk/docs/Modules.rst b/yass/third_party/libc++/trunk/docs/Modules.rst index ee2b81d3b9..5b027ed1bd 100644 --- a/yass/third_party/libc++/trunk/docs/Modules.rst +++ b/yass/third_party/libc++/trunk/docs/Modules.rst @@ -178,34 +178,13 @@ This is a small sample program that uses the module ``std``. It consists of a ) FetchContent_MakeAvailable(std) - # - # Adjust project compiler flags - # - - add_compile_options($<$:-fprebuilt-module-path=${std_BINARY_DIR}/CMakeFiles/std.dir/>) - add_compile_options($<$:-fprebuilt-module-path=${std_BINARY_DIR}/CMakeFiles/std.compat.dir/>) - add_compile_options($<$:-nostdinc++>) - # The include path needs to be set to be able to use macros from headers. - # For example from, the headers and . - add_compile_options($<$:-isystem>) - add_compile_options($<$:${LIBCXX_BUILD}/include/c++/v1>) - - # - # Adjust project linker flags - # - - add_link_options($<$:-nostdlib++>) - add_link_options($<$:-L${LIBCXX_BUILD}/lib>) - add_link_options($<$:-Wl,-rpath,${LIBCXX_BUILD}/lib>) - # Linking against the standard c++ library is required for CMake to get the proper dependencies. - link_libraries(std c++) - link_libraries(std.compat c++) - # # Add the project # add_executable(main) + add_dependencies(main std.compat) + target_link_libraries(main std.compat) target_sources(main PRIVATE main.cpp @@ -218,13 +197,9 @@ Building this project is done with the following steps, assuming the files $ mkdir build $ cmake -G Ninja -S . -B build -DCMAKE_CXX_COMPILER= -DLIBCXX_BUILD= - $ ninja -j1 std -C build $ ninja -C build $ build/main -.. note:: The ``std`` dependencies of ``std.compat`` is not always resolved when - building the ``std`` target using multiple jobs. - .. warning:: ```` should point point to the real binary and not to a symlink. diff --git a/yass/third_party/libc++/trunk/docs/ReleaseNotes/19.rst b/yass/third_party/libc++/trunk/docs/ReleaseNotes/19.rst index 0d381df5f0..dd39c1bbbc 100644 --- a/yass/third_party/libc++/trunk/docs/ReleaseNotes/19.rst +++ b/yass/third_party/libc++/trunk/docs/ReleaseNotes/19.rst @@ -41,6 +41,7 @@ Implemented Papers - P2637R3 - Member ``visit`` - P2652R2 - Disallow User Specialization of ``allocator_traits`` - P2819R2 - Add ``tuple`` protocol to ``complex`` +- P2495R3 - Interfacing ``stringstream``\s with ``string_view`` - P2302R4 - ``std::ranges::contains`` - P1659R3 - ``std::ranges::starts_with`` and ``std::ranges::ends_with`` @@ -48,6 +49,10 @@ Improvements and New Features ----------------------------- - The performance of growing ``std::vector`` has been improved for trivially relocatable types. +- The performance of ``ranges::fill`` and ``ranges::fill_n`` has been improved for ``vector::iterator``\s, + resulting in a performance increase of up to 1400x. +- The ``std::mismatch`` algorithm has been optimized for integral types, which can lead up to 40x performance + improvements. Deprecations and Removals ------------------------- @@ -64,10 +69,11 @@ Deprecations and Removals provided, and such a base template is bound to be incorrect for some types, which could currently cause unexpected behavior while going undetected. -- TODO: The ``_LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT`` macro that changed the behavior for narrowing conversions +- The ``_LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT`` macro that changed the behavior for narrowing conversions in ``std::variant`` has been removed in LLVM 19. -- TODO: The ``_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS`` macro has been removed in LLVM 19. +- The ``_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS`` and ``_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_VOID_SPECIALIZATION`` + macros have been removed in LLVM 19. - TODO: The ``_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES`` and ``_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES`` macros have been removed in LLVM 19. C++17 and C++20 removed features can still be re-enabled individually. diff --git a/yass/third_party/libc++/trunk/docs/Status/Cxx23Issues.csv b/yass/third_party/libc++/trunk/docs/Status/Cxx23Issues.csv index 70480b3382..ebdc4a745c 100644 --- a/yass/third_party/libc++/trunk/docs/Status/Cxx23Issues.csv +++ b/yass/third_party/libc++/trunk/docs/Status/Cxx23Issues.csv @@ -46,7 +46,7 @@ "`3473 `__","Normative encouragement in non-normative note","November 2020","|Complete|","15.0","|format|" "`3474 `__","Nesting ``join_views`` is broken because of CTAD","November 2020","|Complete|","15.0","|ranges|" "`3476 `__","``thread`` and ``jthread`` constructors require that the parameters be move-constructible but never move construct the parameters","November 2020","","" -"`3477 `__","Simplify constraints for ``semiregular-box``","November 2020","","","|ranges|" +"`3477 `__","Simplify constraints for ``semiregular-box``","November 2020","|Complete|","13.0","|ranges|" "`3482 `__","``drop_view``'s const begin should additionally require ``sized_range``","November 2020","|Complete|","14.0","|ranges|" "`3483 `__","``transform_view::iterator``'s difference is overconstrained","November 2020","|Complete|","14.0","|ranges|" "","","","","","" @@ -65,7 +65,7 @@ `2997 `__,"LWG 491 and the specification of ``{forward_,}list::unique``","June 2021","","" `3410 `__,"``lexicographical_compare_three_way`` is overspecified","June 2021","|Complete|","17.0","|spaceship|" `3430 `__,"``std::fstream`` & co. should be constructible from string_view","June 2021","","" -`3462 `__,"§[formatter.requirements]: Formatter requirements forbid use of ``fc.arg()``","June 2021","","","|format|" +`3462 `__,"§[formatter.requirements]: Formatter requirements forbid use of ``fc.arg()``","June 2021","|Nothing To Do|","","|format|" `3481 `__,"``viewable_range`` mishandles lvalue move-only views","June 2021","Superseded by `P2415R2 `__","","|ranges|" `3506 `__,"Missing allocator-extended constructors for ``priority_queue``","June 2021","|Complete|","14.0" `3517 `__,"``join_view::iterator``'s ``iter_swap`` is underconstrained","June 2021","|Complete|","14.0","|ranges|" @@ -77,7 +77,7 @@ `3523 `__,"``iota_view::sentinel`` is not always ``iota_view``'s sentinel","June 2021","","","|ranges|" `3526 `__,"Return types of ``uses_allocator_construction_args`` unspecified","June 2021","","" `3527 `__,"``uses_allocator_construction_args`` handles rvalue pairs of rvalue references incorrectly","June 2021","","" -`3528 `__,"``make_from_tuple`` can perform (the equivalent of) a C-style cast","June 2021","","" +`3528 `__,"``make_from_tuple`` can perform (the equivalent of) a C-style cast","June 2021","|Complete|","19.0" `3529 `__,"``priority_queue(first, last)`` should construct ``c`` with ``(first, last)``","June 2021","|Complete|","14.0" `3530 `__,"``BUILTIN-PTR-MEOW`` should not opt the type out of syntactic checks","June 2021","","" `3532 `__,"``split_view::inner-iterator::operator++(int)`` should depend on ``Base``","June 2021","","","|ranges|" @@ -169,7 +169,7 @@ "`3683 `__","``operator==`` for ``polymorphic_allocator`` cannot deduce template argument in common cases","July 2022","","" "`3687 `__","``expected`` move constructor should move","July 2022","|Complete|","16.0" "`3692 `__","``zip_view::iterator``'s ``operator<=>`` is overconstrained","July 2022","","","|ranges| |spaceship|" -"`3701 `__","Make ``formatter, charT>`` requirement explicit","July 2022","","","|format|" +"`3701 `__","Make ``formatter, charT>`` requirement explicit","July 2022","|Complete|","15.0","|format|" "`3702 `__","Should ``zip_transform_view::iterator`` remove ``operator<``","July 2022","","","|ranges| |spaceship|" "`3703 `__","Missing requirements for ``expected`` requires ``is_void``","July 2022","|Complete|","16.0" "`3704 `__","LWG 2059 added overloads that might be ill-formed for sets","July 2022","","" @@ -181,7 +181,7 @@ "`3711 `__","Missing preconditions for slide_view constructor","July 2022","","","|ranges|" "`3712 `__","``chunk_view`` and ``slide_view`` should not be ``default_initializable``","July 2022","","","|ranges|" "`3713 `__","Sorted with respect to comparator (only)","July 2022","","" -"`3715 `__","``view_interface::empty`` is overconstrained","July 2022","","","|ranges|" +"`3715 `__","``view_interface::empty`` is overconstrained","July 2022","|Complete|","19.0","|ranges|" "`3719 `__","Directory iterators should be usable with default sentinel","July 2022","|Complete|","17.0","|ranges|" "`3721 `__","Allow an ``arg-id`` with a value of zero for ``width`` in ``std-format-spec``","July 2022","|Complete|","16.0","|format|" "`3724 `__","``decay-copy`` should be constrained","July 2022","|Complete|","14.0" @@ -283,7 +283,7 @@ "`3655 `__","The ``INVOKE`` operation and union types","February 2023","|Complete|","18.0","" "`3723 `__","``priority_queue::push_range`` needs to ``append_range``","February 2023","","","|ranges|" "`3734 `__","Inconsistency in ``inout_ptr`` and ``out_ptr`` for empty case","February 2023","","","" -"`3772 `__","``repeat_view``'s ``piecewise`` constructor is missing Postconditions","February 2023","","","|ranges|" +"`3772 `__","``repeat_view``'s ``piecewise`` constructor is missing Postconditions","February 2023","|Complete|","17.0","|ranges|" "`3786 `__","Flat maps' deduction guide needs to default ``Allocator`` to be useful","February 2023","","","" "`3803 `__","``flat_foo`` constructors taking ``KeyContainer`` lack ``KeyCompare`` parameter","February 2023","","","" "`3810 `__","CTAD for ``std::basic_format_args``","February 2023","|Complete|","17.0","|format|" @@ -295,7 +295,7 @@ "`3847 `__","``ranges::to`` can still return views","February 2023","|Complete|","17.0","|ranges|" "`3862 `__","``basic_const_iterator``'s ``common_type`` specialization is underconstrained","February 2023","","","" "`3865 `__","Sorting a range of ``pairs``","February 2023","|Complete|","17.0","|ranges|" -"`3869 `__","Deprecate ``std::errc`` constants related to UNIX STREAMS","February 2023","","","" +"`3869 `__","Deprecate ``std::errc`` constants related to UNIX STREAMS","February 2023","|Complete|","19.0","" "`3870 `__","Remove ``voidify``","February 2023","","","" "`3871 `__","Adjust note about ``terminate``","February 2023","","","" "`3872 `__","``basic_const_iterator`` should have custom ``iter_move``","February 2023","","","" diff --git a/yass/third_party/libc++/trunk/docs/Status/Cxx23Papers.csv b/yass/third_party/libc++/trunk/docs/Status/Cxx23Papers.csv index 56e1468b4c..80547c5c1f 100644 --- a/yass/third_party/libc++/trunk/docs/Status/Cxx23Papers.csv +++ b/yass/third_party/libc++/trunk/docs/Status/Cxx23Papers.csv @@ -100,7 +100,7 @@ "`P2396R1 `__","LWG", "Concurrency TS 2 fixes ", "November 2022","","","|concurrency TS|" "`P2505R5 `__","LWG", "Monadic Functions for ``std::expected``", "November 2022","|Complete|","17.0","" "`P2539R4 `__","LWG", "Should the output of ``std::print`` to a terminal be synchronized with the underlying stream?", "November 2022","|Complete|","18.0","|format|" -"`P2602R2 `__","LWG", "Poison Pills are Too Toxic", "November 2022","","","|ranges|" +"`P2602R2 `__","LWG", "Poison Pills are Too Toxic", "November 2022","|Complete|","19.0","|ranges|" "`P2708R1 `__","LWG", "No Further Fundamentals TSes", "November 2022","|Nothing to do|","","" "","","","","","","" "`P0290R4 `__","LWG", "``apply()`` for ``synchronized_value``","February 2023","","","|concurrency TS|" diff --git a/yass/third_party/libc++/trunk/docs/Status/Cxx2cPapers.csv b/yass/third_party/libc++/trunk/docs/Status/Cxx2cPapers.csv index 1c895f79a4..4a5443dea1 100644 --- a/yass/third_party/libc++/trunk/docs/Status/Cxx2cPapers.csv +++ b/yass/third_party/libc++/trunk/docs/Status/Cxx2cPapers.csv @@ -6,7 +6,7 @@ "`P2545R4 `__","LWG","Read-Copy Update (RCU)","Varna June 2023","","","" "`P2530R3 `__","LWG","Hazard Pointers for C++26","Varna June 2023","","","" "`P2538R1 `__","LWG","ADL-proof ``std::projected``","Varna June 2023","|Complete|","18.0","|ranges|" -"`P2495R3 `__","LWG","Interfacing ``stringstreams`` with ``string_view``","Varna June 2023","","","" +"`P2495R3 `__","LWG","Interfacing ``stringstream``\s with ``string_view``","Varna June 2023","|Complete|","19.0","" "`P2510R3 `__","LWG","Formatting pointers","Varna June 2023","|Complete| [#note-P2510R3]_","17.0","|format|" "`P2198R7 `__","LWG","Freestanding Feature-Test Macros and Implementation-Defined Extensions","Varna June 2023","","","" "`P2338R4 `__","LWG","Freestanding Library: Character primitives and the C library","Varna June 2023","","","" diff --git a/yass/third_party/libc++/trunk/docs/Status/SpaceshipPapers.csv b/yass/third_party/libc++/trunk/docs/Status/SpaceshipPapers.csv index 48fe417bc7..21b788ee42 100644 --- a/yass/third_party/libc++/trunk/docs/Status/SpaceshipPapers.csv +++ b/yass/third_party/libc++/trunk/docs/Status/SpaceshipPapers.csv @@ -3,10 +3,10 @@ `P2404R3 `_,"Relaxing ``equality_comparable_with``'s, ``totally_ordered_with``'s, and ``three_way_comparable_with``'s common reference requirements to support move-only types",, `LWG3330 `_,Include ```` from most library headers,"|Complete|","13.0" `LWG3347 `_,"``std::pair`` now requires ``T`` and ``U`` to be *less-than-comparable*",|Nothing To Do|, -`LWG3350 `_,Simplify return type of lexicographical_compare_three_way,|Nothing To Do|, +`LWG3350 `_,Simplify return type of ``lexicographical_compare_three_way``,|Nothing To Do|, `LWG3360 `_,``three_way_comparable_with`` is inconsistent with similar concepts,|Nothing To Do|, -`LWG3380 `_,common_type and comparison categories,|Nothing To Do|, -`LWG3395 `_,Definition for three-way comparison needs to be updated,|Nothing To Do|, +`LWG3380 `_,``common_type`` and comparison categories,|Nothing To Do|, +`LWG3395 `_,Definition for *three-way* comparison needs to be updated,|Nothing To Do|, `P0905R1 `_,Symmetry for spaceship,, `P1120R0 `_,Consistency improvements for ``<=>`` and other comparison operators,, `LWG3431 `_,``<=>`` for containers should require ``three_way_comparable`` instead of ``<=>``,, diff --git a/yass/third_party/libc++/trunk/docs/UsingLibcxx.rst b/yass/third_party/libc++/trunk/docs/UsingLibcxx.rst index 3b1be286c1..ac12b0b969 100644 --- a/yass/third_party/libc++/trunk/docs/UsingLibcxx.rst +++ b/yass/third_party/libc++/trunk/docs/UsingLibcxx.rst @@ -244,18 +244,6 @@ C++20 Specific Configuration Macros This macro is deprecated and will be removed in LLVM-19. Use the individual macros listed below. -**_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS**: - This macro is used to re-enable redundant members of `allocator`, - including `pointer`, `reference`, `rebind`, `address`, `max_size`, - `construct`, `destroy`, and the two-argument overload of `allocate`. - This macro has been deprecated and will be removed in LLVM-19. - -**_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_VOID_SPECIALIZATION**: - This macro is used to re-enable the library-provided specializations of - `allocator` and `allocator`. - Use it in conjunction with `_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS` - to ensure that removed members of `allocator` can be accessed. - **_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS**: This macro is used to re-enable the `argument_type`, `result_type`, `first_argument_type`, and `second_argument_type` members of class diff --git a/yass/third_party/libc++/trunk/include/CMakeLists.txt b/yass/third_party/libc++/trunk/include/CMakeLists.txt index 459b077087..07b5e974ea 100644 --- a/yass/third_party/libc++/trunk/include/CMakeLists.txt +++ b/yass/third_party/libc++/trunk/include/CMakeLists.txt @@ -217,6 +217,7 @@ set(files __algorithm/shift_right.h __algorithm/shuffle.h __algorithm/sift_down.h + __algorithm/simd_utils.h __algorithm/sort.h __algorithm/sort_heap.h __algorithm/stable_partition.h @@ -380,7 +381,6 @@ set(files __format/format_context.h __format/format_error.h __format/format_functions.h - __format/format_fwd.h __format/format_parse_context.h __format/format_string.h __format/format_to_n_result.h @@ -430,21 +430,27 @@ set(files __fwd/array.h __fwd/bit_reference.h __fwd/complex.h + __fwd/deque.h + __fwd/format.h __fwd/fstream.h __fwd/functional.h __fwd/ios.h __fwd/istream.h __fwd/mdspan.h + __fwd/memory.h __fwd/memory_resource.h __fwd/ostream.h __fwd/pair.h + __fwd/queue.h __fwd/span.h __fwd/sstream.h + __fwd/stack.h __fwd/streambuf.h __fwd/string.h __fwd/string_view.h __fwd/subrange.h __fwd/tuple.h + __fwd/vector.h __hash_table __ios/fpos.h __iterator/access.h @@ -701,6 +707,7 @@ set(files __thread/thread.h __thread/timed_backoff_policy.h __tree + __tuple/find_index.h __tuple/make_tuple_types.h __tuple/pair_like.h __tuple/sfinae_helpers.h @@ -754,10 +761,7 @@ set(files __type_traits/is_constant_evaluated.h __type_traits/is_constructible.h __type_traits/is_convertible.h - __type_traits/is_copy_assignable.h - __type_traits/is_copy_constructible.h __type_traits/is_core_convertible.h - __type_traits/is_default_constructible.h __type_traits/is_destructible.h __type_traits/is_empty.h __type_traits/is_enum.h @@ -773,17 +777,10 @@ set(files __type_traits/is_member_function_pointer.h __type_traits/is_member_object_pointer.h __type_traits/is_member_pointer.h - __type_traits/is_move_assignable.h - __type_traits/is_move_constructible.h __type_traits/is_nothrow_assignable.h __type_traits/is_nothrow_constructible.h __type_traits/is_nothrow_convertible.h - __type_traits/is_nothrow_copy_assignable.h - __type_traits/is_nothrow_copy_constructible.h - __type_traits/is_nothrow_default_constructible.h __type_traits/is_nothrow_destructible.h - __type_traits/is_nothrow_move_assignable.h - __type_traits/is_nothrow_move_constructible.h __type_traits/is_null_pointer.h __type_traits/is_object.h __type_traits/is_pod.h @@ -804,14 +801,9 @@ set(files __type_traits/is_trivial.h __type_traits/is_trivially_assignable.h __type_traits/is_trivially_constructible.h - __type_traits/is_trivially_copy_assignable.h - __type_traits/is_trivially_copy_constructible.h __type_traits/is_trivially_copyable.h - __type_traits/is_trivially_default_constructible.h __type_traits/is_trivially_destructible.h __type_traits/is_trivially_lexicographically_comparable.h - __type_traits/is_trivially_move_assignable.h - __type_traits/is_trivially_move_constructible.h __type_traits/is_trivially_relocatable.h __type_traits/is_unbounded_array.h __type_traits/is_union.h @@ -968,6 +960,7 @@ set(files mdspan memory memory_resource + module.modulemap mutex new numbers @@ -1021,18 +1014,10 @@ set(files wctype.h ) -foreach(feature LIBCXX_ENABLE_FILESYSTEM LIBCXX_ENABLE_LOCALIZATION LIBCXX_ENABLE_THREADS LIBCXX_ENABLE_WIDE_CHARACTERS) - if (NOT ${${feature}}) - set(requires_${feature} "requires LIBCXX_CONFIGURED_WITHOUT_SUPPORT_FOR_THIS_HEADER") - endif() -endforeach() - configure_file("__config_site.in" "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}/__config_site" @ONLY) -configure_file("module.modulemap.in" "${LIBCXX_GENERATED_INCLUDE_DIR}/module.modulemap" @ONLY) configure_file("${LIBCXX_ASSERTION_HANDLER_FILE}" "${LIBCXX_GENERATED_INCLUDE_DIR}/__assertion_handler" COPYONLY) set(_all_includes "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}/__config_site" - "${LIBCXX_GENERATED_INCLUDE_DIR}/module.modulemap" "${LIBCXX_GENERATED_INCLUDE_DIR}/__assertion_handler") foreach(f ${files}) set(src "${CMAKE_CURRENT_SOURCE_DIR}/${f}") diff --git a/yass/third_party/libc++/trunk/include/__algorithm/copy.h b/yass/third_party/libc++/trunk/include/__algorithm/copy.h index 4c3815405a..0890b895f5 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/copy.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/copy.h @@ -32,7 +32,7 @@ template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter); template -struct __copy_loop { +struct __copy_impl { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { @@ -94,9 +94,7 @@ struct __copy_loop { __local_first = _Traits::__begin(++__segment_iterator); } } -}; -struct __copy_trivial { // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -108,7 +106,7 @@ struct __copy_trivial { template pair<_InIter, _OutIter> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __copy(_InIter __first, _Sent __last, _OutIter __result) { - return std::__dispatch_copy_or_move<_AlgPolicy, __copy_loop<_AlgPolicy>, __copy_trivial>( + return std::__copy_move_unwrap_iters<__copy_impl<_AlgPolicy> >( std::move(__first), std::move(__last), std::move(__result)); } diff --git a/yass/third_party/libc++/trunk/include/__algorithm/copy_backward.h b/yass/third_party/libc++/trunk/include/__algorithm/copy_backward.h index 3ec88d8bd5..73dc846a97 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/copy_backward.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/copy_backward.h @@ -15,7 +15,7 @@ #include <__config> #include <__iterator/segmented_iterator.h> #include <__type_traits/common_type.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter> __copy_backward(_InIter __first, _Sent __last, _OutIter __result); template -struct __copy_backward_loop { +struct __copy_backward_impl { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { @@ -104,9 +104,7 @@ struct __copy_backward_loop { __local_last = _Traits::__end(__segment_iterator); } } -}; -struct __copy_backward_trivial { // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -118,7 +116,7 @@ struct __copy_backward_trivial { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_BidirectionalIterator1, _BidirectionalIterator2> __copy_backward(_BidirectionalIterator1 __first, _Sentinel __last, _BidirectionalIterator2 __result) { - return std::__dispatch_copy_or_move<_AlgPolicy, __copy_backward_loop<_AlgPolicy>, __copy_backward_trivial>( + return std::__copy_move_unwrap_iters<__copy_backward_impl<_AlgPolicy> >( std::move(__first), std::move(__last), std::move(__result)); } diff --git a/yass/third_party/libc++/trunk/include/__algorithm/copy_move_common.h b/yass/third_party/libc++/trunk/include/__algorithm/copy_move_common.h index 0fc7a5e3ce..12a26c6d6a 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/copy_move_common.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/copy_move_common.h @@ -19,7 +19,7 @@ #include <__type_traits/enable_if.h> #include <__type_traits/is_always_bitcastable.h> #include <__type_traits/is_constant_evaluated.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> #include <__type_traits/is_trivially_assignable.h> #include <__type_traits/is_trivially_copyable.h> #include <__type_traits/is_volatile.h> @@ -81,30 +81,17 @@ __copy_backward_trivial_impl(_In* __first, _In* __last, _Out* __result) { // Iterator unwrapping and dispatching to the correct overload. -template -struct __overload : _F1, _F2 { - using _F1::operator(); - using _F2::operator(); -}; - -template -struct __can_rewrap : false_type {}; - -template -struct __can_rewrap<_InIter, - _Sent, - _OutIter, - // Note that sentinels are always copy-constructible. - __enable_if_t< is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value > > - : true_type {}; +template +struct __can_rewrap + : integral_constant::value && is_copy_constructible<_OutIter>::value> {}; template ::value, int> = 0> + __enable_if_t<__can_rewrap<_InIter, _OutIter>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter> -__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) { +__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) { auto __range = std::__unwrap_range(__first, std::move(__last)); auto __result = _Algorithm()(std::move(__range.first), std::move(__range.second), std::__unwrap_iter(__out_first)); return std::make_pair(std::__rewrap_range<_Sent>(std::move(__first), std::move(__result.first)), @@ -115,24 +102,12 @@ template ::value, int> = 0> + __enable_if_t::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter> -__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) { +__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) { return _Algorithm()(std::move(__first), std::move(__last), std::move(__out_first)); } -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter> -__dispatch_copy_or_move(_InIter __first, _Sent __last, _OutIter __out_first) { - using _Algorithm = __overload<_NaiveAlgorithm, _OptimizedAlgorithm>; - return std::__unwrap_and_dispatch<_Algorithm>(std::move(__first), std::move(__last), std::move(__out_first)); -} - _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/yass/third_party/libc++/trunk/include/__algorithm/equal.h b/yass/third_party/libc++/trunk/include/__algorithm/equal.h index 3c0e3060e3..c76a16b47f 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/equal.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/equal.h @@ -69,20 +69,6 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first } #if _LIBCPP_STD_VER >= 14 -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool -__equal(_InputIterator1 __first1, - _InputIterator1 __last1, - _InputIterator2 __first2, - _InputIterator2 __last2, - _BinaryPredicate __pred, - input_iterator_tag, - input_iterator_tag) { - for (; __first1 != __last1 && __first2 != __last2; ++__first1, (void)++__first2) - if (!__pred(*__first1, *__first2)) - return false; - return __first1 == __last1 && __first2 == __last2; -} template _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( @@ -110,17 +96,18 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&, return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); } -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool -__equal(_RandomAccessIterator1 __first1, - _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, - _BinaryPredicate __pred, - random_access_iterator_tag, - random_access_iterator_tag) { - if (std::distance(__first1, __last1) != std::distance(__first2, __last2)) - return false; +template +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +equal(_InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _BinaryPredicate __pred) { + if constexpr (__has_random_access_iterator_category<_InputIterator1>::value && + __has_random_access_iterator_category<_InputIterator2>::value) { + if (std::distance(__first1, __last1) != std::distance(__first2, __last2)) + return false; + } __identity __proj; return std::__equal_impl( std::__unwrap_iter(__first1), @@ -132,36 +119,13 @@ __equal(_RandomAccessIterator1 __first1, __proj); } -template -_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool -equal(_InputIterator1 __first1, - _InputIterator1 __last1, - _InputIterator2 __first2, - _InputIterator2 __last2, - _BinaryPredicate __pred) { - return std::__equal<_BinaryPredicate&>( - __first1, - __last1, - __first2, - __last2, - __pred, - typename iterator_traits<_InputIterator1>::iterator_category(), - typename iterator_traits<_InputIterator2>::iterator_category()); -} - template _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { - return std::__equal( - __first1, - __last1, - __first2, - __last2, - __equal_to(), - typename iterator_traits<_InputIterator1>::iterator_category(), - typename iterator_traits<_InputIterator2>::iterator_category()); + return std::equal(__first1, __last1, __first2, __last2, __equal_to()); } -#endif + +#endif // _LIBCPP_STD_VER >= 14 _LIBCPP_END_NAMESPACE_STD diff --git a/yass/third_party/libc++/trunk/include/__algorithm/equal_range.h b/yass/third_party/libc++/trunk/include/__algorithm/equal_range.h index a942904319..2b086abf17 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/equal_range.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/equal_range.h @@ -23,7 +23,7 @@ #include <__iterator/iterator_traits.h> #include <__iterator/next.h> #include <__type_traits/is_callable.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #include <__utility/pair.h> diff --git a/yass/third_party/libc++/trunk/include/__algorithm/fill_n.h b/yass/third_party/libc++/trunk/include/__algorithm/fill_n.h index 36f3349d9e..f29633f880 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/fill_n.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/fill_n.h @@ -9,18 +9,74 @@ #ifndef _LIBCPP___ALGORITHM_FILL_N_H #define _LIBCPP___ALGORITHM_FILL_N_H +#include <__algorithm/min.h> #include <__config> +#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> +#include <__memory/pointer_traits.h> #include <__utility/convert_to_integral.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD // fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset. +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value); + +template +_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void +__fill_n_bool(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { + using _It = __bit_iterator<_Cp, false>; + using __storage_type = typename _It::__storage_type; + + const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = std::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + if (_FillVal) + *__first.__seg_ |= __m; + else + *__first.__seg_ &= ~__m; + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + __storage_type __nw = __n / __bits_per_word; + std::__fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? static_cast<__storage_type>(-1) : 0); + __n -= __nw * __bits_per_word; + // do last partial word + if (__n > 0) { + __first.__seg_ += __nw; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + if (_FillVal) + *__first.__seg_ |= __m; + else + *__first.__seg_ &= ~__m; + } +} + +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> +__fill_n(__bit_iterator<_Cp, false> __first, _Size __n, const bool& __value) { + if (__n > 0) { + if (__value) + std::__fill_n_bool(__first, __n); + else + std::__fill_n_bool(__first, __n); + } + return __first + __n; +} + template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) { @@ -37,4 +93,6 @@ fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_FILL_N_H diff --git a/yass/third_party/libc++/trunk/include/__algorithm/inplace_merge.h b/yass/third_party/libc++/trunk/include/__algorithm/inplace_merge.h index eb3c0bdbc2..a6bcc66a2f 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/inplace_merge.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/inplace_merge.h @@ -114,8 +114,8 @@ _LIBCPP_HIDE_FROM_ABI void __buffered_inplace_merge( for (_BidirectionalIterator __i = __middle; __i != __last; __d.template __incr(), (void)++__i, (void)++__p) ::new ((void*)__p) value_type(_IterOps<_AlgPolicy>::__iter_move(__i)); - typedef __unconstrained_reverse_iterator<_BidirectionalIterator> _RBi; - typedef __unconstrained_reverse_iterator _Rv; + typedef reverse_iterator<_BidirectionalIterator> _RBi; + typedef reverse_iterator _Rv; typedef __invert<_Compare> _Inverted; std::__half_inplace_merge<_AlgPolicy>( _Rv(__p), _Rv(__buff), _RBi(__middle), _RBi(__first), _RBi(__last), _Inverted(__comp)); diff --git a/yass/third_party/libc++/trunk/include/__algorithm/lexicographical_compare_three_way.h b/yass/third_party/libc++/trunk/include/__algorithm/lexicographical_compare_three_way.h index 32de97d07a..50ebdc647a 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/lexicographical_compare_three_way.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/lexicographical_compare_three_way.h @@ -17,7 +17,7 @@ #include <__config> #include <__iterator/iterator_traits.h> #include <__type_traits/common_type.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__algorithm/mismatch.h b/yass/third_party/libc++/trunk/include/__algorithm/mismatch.h index d345b6048a..4eb693a1f2 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/mismatch.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/mismatch.h @@ -11,23 +11,93 @@ #define _LIBCPP___ALGORITHM_MISMATCH_H #include <__algorithm/comp.h> +#include <__algorithm/simd_utils.h> +#include <__algorithm/unwrap_iter.h> #include <__config> -#include <__iterator/iterator_traits.h> +#include <__functional/identity.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_constant_evaluated.h> +#include <__type_traits/is_equality_comparable.h> +#include <__type_traits/operation_traits.h> +#include <__utility/move.h> #include <__utility/pair.h> +#include <__utility/unreachable.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> +__mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { + while (__first1 != __last1) { + if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) + break; + ++__first1; + ++__first2; + } + return std::make_pair(std::move(__first1), std::move(__first2)); +} + +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> +__mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { + return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2); +} + +#if _LIBCPP_VECTORIZE_ALGORITHMS + +template ::value && __desugars_to<__equal_tag, _Pred, _Tp, _Tp>::value && + __is_identity<_Proj1>::value && __is_identity<_Proj2>::value, + int> = 0> +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> +__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { + constexpr size_t __unroll_count = 4; + constexpr size_t __vec_size = __native_vector_size<_Tp>; + using __vec = __simd_vector<_Tp, __vec_size>; + if (!__libcpp_is_constant_evaluated()) { + while (static_cast(__last1 - __first1) >= __unroll_count * __vec_size) [[__unlikely__]] { + __vec __lhs[__unroll_count]; + __vec __rhs[__unroll_count]; + + for (size_t __i = 0; __i != __unroll_count; ++__i) { + __lhs[__i] = std::__load_vector<__vec>(__first1 + __i * __vec_size); + __rhs[__i] = std::__load_vector<__vec>(__first2 + __i * __vec_size); + } + + for (size_t __i = 0; __i != __unroll_count; ++__i) { + if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) { + auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res); + return {__first1 + __offset, __first2 + __offset}; + } + } + + __first1 += __unroll_count * __vec_size; + __first2 += __unroll_count * __vec_size; + } + } + // TODO: Consider vectorizing the tail + return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2); +} + +#endif // _LIBCPP_VECTORIZE_ALGORITHMS + template _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) { - for (; __first1 != __last1; ++__first1, (void)++__first2) - if (!__pred(*__first1, *__first2)) - break; - return pair<_InputIterator1, _InputIterator2>(__first1, __first2); + __identity __proj; + auto __res = std::__mismatch( + std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred, __proj, __proj); + return std::make_pair(std::__rewrap_iter(__first1, __res.first), std::__rewrap_iter(__first2, __res.second)); } template @@ -59,4 +129,6 @@ mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __fi _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_MISMATCH_H diff --git a/yass/third_party/libc++/trunk/include/__algorithm/move.h b/yass/third_party/libc++/trunk/include/__algorithm/move.h index dba6d487ff..1716d43e2a 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/move.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/move.h @@ -16,7 +16,7 @@ #include <__config> #include <__iterator/segmented_iterator.h> #include <__type_traits/common_type.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -34,7 +34,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIte __move(_InIter __first, _Sent __last, _OutIter __result); template -struct __move_loop { +struct __move_impl { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { @@ -95,9 +95,7 @@ struct __move_loop { __local_first = _Traits::__begin(++__segment_iterator); } } -}; -struct __move_trivial { // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -109,7 +107,7 @@ struct __move_trivial { template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __move(_InIter __first, _Sent __last, _OutIter __result) { - return std::__dispatch_copy_or_move<_AlgPolicy, __move_loop<_AlgPolicy>, __move_trivial>( + return std::__copy_move_unwrap_iters<__move_impl<_AlgPolicy> >( std::move(__first), std::move(__last), std::move(__result)); } diff --git a/yass/third_party/libc++/trunk/include/__algorithm/move_backward.h b/yass/third_party/libc++/trunk/include/__algorithm/move_backward.h index aeedf4241d..4beb7bdbaa 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/move_backward.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/move_backward.h @@ -15,7 +15,7 @@ #include <__config> #include <__iterator/segmented_iterator.h> #include <__type_traits/common_type.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_BidirectionalIterator1 __move_backward(_BidirectionalIterator1 __first, _Sentinel __last, _BidirectionalIterator2 __result); template -struct __move_backward_loop { +struct __move_backward_impl { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { @@ -104,9 +104,7 @@ struct __move_backward_loop { __local_last = _Traits::__end(--__segment_iterator); } } -}; -struct __move_backward_trivial { // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -122,7 +120,7 @@ __move_backward(_BidirectionalIterator1 __first, _Sentinel __last, _Bidirectiona std::is_copy_constructible<_BidirectionalIterator1>::value, "Iterators must be copy constructible."); - return std::__dispatch_copy_or_move<_AlgPolicy, __move_backward_loop<_AlgPolicy>, __move_backward_trivial>( + return std::__copy_move_unwrap_iters<__move_backward_impl<_AlgPolicy> >( std::move(__first), std::move(__last), std::move(__result)); } diff --git a/yass/third_party/libc++/trunk/include/__algorithm/partial_sort.h b/yass/third_party/libc++/trunk/include/__algorithm/partial_sort.h index 85a8fdc77a..7f8d0c4914 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/partial_sort.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/partial_sort.h @@ -18,8 +18,8 @@ #include <__config> #include <__debug_utils/randomize_range.h> #include <__iterator/iterator_traits.h> -#include <__type_traits/is_copy_assignable.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_assignable.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__algorithm/pop_heap.h b/yass/third_party/libc++/trunk/include/__algorithm/pop_heap.h index 798a1d0993..6d23830097 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/pop_heap.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/pop_heap.h @@ -17,8 +17,8 @@ #include <__assert> #include <__config> #include <__iterator/iterator_traits.h> -#include <__type_traits/is_copy_assignable.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_assignable.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__algorithm/push_heap.h b/yass/third_party/libc++/trunk/include/__algorithm/push_heap.h index 7d8720e3a9..ec0b445f2b 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/push_heap.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/push_heap.h @@ -14,8 +14,8 @@ #include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/iterator_traits.h> -#include <__type_traits/is_copy_assignable.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_assignable.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__algorithm/ranges_ends_with.h b/yass/third_party/libc++/trunk/include/__algorithm/ranges_ends_with.h index c2a3cae9f3..bb01918326 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/ranges_ends_with.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/ranges_ends_with.h @@ -39,7 +39,7 @@ namespace ranges { namespace __ends_with { struct __fn { template - static _LIBCPP_HIDE_FROM_ABI constexpr bool __ends_with_fn_impl_bidirectional( + _LIBCPP_HIDE_FROM_ABI static constexpr bool __ends_with_fn_impl_bidirectional( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, @@ -56,7 +56,7 @@ struct __fn { } template - static _LIBCPP_HIDE_FROM_ABI constexpr bool __ends_with_fn_impl( + _LIBCPP_HIDE_FROM_ABI static constexpr bool __ends_with_fn_impl( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, @@ -65,7 +65,7 @@ struct __fn { _Proj1& __proj1, _Proj2& __proj2) { if constexpr (std::bidirectional_iterator<_Sent1> && std::bidirectional_iterator<_Sent2> && - (!std::random_access_iterator<_Sent1>)&&(!std::random_access_iterator<_Sent2>)) { + (!std::random_access_iterator<_Sent1>) && (!std::random_access_iterator<_Sent2>)) { return __ends_with_fn_impl_bidirectional(__first1, __last1, __first2, __last2, __pred, __proj1, __proj2); } else { diff --git a/yass/third_party/libc++/trunk/include/__algorithm/ranges_starts_with.h b/yass/third_party/libc++/trunk/include/__algorithm/ranges_starts_with.h index 90e184aa9b..7ba8af13a8 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/ranges_starts_with.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/ranges_starts_with.h @@ -42,14 +42,14 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr bool operator()( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, _Proj1 __proj1 = {}, - _Proj2 __proj2 = {}) const { + _Proj2 __proj2 = {}) { return __mismatch::__fn::__go( std::move(__first1), std::move(__last1), @@ -67,8 +67,8 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( - _Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr bool + operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { return __mismatch::__fn::__go( ranges::begin(__range1), ranges::end(__range1), diff --git a/yass/third_party/libc++/trunk/include/__algorithm/rotate.h b/yass/third_party/libc++/trunk/include/__algorithm/rotate.h index 9a4d07883e..df4ca95aac 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/rotate.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/rotate.h @@ -15,7 +15,7 @@ #include <__algorithm/swap_ranges.h> #include <__config> #include <__iterator/iterator_traits.h> -#include <__type_traits/is_trivially_move_assignable.h> +#include <__type_traits/is_trivially_assignable.h> #include <__utility/move.h> #include <__utility/pair.h> diff --git a/yass/third_party/libc++/trunk/include/__algorithm/simd_utils.h b/yass/third_party/libc++/trunk/include/__algorithm/simd_utils.h new file mode 100644 index 0000000000..1aedb3db01 --- /dev/null +++ b/yass/third_party/libc++/trunk/include/__algorithm/simd_utils.h @@ -0,0 +1,123 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_SIMD_UTILS_H +#define _LIBCPP___ALGORITHM_SIMD_UTILS_H + +#include <__bit/bit_cast.h> +#include <__bit/countr.h> +#include <__config> +#include <__type_traits/is_arithmetic.h> +#include <__type_traits/is_same.h> +#include <__utility/integer_sequence.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +// TODO: Find out how altivec changes things and allow vectorizations there too. +#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1700 && !defined(__ALTIVEC__) +# define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1 +#else +# define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0 +#endif + +#if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS && !defined(__OPTIMIZE_SIZE__) +# define _LIBCPP_VECTORIZE_ALGORITHMS 1 +#else +# define _LIBCPP_VECTORIZE_ALGORITHMS 0 +#endif + +#if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS + +_LIBCPP_BEGIN_NAMESPACE_STD + +// This isn't specialized for 64 byte vectors on purpose. They have the potential to significantly reduce performance +// in mixed simd/non-simd workloads and don't provide any performance improvement for currently vectorized algorithms +// as far as benchmarks are concerned. +# if defined(__AVX__) +template +inline constexpr size_t __native_vector_size = 32 / sizeof(_Tp); +# elif defined(__SSE__) || defined(__ARM_NEON__) +template +inline constexpr size_t __native_vector_size = 16 / sizeof(_Tp); +# elif defined(__MMX__) +template +inline constexpr size_t __native_vector_size = 8 / sizeof(_Tp); +# else +template +inline constexpr size_t __native_vector_size = 1; +# endif + +template +using __simd_vector __attribute__((__ext_vector_type__(_Np))) = _ArithmeticT; + +template +inline constexpr size_t __simd_vector_size_v = []() -> size_t { + static_assert(_False, "Not a vector!"); +}(); + +template +inline constexpr size_t __simd_vector_size_v<__simd_vector<_Tp, _Np>> = _Np; + +template +_LIBCPP_HIDE_FROM_ABI _Tp __simd_vector_underlying_type_impl(__simd_vector<_Tp, _Np>) { + return _Tp{}; +} + +template +using __simd_vector_underlying_type_t = decltype(std::__simd_vector_underlying_type_impl(_VecT{})); + +// This isn't inlined without always_inline when loading chars. +template +_LIBCPP_NODISCARD _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(const _Tp* __ptr) noexcept { + return [=](index_sequence<_Indices...>) _LIBCPP_ALWAYS_INLINE noexcept { + return _VecT{__ptr[_Indices]...}; + }(make_index_sequence<__simd_vector_size_v<_VecT>>{}); +} + +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { + return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector)); +} + +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept { + using __mask_vec = __simd_vector; + + // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876 + auto __impl = [&](_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept { + return std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))); + }; + + if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) { + return __impl(uint8_t{}); + } else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) { + return __impl(uint16_t{}); + } else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) { + return __impl(uint32_t{}); + } else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) { + return __impl(uint64_t{}); + } else { + static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type"); + return 0; + } +} + +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept { + return std::__find_first_set(~__vec); +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS + +#endif // _LIBCPP___ALGORITHM_SIMD_UTILS_H diff --git a/yass/third_party/libc++/trunk/include/__algorithm/sort_heap.h b/yass/third_party/libc++/trunk/include/__algorithm/sort_heap.h index 060fc33c3c..f20b110c7f 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/sort_heap.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/sort_heap.h @@ -16,8 +16,8 @@ #include <__config> #include <__debug_utils/strict_weak_ordering_check.h> #include <__iterator/iterator_traits.h> -#include <__type_traits/is_copy_assignable.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_assignable.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__algorithm/stable_sort.h b/yass/third_party/libc++/trunk/include/__algorithm/stable_sort.h index 9be192bd65..726e7e16b3 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/stable_sort.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/stable_sort.h @@ -20,7 +20,7 @@ #include <__memory/destruct_n.h> #include <__memory/temporary_buffer.h> #include <__memory/unique_ptr.h> -#include <__type_traits/is_trivially_copy_assignable.h> +#include <__type_traits/is_trivially_assignable.h> #include <__utility/move.h> #include <__utility/pair.h> #include diff --git a/yass/third_party/libc++/trunk/include/__algorithm/unwrap_iter.h b/yass/third_party/libc++/trunk/include/__algorithm/unwrap_iter.h index 50d815c970..8cc0d22d4f 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/unwrap_iter.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/unwrap_iter.h @@ -13,7 +13,7 @@ #include <__iterator/iterator_traits.h> #include <__memory/pointer_traits.h> #include <__type_traits/enable_if.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> #include <__utility/declval.h> #include <__utility/move.h> diff --git a/yass/third_party/libc++/trunk/include/__algorithm/upper_bound.h b/yass/third_party/libc++/trunk/include/__algorithm/upper_bound.h index f499f7a80a..9c7d8fbcde 100644 --- a/yass/third_party/libc++/trunk/include/__algorithm/upper_bound.h +++ b/yass/third_party/libc++/trunk/include/__algorithm/upper_bound.h @@ -18,7 +18,7 @@ #include <__iterator/advance.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__atomic/atomic_base.h b/yass/third_party/libc++/trunk/include/__atomic/atomic_base.h index 6ca01a7f1b..e9badccc25 100644 --- a/yass/third_party/libc++/trunk/include/__atomic/atomic_base.h +++ b/yass/third_party/libc++/trunk/include/__atomic/atomic_base.h @@ -18,7 +18,7 @@ #include <__config> #include <__memory/addressof.h> #include <__type_traits/is_integral.h> -#include <__type_traits/is_nothrow_default_constructible.h> +#include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_same.h> #include diff --git a/yass/third_party/libc++/trunk/include/__availability b/yass/third_party/libc++/trunk/include/__availability index 78438c55a3..bb3ed0a8da 100644 --- a/yass/third_party/libc++/trunk/include/__availability +++ b/yass/third_party/libc++/trunk/include/__availability @@ -72,11 +72,10 @@ # endif #endif -// Availability markup is disabled when building the library, or when the compiler +// Availability markup is disabled when building the library, or when a non-Clang +// compiler is used because only Clang supports the necessary attributes. // doesn't support the proper attributes. -#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ - !__has_feature(attribute_availability_with_strict) || !__has_feature(attribute_availability_in_templates) || \ - !__has_extension(pragma_clang_attribute_external_declaration) +#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # endif diff --git a/yass/third_party/libc++/trunk/include/__bit/bit_cast.h b/yass/third_party/libc++/trunk/include/__bit/bit_cast.h index f20b39ae74..6298810f37 100644 --- a/yass/third_party/libc++/trunk/include/__bit/bit_cast.h +++ b/yass/third_party/libc++/trunk/include/__bit/bit_cast.h @@ -19,6 +19,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD +#ifndef _LIBCPP_CXX03_LANG + +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr _ToType __bit_cast(const _FromType& __from) noexcept { + return __builtin_bit_cast(_ToType, __from); +} + +#endif // _LIBCPP_CXX03_LANG + #if _LIBCPP_STD_VER >= 20 template diff --git a/yass/third_party/libc++/trunk/include/__bit/countr.h b/yass/third_party/libc++/trunk/include/__bit/countr.h index 0cc679f87a..b6b3ac52ca 100644 --- a/yass/third_party/libc++/trunk/include/__bit/countr.h +++ b/yass/third_party/libc++/trunk/include/__bit/countr.h @@ -35,10 +35,8 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct return __builtin_ctzll(__x); } -#if _LIBCPP_STD_VER >= 20 - -template <__libcpp_unsigned_integer _Tp> -_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) noexcept { +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { if (__t == 0) return numeric_limits<_Tp>::digits; @@ -59,6 +57,13 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) n } } +#if _LIBCPP_STD_VER >= 20 + +template <__libcpp_unsigned_integer _Tp> +_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) noexcept { + return std::__countr_zero(__t); +} + template <__libcpp_unsigned_integer _Tp> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_one(_Tp __t) noexcept { return __t != numeric_limits<_Tp>::max() ? std::countr_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits; diff --git a/yass/third_party/libc++/trunk/include/__bit_reference b/yass/third_party/libc++/trunk/include/__bit_reference index 3a5339b72d..9579b9eaf7 100644 --- a/yass/third_party/libc++/trunk/include/__bit_reference +++ b/yass/third_party/libc++/trunk/include/__bit_reference @@ -171,61 +171,6 @@ private: __bit_const_reference& operator=(const __bit_const_reference&) = delete; }; -// fill_n - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void -__fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { - using _It = __bit_iterator<_Cp, false>; - using __storage_type = typename _It::__storage_type; - - const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = std::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - if (_FillVal) - *__first.__seg_ |= __m; - else - *__first.__seg_ &= ~__m; - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - __storage_type __nw = __n / __bits_per_word; - std::fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? static_cast<__storage_type>(-1) : 0); - __n -= __nw * __bits_per_word; - // do last partial word - if (__n > 0) { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if (_FillVal) - *__first.__seg_ |= __m; - else - *__first.__seg_ &= ~__m; - } -} - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void -fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n, bool __value) { - if (__n > 0) { - if (__value) - std::__fill_n(__first, __n); - else - std::__fill_n(__first, __n); - } -} - -// fill - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void -fill(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __last, bool __value) { - std::fill_n(__first, static_cast(__last - __first), __value); -} - // copy template @@ -1007,8 +952,10 @@ private: friend class __bit_iterator<_Cp, true>; template friend struct __bit_array; + template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend void __fill_n(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); + _LIBCPP_CONSTEXPR_SINCE_CXX20 friend void + __fill_n_bool(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_aligned( diff --git a/yass/third_party/libc++/trunk/include/__chrono/tzdb_list.h b/yass/third_party/libc++/trunk/include/__chrono/tzdb_list.h index 112e04ff2e..e8aaf31e36 100644 --- a/yass/third_party/libc++/trunk/include/__chrono/tzdb_list.h +++ b/yass/third_party/libc++/trunk/include/__chrono/tzdb_list.h @@ -52,19 +52,29 @@ public: using const_iterator = forward_list::const_iterator; - _LIBCPP_NODISCARD_EXT _LIBCPP_EXPORTED_FROM_ABI const tzdb& front() const noexcept; + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI const tzdb& front() const noexcept { return __front(); } - _LIBCPP_EXPORTED_FROM_ABI const_iterator erase_after(const_iterator __p); + _LIBCPP_HIDE_FROM_ABI const_iterator erase_after(const_iterator __p) { return __erase_after(__p); } - _LIBCPP_NODISCARD_EXT _LIBCPP_EXPORTED_FROM_ABI const_iterator begin() const noexcept; - _LIBCPP_NODISCARD_EXT _LIBCPP_EXPORTED_FROM_ABI const_iterator end() const noexcept; + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { return __begin(); } + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { return __end(); } - _LIBCPP_NODISCARD_EXT _LIBCPP_EXPORTED_FROM_ABI const_iterator cbegin() const noexcept; - _LIBCPP_NODISCARD_EXT _LIBCPP_EXPORTED_FROM_ABI const_iterator cend() const noexcept; + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return __cbegin(); } + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return __cend(); } [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __impl& __implementation() { return *__impl_; } private: + [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI const tzdb& __front() const noexcept; + + _LIBCPP_EXPORTED_FROM_ABI const_iterator __erase_after(const_iterator __p); + + [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI const_iterator __begin() const noexcept; + [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI const_iterator __end() const noexcept; + + [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI const_iterator __cbegin() const noexcept; + [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI const_iterator __cend() const noexcept; + __impl* __impl_; }; diff --git a/yass/third_party/libc++/trunk/include/__compare/partial_order.h b/yass/third_party/libc++/trunk/include/__compare/partial_order.h index f3ed4900fb..1d2fae63e5 100644 --- a/yass/third_party/libc++/trunk/include/__compare/partial_order.h +++ b/yass/third_party/libc++/trunk/include/__compare/partial_order.h @@ -28,6 +28,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD // [cmp.alg] namespace __partial_order { +void partial_order() = delete; + struct __fn { // NOLINTBEGIN(libcpp-robust-against-adl) partial_order should use ADL, but only here template diff --git a/yass/third_party/libc++/trunk/include/__compare/strong_order.h b/yass/third_party/libc++/trunk/include/__compare/strong_order.h index 3dc819e642..8c363b5638 100644 --- a/yass/third_party/libc++/trunk/include/__compare/strong_order.h +++ b/yass/third_party/libc++/trunk/include/__compare/strong_order.h @@ -37,6 +37,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD // [cmp.alg] namespace __strong_order { +void strong_order() = delete; + struct __fn { // NOLINTBEGIN(libcpp-robust-against-adl) strong_order should use ADL, but only here template diff --git a/yass/third_party/libc++/trunk/include/__compare/weak_order.h b/yass/third_party/libc++/trunk/include/__compare/weak_order.h index b82a708c29..1a3e85feb2 100644 --- a/yass/third_party/libc++/trunk/include/__compare/weak_order.h +++ b/yass/third_party/libc++/trunk/include/__compare/weak_order.h @@ -30,6 +30,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD // [cmp.alg] namespace __weak_order { +void weak_order() = delete; + struct __fn { // NOLINTBEGIN(libcpp-robust-against-adl) weak_order should use ADL, but only here template diff --git a/yass/third_party/libc++/trunk/include/__concepts/class_or_enum.h b/yass/third_party/libc++/trunk/include/__concepts/class_or_enum.h index c1b4a8c258..2739e31e14 100644 --- a/yass/third_party/libc++/trunk/include/__concepts/class_or_enum.h +++ b/yass/third_party/libc++/trunk/include/__concepts/class_or_enum.h @@ -28,11 +28,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD template concept __class_or_enum = is_class_v<_Tp> || is_union_v<_Tp> || is_enum_v<_Tp>; -// Work around Clang bug https://llvm.org/PR52970 -// TODO: remove this workaround once libc++ no longer has to support Clang 13 (it was fixed in Clang 14). -template -concept __workaround_52970 = is_class_v<__remove_cvref_t<_Tp>> || is_union_v<__remove_cvref_t<_Tp>>; - #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/yass/third_party/libc++/trunk/include/__concepts/swappable.h b/yass/third_party/libc++/trunk/include/__concepts/swappable.h index 1337dc49d7..d339488a08 100644 --- a/yass/third_party/libc++/trunk/include/__concepts/swappable.h +++ b/yass/third_party/libc++/trunk/include/__concepts/swappable.h @@ -15,8 +15,8 @@ #include <__concepts/constructible.h> #include <__config> #include <__type_traits/extent.h> -#include <__type_traits/is_nothrow_move_assignable.h> -#include <__type_traits/is_nothrow_move_constructible.h> +#include <__type_traits/is_nothrow_assignable.h> +#include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/remove_cvref.h> #include <__utility/exchange.h> #include <__utility/forward.h> diff --git a/yass/third_party/libc++/trunk/include/__config b/yass/third_party/libc++/trunk/include/__config index 8d4d17378b..8550b1da4a 100644 --- a/yass/third_party/libc++/trunk/include/__config +++ b/yass/third_party/libc++/trunk/include/__config @@ -174,6 +174,12 @@ // The implementation moved to the header, but we still export the symbols from // the dylib for backwards compatibility. # define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10 +// Define std::array/std::string_view iterators to be __wrap_iters instead of raw +// pointers, which prevents people from relying on a non-portable implementation +// detail. This is especially useful because enabling bounded iterators hardening +// requires code not to make these assumptions. +# define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_ARRAY +# define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_STRING_VIEW # elif _LIBCPP_ABI_VERSION == 1 # if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF)) // Enable compiling copies of now inline methods into the dylib to support @@ -400,6 +406,10 @@ _LIBCPP_HARDENING_MODE_DEBUG # define __has_include(...) 0 # endif +# ifndef __has_warning +# define __has_warning(...) 0 +# endif + # if !defined(_LIBCPP_COMPILER_CLANG_BASED) && __cplusplus < 201103L # error "libc++ only supports C++03 with Clang-based compilers. Please enable C++11" # endif @@ -717,6 +727,23 @@ typedef __char32_t char32_t; # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE # endif +# ifdef _LIBCPP_COMPILER_CLANG_BASED +# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define _LIBCPP_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(clang diagnostic ignored str)) +# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) +# elif defined(_LIBCPP_COMPILER_GCC) +# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define _LIBCPP_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) +# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(GCC diagnostic ignored str)) +# else +# define _LIBCPP_DIAGNOSTIC_PUSH +# define _LIBCPP_DIAGNOSTIC_POP +# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) +# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) +# endif + # if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST # define _LIBCPP_HARDENING_SIG f # elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE @@ -778,6 +805,12 @@ typedef __char32_t char32_t; // the implementation of a virtual function in an ABI-incompatible way in the first place, // since that would be an ABI break anyway. Hence, the lack of ABI tag should not be noticeable. // +// The macro can be applied to record and enum types. When the tagged type is nested in +// a record this "parent" record needs to have the macro too. Another use case for applying +// this macro to records and unions is to apply an ABI tag to inline constexpr variables. +// This can be useful for inline variables that are implementation details which are expected +// to change in the future. +// // TODO: We provide a escape hatch with _LIBCPP_NO_ABI_TAG for folks who want to avoid increasing // the length of symbols with an ABI tag. In practice, we should remove the escape hatch and // use compression mangling instead, see https://github.com/itanium-cxx-abi/cxx-abi/issues/70. @@ -804,16 +837,45 @@ typedef __char32_t char32_t; # define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI # endif +// TODO: Remove this workaround once we drop support for Clang 16 +#if __has_warning("-Wc++23-extensions") +# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++23-extensions") +#else +# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++2b-extensions") +#endif + +// Clang modules take a significant compile time hit when pushing and popping diagnostics. +// Since all the headers are marked as system headers in the modulemap, we can simply disable this +// pushing and popping when building with clang modules. +# if !__has_feature(modules) +# define _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS \ + _LIBCPP_DIAGNOSTIC_PUSH \ + _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++11-extensions") \ + _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++14-extensions") \ + _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++17-extensions") \ + _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++20-extensions") \ + _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION \ + _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++14-extensions") \ + _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++17-extensions") \ + _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++20-extensions") \ + _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++23-extensions") +# define _LIBCPP_POP_EXTENSION_DIAGNOSTICS _LIBCPP_DIAGNOSTIC_POP +# else +# define _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS +# define _LIBCPP_POP_EXTENSION_DIAGNOSTICS +# endif + // Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect. // clang-format off -# define _LIBCPP_BEGIN_NAMESPACE_STD namespace _LIBCPP_TYPE_VISIBILITY_DEFAULT std { \ +# define _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS \ + namespace _LIBCPP_TYPE_VISIBILITY_DEFAULT std { \ inline namespace _LIBCPP_ABI_NAMESPACE { -# define _LIBCPP_END_NAMESPACE_STD }} +# define _LIBCPP_END_NAMESPACE_STD }} _LIBCPP_POP_EXTENSION_DIAGNOSTICS # define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_STD \ inline namespace __fs { namespace filesystem { -# define _LIBCPP_END_NAMESPACE_FILESYSTEM _LIBCPP_END_NAMESPACE_STD }} +# define _LIBCPP_END_NAMESPACE_FILESYSTEM }} _LIBCPP_END_NAMESPACE_STD // clang-format on # if __has_attribute(__enable_if__) @@ -1194,8 +1256,6 @@ __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, c # endif // _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES # if defined(_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES) -# define _LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS -# define _LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_VOID_SPECIALIZATION # define _LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS # define _LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS # define _LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR @@ -1250,23 +1310,6 @@ __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, c // the ABI inconsistent. # endif -# ifdef _LIBCPP_COMPILER_CLANG_BASED -# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define _LIBCPP_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(clang diagnostic ignored str)) -# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) -# elif defined(_LIBCPP_COMPILER_GCC) -# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define _LIBCPP_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) -# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(GCC diagnostic ignored str)) -# else -# define _LIBCPP_DIAGNOSTIC_PUSH -# define _LIBCPP_DIAGNOSTIC_POP -# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) -# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) -# endif - // c8rtomb() and mbrtoc8() were added in C++20 and C23. Support for these // functions is gradually being added to existing C libraries. The conditions // below check for known C library versions and conditions under which these diff --git a/yass/third_party/libc++/trunk/include/__exception/exception_ptr.h b/yass/third_party/libc++/trunk/include/__exception/exception_ptr.h index 53e2f718bc..c9027de923 100644 --- a/yass/third_party/libc++/trunk/include/__exception/exception_ptr.h +++ b/yass/third_party/libc++/trunk/include/__exception/exception_ptr.h @@ -26,6 +26,8 @@ #ifndef _LIBCPP_ABI_MICROSOFT +# if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION + namespace __cxxabiv1 { extern "C" { @@ -37,14 +39,16 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS __cxa_exception* __cxa_init_primary_exception( void*, std::type_info*, void( -# if defined(_WIN32) +# if defined(_WIN32) __thiscall -# endif +# endif *)(void*)) throw(); } } // namespace __cxxabiv1 +# endif + #endif namespace std { // purposefully not using versioning namespace diff --git a/yass/third_party/libc++/trunk/include/__exception/nested_exception.h b/yass/third_party/libc++/trunk/include/__exception/nested_exception.h index 417db54e6e..1bf2df9392 100644 --- a/yass/third_party/libc++/trunk/include/__exception/nested_exception.h +++ b/yass/third_party/libc++/trunk/include/__exception/nested_exception.h @@ -15,8 +15,8 @@ #include <__type_traits/decay.h> #include <__type_traits/is_base_of.h> #include <__type_traits/is_class.h> +#include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> -#include <__type_traits/is_copy_constructible.h> #include <__type_traits/is_final.h> #include <__type_traits/is_polymorphic.h> #include <__utility/forward.h> diff --git a/yass/third_party/libc++/trunk/include/__expected/expected.h b/yass/third_party/libc++/trunk/include/__expected/expected.h index 443d9257dc..d7adaac756 100644 --- a/yass/third_party/libc++/trunk/include/__expected/expected.h +++ b/yass/third_party/libc++/trunk/include/__expected/expected.h @@ -23,24 +23,14 @@ #include <__type_traits/is_assignable.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> -#include <__type_traits/is_copy_assignable.h> -#include <__type_traits/is_copy_constructible.h> -#include <__type_traits/is_default_constructible.h> #include <__type_traits/is_function.h> -#include <__type_traits/is_move_assignable.h> -#include <__type_traits/is_move_constructible.h> +#include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_nothrow_copy_assignable.h> -#include <__type_traits/is_nothrow_copy_constructible.h> -#include <__type_traits/is_nothrow_default_constructible.h> -#include <__type_traits/is_nothrow_move_assignable.h> -#include <__type_traits/is_nothrow_move_constructible.h> #include <__type_traits/is_reference.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> -#include <__type_traits/is_trivially_copy_constructible.h> +#include <__type_traits/is_trivially_constructible.h> #include <__type_traits/is_trivially_destructible.h> -#include <__type_traits/is_trivially_move_constructible.h> #include <__type_traits/is_void.h> #include <__type_traits/lazy.h> #include <__type_traits/negation.h> diff --git a/yass/third_party/libc++/trunk/include/__format/concepts.h b/yass/third_party/libc++/trunk/include/__format/concepts.h index 299c5f40ee..13380e9b91 100644 --- a/yass/third_party/libc++/trunk/include/__format/concepts.h +++ b/yass/third_party/libc++/trunk/include/__format/concepts.h @@ -13,12 +13,14 @@ #include <__concepts/same_as.h> #include <__concepts/semiregular.h> #include <__config> -#include <__format/format_fwd.h> #include <__format/format_parse_context.h> +#include <__fwd/format.h> +#include <__fwd/tuple.h> +#include <__tuple/tuple_size.h> #include <__type_traits/is_specialization.h> #include <__type_traits/remove_const.h> +#include <__type_traits/remove_reference.h> #include <__utility/pair.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/yass/third_party/libc++/trunk/include/__format/container_adaptor.h b/yass/third_party/libc++/trunk/include/__format/container_adaptor.h index ec806ef16b..9f49ca03bf 100644 --- a/yass/third_party/libc++/trunk/include/__format/container_adaptor.h +++ b/yass/third_party/libc++/trunk/include/__format/container_adaptor.h @@ -18,11 +18,11 @@ #include <__format/concepts.h> #include <__format/formatter.h> #include <__format/range_default_formatter.h> +#include <__fwd/queue.h> +#include <__fwd/stack.h> #include <__ranges/ref_view.h> #include <__type_traits/is_const.h> #include <__type_traits/maybe_const.h> -#include -#include _LIBCPP_BEGIN_NAMESPACE_STD diff --git a/yass/third_party/libc++/trunk/include/__format/escaped_output_table.h b/yass/third_party/libc++/trunk/include/__format/escaped_output_table.h index 495a2fbc7b..e9f4a6e4f6 100644 --- a/yass/third_party/libc++/trunk/include/__format/escaped_output_table.h +++ b/yass/third_party/libc++/trunk/include/__format/escaped_output_table.h @@ -110,7 +110,7 @@ namespace __escaped_output_table { /// - bits [0, 10] The size of the range, allowing 2048 elements. /// - bits [11, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. -inline constexpr uint32_t __entries[893] = { +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[893] = { 0x00000020, 0x0003f821, 0x00056800, diff --git a/yass/third_party/libc++/trunk/include/__format/extended_grapheme_cluster_table.h b/yass/third_party/libc++/trunk/include/__format/extended_grapheme_cluster_table.h index 9616dfecd6..48581d8a5d 100644 --- a/yass/third_party/libc++/trunk/include/__format/extended_grapheme_cluster_table.h +++ b/yass/third_party/libc++/trunk/include/__format/extended_grapheme_cluster_table.h @@ -125,7 +125,7 @@ enum class __property : uint8_t { /// following benchmark. /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp // clang-format off -inline constexpr uint32_t __entries[1496] = { +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x00000091, 0x00005005, 0x00005811, diff --git a/yass/third_party/libc++/trunk/include/__format/format_arg.h b/yass/third_party/libc++/trunk/include/__format/format_arg.h index b786ac3b36..4924e5fb32 100644 --- a/yass/third_party/libc++/trunk/include/__format/format_arg.h +++ b/yass/third_party/libc++/trunk/include/__format/format_arg.h @@ -14,9 +14,9 @@ #include <__concepts/arithmetic.h> #include <__config> #include <__format/concepts.h> -#include <__format/format_fwd.h> #include <__format/format_parse_context.h> #include <__functional/invoke.h> +#include <__fwd/format.h> #include <__memory/addressof.h> #include <__type_traits/conditional.h> #include <__utility/forward.h> diff --git a/yass/third_party/libc++/trunk/include/__format/format_arg_store.h b/yass/third_party/libc++/trunk/include/__format/format_arg_store.h index 066cd369eb..23a599e995 100644 --- a/yass/third_party/libc++/trunk/include/__format/format_arg_store.h +++ b/yass/third_party/libc++/trunk/include/__format/format_arg_store.h @@ -151,7 +151,7 @@ consteval __arg_t __determine_arg_t() { // The overload for not formattable types allows triggering the static // assertion below. template - requires(!__formattable<_Tp, typename _Context::char_type>) + requires(!__formattable_with<_Tp, _Context>) consteval __arg_t __determine_arg_t() { return __arg_t::__none; } @@ -165,7 +165,6 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu using _Dp = remove_const_t<_Tp>; constexpr __arg_t __arg = __determine_arg_t<_Context, _Dp>(); static_assert(__arg != __arg_t::__none, "the supplied type is not formattable"); - static_assert(__formattable_with<_Tp, _Context>); // Not all types can be used to directly initialize the diff --git a/yass/third_party/libc++/trunk/include/__format/format_args.h b/yass/third_party/libc++/trunk/include/__format/format_args.h index 9e0afecc0a..79fe51f96c 100644 --- a/yass/third_party/libc++/trunk/include/__format/format_args.h +++ b/yass/third_party/libc++/trunk/include/__format/format_args.h @@ -14,7 +14,7 @@ #include <__config> #include <__format/format_arg.h> #include <__format/format_arg_store.h> -#include <__format/format_fwd.h> +#include <__fwd/format.h> #include #include diff --git a/yass/third_party/libc++/trunk/include/__format/format_context.h b/yass/third_party/libc++/trunk/include/__format/format_context.h index 68dcdb49d3..bf603c5c62 100644 --- a/yass/third_party/libc++/trunk/include/__format/format_context.h +++ b/yass/third_party/libc++/trunk/include/__format/format_context.h @@ -18,7 +18,7 @@ #include <__format/format_arg_store.h> #include <__format/format_args.h> #include <__format/format_error.h> -#include <__format/format_fwd.h> +#include <__fwd/format.h> #include <__iterator/back_insert_iterator.h> #include <__iterator/concepts.h> #include <__memory/addressof.h> @@ -27,7 +27,7 @@ #include #ifndef _LIBCPP_HAS_NO_LOCALIZATION -# include +# include <__locale> # include #endif diff --git a/yass/third_party/libc++/trunk/include/__format/format_functions.h b/yass/third_party/libc++/trunk/include/__format/format_functions.h index 3ee53539f4..c781014010 100644 --- a/yass/third_party/libc++/trunk/include/__format/format_functions.h +++ b/yass/third_party/libc++/trunk/include/__format/format_functions.h @@ -41,7 +41,7 @@ #include #ifndef _LIBCPP_HAS_NO_LOCALIZATION -# include +# include <__locale> #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__format/formatter.h b/yass/third_party/libc++/trunk/include/__format/formatter.h index 079befc5bd..47e35789b8 100644 --- a/yass/third_party/libc++/trunk/include/__format/formatter.h +++ b/yass/third_party/libc++/trunk/include/__format/formatter.h @@ -12,7 +12,7 @@ #include <__availability> #include <__config> -#include <__format/format_fwd.h> +#include <__fwd/format.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/yass/third_party/libc++/trunk/include/__format/formatter_floating_point.h b/yass/third_party/libc++/trunk/include/__format/formatter_floating_point.h index f01d323eff..1d94cc349c 100644 --- a/yass/third_party/libc++/trunk/include/__format/formatter_floating_point.h +++ b/yass/third_party/libc++/trunk/include/__format/formatter_floating_point.h @@ -39,7 +39,7 @@ #include #ifndef _LIBCPP_HAS_NO_LOCALIZATION -# include +# include <__locale> #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__format/parser_std_format_spec.h b/yass/third_party/libc++/trunk/include/__format/parser_std_format_spec.h index a4b47abff4..9818f37b51 100644 --- a/yass/third_party/libc++/trunk/include/__format/parser_std_format_spec.h +++ b/yass/third_party/libc++/trunk/include/__format/parser_std_format_spec.h @@ -129,8 +129,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr uint32_t __substitute_arg_id(basic_format_arg<_C /// /// They default to false so when a new field is added it needs to be opted in /// explicitly. -// TODO FMT Use an ABI tag for this struct. -struct __fields { +struct _LIBCPP_HIDE_FROM_ABI __fields { uint16_t __sign_ : 1 {false}; uint16_t __alternate_form_ : 1 {false}; uint16_t __zero_padding_ : 1 {false}; diff --git a/yass/third_party/libc++/trunk/include/__format/width_estimation_table.h b/yass/third_party/libc++/trunk/include/__format/width_estimation_table.h index cfb488975d..6309483367 100644 --- a/yass/third_party/libc++/trunk/include/__format/width_estimation_table.h +++ b/yass/third_party/libc++/trunk/include/__format/width_estimation_table.h @@ -119,7 +119,7 @@ namespace __width_estimation_table { /// - bits [0, 13] The size of the range, allowing 16384 elements. /// - bits [14, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. -inline constexpr uint32_t __entries[108] = { +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[108] = { 0x0440005f /* 00001100 - 0000115f [ 96] */, // 0x08c68001 /* 0000231a - 0000231b [ 2] */, // 0x08ca4001 /* 00002329 - 0000232a [ 2] */, // diff --git a/yass/third_party/libc++/trunk/include/__functional/bind_front.h b/yass/third_party/libc++/trunk/include/__functional/bind_front.h index 30dda53361..11d0bac3f8 100644 --- a/yass/third_party/libc++/trunk/include/__functional/bind_front.h +++ b/yass/third_party/libc++/trunk/include/__functional/bind_front.h @@ -17,7 +17,6 @@ #include <__type_traits/decay.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_constructible.h> -#include <__type_traits/is_move_constructible.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__functional/function.h b/yass/third_party/libc++/trunk/include/__functional/function.h index 416c26a0c7..1faa9e92eb 100644 --- a/yass/third_party/libc++/trunk/include/__functional/function.h +++ b/yass/third_party/libc++/trunk/include/__functional/function.h @@ -28,7 +28,7 @@ #include <__type_traits/decay.h> #include <__type_traits/is_core_convertible.h> #include <__type_traits/is_scalar.h> -#include <__type_traits/is_trivially_copy_constructible.h> +#include <__type_traits/is_trivially_constructible.h> #include <__type_traits/is_trivially_destructible.h> #include <__type_traits/is_void.h> #include <__type_traits/strip_signature.h> @@ -768,7 +768,7 @@ public: { } - virtual __base<_Rp(_ArgTypes...)>* __clone() const { + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual __base<_Rp(_ArgTypes...)>* __clone() const { _LIBCPP_ASSERT_INTERNAL( false, "Block pointers are just pointers, so they should always fit into " @@ -777,9 +777,11 @@ public: return nullptr; } - virtual void __clone(__base<_Rp(_ArgTypes...)>* __p) const { ::new ((void*)__p) __func(__f_); } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __clone(__base<_Rp(_ArgTypes...)>* __p) const { + ::new ((void*)__p) __func(__f_); + } - virtual void destroy() _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void destroy() _NOEXCEPT { # ifndef _LIBCPP_HAS_OBJC_ARC if (__f_) _Block_release(__f_); @@ -787,7 +789,7 @@ public: __f_ = 0; } - virtual void destroy_deallocate() _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void destroy_deallocate() _NOEXCEPT { _LIBCPP_ASSERT_INTERNAL( false, "Block pointers are just pointers, so they should always fit into " @@ -795,16 +797,20 @@ public: "never be invoked."); } - virtual _Rp operator()(_ArgTypes&&... __arg) { return std::__invoke(__f_, std::forward<_ArgTypes>(__arg)...); } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual _Rp operator()(_ArgTypes&&... __arg) { + return std::__invoke(__f_, std::forward<_ArgTypes>(__arg)...); + } # ifndef _LIBCPP_HAS_NO_RTTI - virtual const void* target(type_info const& __ti) const _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual const void* target(type_info const& __ti) const _NOEXCEPT { if (__ti == typeid(__func::__block_type)) return &__f_; return (const void*)nullptr; } - virtual const std::type_info& target_type() const _NOEXCEPT { return typeid(__func::__block_type); } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual const std::type_info& target_type() const _NOEXCEPT { + return typeid(__func::__block_type); + } # endif // _LIBCPP_HAS_NO_RTTI }; diff --git a/yass/third_party/libc++/trunk/include/__functional/hash.h b/yass/third_party/libc++/trunk/include/__functional/hash.h index a466c83703..a9e450edd3 100644 --- a/yass/third_party/libc++/trunk/include/__functional/hash.h +++ b/yass/third_party/libc++/trunk/include/__functional/hash.h @@ -10,23 +10,18 @@ #define _LIBCPP___FUNCTIONAL_HASH_H #include <__config> -#include <__functional/invoke.h> #include <__functional/unary_function.h> #include <__fwd/functional.h> -#include <__tuple/sfinae_helpers.h> -#include <__type_traits/is_copy_constructible.h> -#include <__type_traits/is_default_constructible.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_constructible.h> #include <__type_traits/is_enum.h> -#include <__type_traits/is_move_constructible.h> #include <__type_traits/underlying_type.h> -#include <__utility/forward.h> -#include <__utility/move.h> #include <__utility/pair.h> #include <__utility/swap.h> #include #include #include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/yass/third_party/libc++/trunk/include/__functional/not_fn.h b/yass/third_party/libc++/trunk/include/__functional/not_fn.h index 23a491c135..4b3ce5524a 100644 --- a/yass/third_party/libc++/trunk/include/__functional/not_fn.h +++ b/yass/third_party/libc++/trunk/include/__functional/not_fn.h @@ -16,7 +16,6 @@ #include <__type_traits/decay.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_constructible.h> -#include <__type_traits/is_move_constructible.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__fwd/array.h b/yass/third_party/libc++/trunk/include/__fwd/array.h index ff3a3eeeef..b429d0c5a9 100644 --- a/yass/third_party/libc++/trunk/include/__fwd/array.h +++ b/yass/third_party/libc++/trunk/include/__fwd/array.h @@ -35,6 +35,12 @@ template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&& get(const array<_Tp, _Size>&&) _NOEXCEPT; #endif +template +struct __is_std_array : false_type {}; + +template +struct __is_std_array > : true_type {}; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FWD_ARRAY_H diff --git a/yass/third_party/libc++/trunk/include/__fwd/deque.h b/yass/third_party/libc++/trunk/include/__fwd/deque.h new file mode 100644 index 0000000000..fd2fb5bb4b --- /dev/null +++ b/yass/third_party/libc++/trunk/include/__fwd/deque.h @@ -0,0 +1,26 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_DEQUE_H +#define _LIBCPP___FWD_DEQUE_H + +#include <__config> +#include <__fwd/memory.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template > +class _LIBCPP_TEMPLATE_VIS deque; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FWD_DEQUE_H diff --git a/yass/third_party/libc++/trunk/include/__format/format_fwd.h b/yass/third_party/libc++/trunk/include/__fwd/format.h similarity index 89% rename from yass/third_party/libc++/trunk/include/__format/format_fwd.h rename to yass/third_party/libc++/trunk/include/__fwd/format.h index 120b2fc8d4..6f5c712437 100644 --- a/yass/third_party/libc++/trunk/include/__format/format_fwd.h +++ b/yass/third_party/libc++/trunk/include/__fwd/format.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___FORMAT_FORMAT_FWD_H -#define _LIBCPP___FORMAT_FORMAT_FWD_H +#ifndef _LIBCPP___FWD_FORMAT_H +#define _LIBCPP___FWD_FORMAT_H #include <__availability> #include <__config> @@ -36,4 +36,4 @@ struct _LIBCPP_TEMPLATE_VIS formatter; _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___FORMAT_FORMAT_FWD_H +#endif // _LIBCPP___FWD_FORMAT_H diff --git a/yass/third_party/libc++/trunk/include/__type_traits/is_default_constructible.h b/yass/third_party/libc++/trunk/include/__fwd/memory.h similarity index 51% rename from yass/third_party/libc++/trunk/include/__type_traits/is_default_constructible.h rename to yass/third_party/libc++/trunk/include/__fwd/memory.h index e73e9b98f5..b9e151855a 100644 --- a/yass/third_party/libc++/trunk/include/__type_traits/is_default_constructible.h +++ b/yass/third_party/libc++/trunk/include/__fwd/memory.h @@ -1,16 +1,15 @@ -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// -#ifndef _LIBCPP___TYPE_TRAITS_IS_DEFAULT_CONSTRUCTIBLE_H -#define _LIBCPP___TYPE_TRAITS_IS_DEFAULT_CONSTRUCTIBLE_H +#ifndef _LIBCPP___FWD_MEMORY_H +#define _LIBCPP___FWD_MEMORY_H #include <__config> -#include <__type_traits/integral_constant.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -19,13 +18,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -struct _LIBCPP_TEMPLATE_VIS is_default_constructible : public integral_constant {}; - -#if _LIBCPP_STD_VER >= 17 -template -inline constexpr bool is_default_constructible_v = __is_constructible(_Tp); -#endif +class _LIBCPP_TEMPLATE_VIS allocator; _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___TYPE_TRAITS_IS_DEFAULT_CONSTRUCTIBLE_H +#endif // _LIBCPP___FWD_MEMORY_H diff --git a/yass/third_party/libc++/trunk/include/__fwd/queue.h b/yass/third_party/libc++/trunk/include/__fwd/queue.h new file mode 100644 index 0000000000..50d99ad9c2 --- /dev/null +++ b/yass/third_party/libc++/trunk/include/__fwd/queue.h @@ -0,0 +1,31 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_QUEUE_H +#define _LIBCPP___FWD_QUEUE_H + +#include <__config> +#include <__functional/operations.h> +#include <__fwd/deque.h> +#include <__fwd/vector.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template > +class _LIBCPP_TEMPLATE_VIS queue; + +template , class _Compare = less > +class _LIBCPP_TEMPLATE_VIS priority_queue; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FWD_QUEUE_H diff --git a/yass/third_party/libc++/trunk/include/__fwd/sstream.h b/yass/third_party/libc++/trunk/include/__fwd/sstream.h index e2d46fbe1d..39a9c3faf1 100644 --- a/yass/third_party/libc++/trunk/include/__fwd/sstream.h +++ b/yass/third_party/libc++/trunk/include/__fwd/sstream.h @@ -10,6 +10,7 @@ #define _LIBCPP___FWD_SSTREAM_H #include <__config> +#include <__fwd/memory.h> #include <__fwd/string.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/yass/third_party/libc++/trunk/include/__fwd/stack.h b/yass/third_party/libc++/trunk/include/__fwd/stack.h new file mode 100644 index 0000000000..7dab6c1a4f --- /dev/null +++ b/yass/third_party/libc++/trunk/include/__fwd/stack.h @@ -0,0 +1,26 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_STACK_H +#define _LIBCPP___FWD_STACK_H + +#include <__config> +#include <__fwd/deque.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template > +class _LIBCPP_TEMPLATE_VIS stack; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FWD_STACK_H diff --git a/yass/third_party/libc++/trunk/include/__fwd/string.h b/yass/third_party/libc++/trunk/include/__fwd/string.h index 032132374d..320c4e4c81 100644 --- a/yass/third_party/libc++/trunk/include/__fwd/string.h +++ b/yass/third_party/libc++/trunk/include/__fwd/string.h @@ -11,6 +11,7 @@ #include <__availability> #include <__config> +#include <__fwd/memory.h> #include <__fwd/memory_resource.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -39,9 +40,6 @@ template <> struct char_traits; #endif -template -class _LIBCPP_TEMPLATE_VIS allocator; - template , class _Allocator = allocator<_CharT> > class _LIBCPP_TEMPLATE_VIS basic_string; diff --git a/yass/third_party/libc++/trunk/include/__fwd/vector.h b/yass/third_party/libc++/trunk/include/__fwd/vector.h new file mode 100644 index 0000000000..c9cc961374 --- /dev/null +++ b/yass/third_party/libc++/trunk/include/__fwd/vector.h @@ -0,0 +1,26 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_VECTOR_H +#define _LIBCPP___FWD_VECTOR_H + +#include <__config> +#include <__fwd/memory.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template > +class _LIBCPP_TEMPLATE_VIS vector; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FWD_VECTOR_H diff --git a/yass/third_party/libc++/trunk/include/__hash_table b/yass/third_party/libc++/trunk/include/__hash_table index ec7d694c4a..a705117d01 100644 --- a/yass/third_party/libc++/trunk/include/__hash_table +++ b/yass/third_party/libc++/trunk/include/__hash_table @@ -28,12 +28,9 @@ #include <__type_traits/can_extract_key.h> #include <__type_traits/conditional.h> #include <__type_traits/is_const.h> -#include <__type_traits/is_copy_constructible.h> +#include <__type_traits/is_constructible.h> +#include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_nothrow_copy_constructible.h> -#include <__type_traits/is_nothrow_default_constructible.h> -#include <__type_traits/is_nothrow_move_assignable.h> -#include <__type_traits/is_nothrow_move_constructible.h> #include <__type_traits/is_pointer.h> #include <__type_traits/is_reference.h> #include <__type_traits/is_swappable.h> @@ -284,13 +281,21 @@ public: _LIBCPP_HIDE_FROM_ABI __hash_iterator() _NOEXCEPT : __node_(nullptr) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __node_->__upcast()->__get_value(); } + _LIBCPP_HIDE_FROM_ABI reference operator*() const { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to dereference a non-dereferenceable unordered container iterator"); + return __node_->__upcast()->__get_value(); + } _LIBCPP_HIDE_FROM_ABI pointer operator->() const { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to dereference a non-dereferenceable unordered container iterator"); return pointer_traits::pointer_to(__node_->__upcast()->__get_value()); } _LIBCPP_HIDE_FROM_ABI __hash_iterator& operator++() { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to increment a non-incrementable unordered container iterator"); __node_ = __node_->__next_; return *this; } @@ -345,12 +350,20 @@ public: _LIBCPP_HIDE_FROM_ABI __hash_const_iterator(const __non_const_iterator& __x) _NOEXCEPT : __node_(__x.__node_) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __node_->__upcast()->__get_value(); } + _LIBCPP_HIDE_FROM_ABI reference operator*() const { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to dereference a non-dereferenceable unordered container const_iterator"); + return __node_->__upcast()->__get_value(); + } _LIBCPP_HIDE_FROM_ABI pointer operator->() const { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to dereference a non-dereferenceable unordered container const_iterator"); return pointer_traits::pointer_to(__node_->__upcast()->__get_value()); } _LIBCPP_HIDE_FROM_ABI __hash_const_iterator& operator++() { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to increment a non-incrementable unordered container const_iterator"); __node_ = __node_->__next_; return *this; } @@ -400,13 +413,21 @@ public: _LIBCPP_HIDE_FROM_ABI __hash_local_iterator() _NOEXCEPT : __node_(nullptr) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __node_->__upcast()->__get_value(); } + _LIBCPP_HIDE_FROM_ABI reference operator*() const { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to dereference a non-dereferenceable unordered container local_iterator"); + return __node_->__upcast()->__get_value(); + } _LIBCPP_HIDE_FROM_ABI pointer operator->() const { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to dereference a non-dereferenceable unordered container local_iterator"); return pointer_traits::pointer_to(__node_->__upcast()->__get_value()); } _LIBCPP_HIDE_FROM_ABI __hash_local_iterator& operator++() { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to increment a non-incrementable unordered container local_iterator"); __node_ = __node_->__next_; if (__node_ != nullptr && std::__constrain_hash(__node_->__hash(), __bucket_count_) != __bucket_) __node_ = nullptr; @@ -475,13 +496,21 @@ public: __bucket_(__x.__bucket_), __bucket_count_(__x.__bucket_count_) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __node_->__upcast()->__get_value(); } + _LIBCPP_HIDE_FROM_ABI reference operator*() const { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to dereference a non-dereferenceable unordered container const_local_iterator"); + return __node_->__upcast()->__get_value(); + } _LIBCPP_HIDE_FROM_ABI pointer operator->() const { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to dereference a non-dereferenceable unordered container const_local_iterator"); return pointer_traits::pointer_to(__node_->__upcast()->__get_value()); } _LIBCPP_HIDE_FROM_ABI __hash_const_local_iterator& operator++() { + _LIBCPP_ASSERT_NON_NULL( + __node_ != nullptr, "Attempted to increment a non-incrementable unordered container const_local_iterator"); __node_ = __node_->__next_; if (__node_ != nullptr && std::__constrain_hash(__node_->__hash(), __bucket_count_) != __bucket_) __node_ = nullptr; diff --git a/yass/third_party/libc++/trunk/include/__iterator/bounded_iter.h b/yass/third_party/libc++/trunk/include/__iterator/bounded_iter.h index 906ba3df0c..a1a941ffba 100644 --- a/yass/third_party/libc++/trunk/include/__iterator/bounded_iter.h +++ b/yass/third_party/libc++/trunk/include/__iterator/bounded_iter.h @@ -31,13 +31,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD // Iterator wrapper that carries the valid range it is allowed to access. // // This is a simple iterator wrapper for contiguous iterators that points -// within a [begin, end) range and carries these bounds with it. The iterator -// ensures that it is pointing within that [begin, end) range when it is -// dereferenced. +// within a [begin, end] range and carries these bounds with it. The iterator +// ensures that it is pointing within [begin, end) range when it is +// dereferenced. It also ensures that it is never iterated outside of +// [begin, end]. This is important for two reasons: // -// Arithmetic operations are allowed and the bounds of the resulting iterator -// are not checked. Hence, it is possible to create an iterator pointing outside -// its range, but it is not possible to dereference it. +// 1. It allows `operator*` and `operator++` bounds checks to be `iter != end`. +// This is both less for the optimizer to prove, and aligns with how callers +// typically use iterators. +// +// 2. Advancing an iterator out of bounds is undefined behavior (see the table +// in [input.iterators]). In particular, when the underlying iterator is a +// pointer, it is undefined at the language level (see [expr.add]). If +// bounded iterators exhibited this undefined behavior, we risk compiler +// optimizations deleting non-redundant bounds checks. template ::value > > struct __bounded_iter { using value_type = typename iterator_traits<_Iterator>::value_type; @@ -51,8 +58,8 @@ struct __bounded_iter { // Create a singular iterator. // - // Such an iterator does not point to any object and is conceptually out of bounds, so it is - // not dereferenceable. Observing operations like comparison and assignment are valid. + // Such an iterator points past the end of an empty span, so it is not dereferenceable. + // Observing operations like comparison and assignment are valid. _LIBCPP_HIDE_FROM_ABI __bounded_iter() = default; _LIBCPP_HIDE_FROM_ABI __bounded_iter(__bounded_iter const&) = default; @@ -70,18 +77,20 @@ struct __bounded_iter { private: // Create an iterator wrapping the given iterator, and whose bounds are described - // by the provided [begin, end) range. + // by the provided [begin, end] range. // - // This constructor does not check whether the resulting iterator is within its bounds. - // However, it does check that the provided [begin, end) range is a valid range (that - // is, begin <= end). + // The constructor does not check whether the resulting iterator is within its bounds. It is a + // responsibility of the container to ensure that the given bounds are valid. // // Since it is non-standard for iterators to have this constructor, __bounded_iter must // be created via `std::__make_bounded_iter`. _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __bounded_iter( _Iterator __current, _Iterator __begin, _Iterator __end) : __current_(__current), __begin_(__begin), __end_(__end) { - _LIBCPP_ASSERT_INTERNAL(__begin <= __end, "__bounded_iter(current, begin, end): [begin, end) is not a valid range"); + _LIBCPP_ASSERT_INTERNAL( + __begin <= __current, "__bounded_iter(current, begin, end): current and begin are inconsistent"); + _LIBCPP_ASSERT_INTERNAL( + __current <= __end, "__bounded_iter(current, begin, end): current and end are inconsistent"); } template @@ -90,30 +99,37 @@ private: public: // Dereference and indexing operations. // - // These operations check that the iterator is dereferenceable, that is within [begin, end). + // These operations check that the iterator is dereferenceable. Since the class invariant is + // that the iterator is always within `[begin, end]`, we only need to check it's not pointing to + // `end`. This is easier for the optimizer because it aligns with the `iter != container.end()` + // checks that typical callers already use (see + // https://github.com/llvm/llvm-project/issues/78829). _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator*() const _NOEXCEPT { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( - __in_bounds(__current_), "__bounded_iter::operator*: Attempt to dereference an out-of-range iterator"); + __current_ != __end_, "__bounded_iter::operator*: Attempt to dereference an iterator at the end"); return *__current_; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pointer operator->() const _NOEXCEPT { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( - __in_bounds(__current_), "__bounded_iter::operator->: Attempt to dereference an out-of-range iterator"); + __current_ != __end_, "__bounded_iter::operator->: Attempt to dereference an iterator at the end"); return std::__to_address(__current_); } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator[](difference_type __n) const _NOEXCEPT { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( - __in_bounds(__current_ + __n), "__bounded_iter::operator[]: Attempt to index an iterator out-of-range"); + __n >= __begin_ - __current_, "__bounded_iter::operator[]: Attempt to index an iterator past the start"); + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n < __end_ - __current_, "__bounded_iter::operator[]: Attempt to index an iterator at or past the end"); return __current_[__n]; } // Arithmetic operations. // - // These operations do not check that the resulting iterator is within the bounds, since that - // would make it impossible to create a past-the-end iterator. + // These operations check that the iterator remains within `[begin, end]`. _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __bounded_iter& operator++() _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __current_ != __end_, "__bounded_iter::operator++: Attempt to advance an iterator past the end"); ++__current_; return *this; } @@ -124,6 +140,8 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __bounded_iter& operator--() _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __current_ != __begin_, "__bounded_iter::operator--: Attempt to rewind an iterator past the start"); --__current_; return *this; } @@ -134,6 +152,10 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __bounded_iter& operator+=(difference_type __n) _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n >= __begin_ - __current_, "__bounded_iter::operator+=: Attempt to rewind an iterator past the start"); + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n <= __end_ - __current_, "__bounded_iter::operator+=: Attempt to advance an iterator past the end"); __current_ += __n; return *this; } @@ -151,6 +173,10 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __bounded_iter& operator-=(difference_type __n) _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n <= __current_ - __begin_, "__bounded_iter::operator-=: Attempt to rewind an iterator past the start"); + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n >= __current_ - __end_, "__bounded_iter::operator-=: Attempt to advance an iterator past the end"); __current_ -= __n; return *this; } @@ -197,15 +223,10 @@ public: } private: - // Return whether the given iterator is in the bounds of this __bounded_iter. - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __in_bounds(_Iterator const& __iter) const { - return __iter >= __begin_ && __iter < __end_; - } - template friend struct pointer_traits; _Iterator __current_; // current iterator - _Iterator __begin_, __end_; // valid range represented as [begin, end) + _Iterator __begin_, __end_; // valid range represented as [begin, end] }; template diff --git a/yass/third_party/libc++/trunk/include/__iterator/cpp17_iterator_concepts.h b/yass/third_party/libc++/trunk/include/__iterator/cpp17_iterator_concepts.h index d1ad2b4e28..cdb561e684 100644 --- a/yass/third_party/libc++/trunk/include/__iterator/cpp17_iterator_concepts.h +++ b/yass/third_party/libc++/trunk/include/__iterator/cpp17_iterator_concepts.h @@ -14,10 +14,8 @@ #include <__concepts/same_as.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> -#include <__type_traits/is_copy_constructible.h> -#include <__type_traits/is_default_constructible.h> -#include <__type_traits/is_move_constructible.h> #include <__type_traits/is_signed.h> #include <__type_traits/is_void.h> #include <__utility/as_const.h> diff --git a/yass/third_party/libc++/trunk/include/__iterator/iter_move.h b/yass/third_party/libc++/trunk/include/__iterator/iter_move.h index 202b94cccc..ba8aed3c0f 100644 --- a/yass/third_party/libc++/trunk/include/__iterator/iter_move.h +++ b/yass/third_party/libc++/trunk/include/__iterator/iter_move.h @@ -35,7 +35,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { namespace __iter_move { -void iter_move(); +void iter_move() = delete; template concept __unqualified_iter_move = __class_or_enum> && requires(_Tp&& __t) { diff --git a/yass/third_party/libc++/trunk/include/__iterator/iterator_traits.h b/yass/third_party/libc++/trunk/include/__iterator/iterator_traits.h index 2cd82525ba..11af9e3018 100644 --- a/yass/third_party/libc++/trunk/include/__iterator/iterator_traits.h +++ b/yass/third_party/libc++/trunk/include/__iterator/iterator_traits.h @@ -21,7 +21,6 @@ #include <__fwd/pair.h> #include <__iterator/incrementable_traits.h> #include <__iterator/readable_traits.h> -#include <__type_traits/add_const.h> #include <__type_traits/common_reference.h> #include <__type_traits/conditional.h> #include <__type_traits/disjunction.h> @@ -493,8 +492,8 @@ using __iter_mapped_type = typename iterator_traits<_InputIterator>::value_type: template using __iter_to_alloc_type = - pair< typename add_const::value_type::first_type>::type, - typename iterator_traits<_InputIterator>::value_type::second_type>; + pair::value_type::first_type, + typename iterator_traits<_InputIterator>::value_type::second_type>; template using __iterator_category_type = typename iterator_traits<_Iter>::iterator_category; diff --git a/yass/third_party/libc++/trunk/include/__iterator/ranges_iterator_traits.h b/yass/third_party/libc++/trunk/include/__iterator/ranges_iterator_traits.h index a30864199d..859e708204 100644 --- a/yass/third_party/libc++/trunk/include/__iterator/ranges_iterator_traits.h +++ b/yass/third_party/libc++/trunk/include/__iterator/ranges_iterator_traits.h @@ -13,7 +13,6 @@ #include <__config> #include <__fwd/pair.h> #include <__ranges/concepts.h> -#include <__type_traits/add_const.h> #include <__type_traits/remove_const.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -32,8 +31,7 @@ using __range_mapped_type = typename ranges::range_value_t<_Range>::second_type; template using __range_to_alloc_type = - pair::first_type>, - typename ranges::range_value_t<_Range>::second_type>; + pair::first_type, typename ranges::range_value_t<_Range>::second_type>; #endif diff --git a/yass/third_party/libc++/trunk/include/__iterator/reverse_iterator.h b/yass/third_party/libc++/trunk/include/__iterator/reverse_iterator.h index 5900b1c5ac..2ae1461934 100644 --- a/yass/third_party/libc++/trunk/include/__iterator/reverse_iterator.h +++ b/yass/third_party/libc++/trunk/include/__iterator/reverse_iterator.h @@ -34,7 +34,7 @@ #include <__type_traits/enable_if.h> #include <__type_traits/is_assignable.h> #include <__type_traits/is_convertible.h> -#include <__type_traits/is_nothrow_copy_constructible.h> +#include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_pointer.h> #include <__type_traits/is_same.h> #include <__utility/declval.h> @@ -316,172 +316,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator<_Ite } #endif -#if _LIBCPP_STD_VER <= 17 -template -using __unconstrained_reverse_iterator = reverse_iterator<_Iter>; -#else - -// __unconstrained_reverse_iterator allows us to use reverse iterators in the implementation of algorithms by working -// around a language issue in C++20. -// In C++20, when a reverse iterator wraps certain C++20-hostile iterators, calling comparison operators on it will -// result in a compilation error. However, calling comparison operators on the pristine hostile iterator is not -// an error. Thus, we cannot use reverse_iterators in the implementation of an algorithm that accepts a -// C++20-hostile iterator. This class is an internal workaround -- it is a copy of reverse_iterator with -// tweaks to make it support hostile iterators. -// -// A C++20-hostile iterator is one that defines a comparison operator where one of the arguments is an exact match -// and the other requires an implicit conversion, for example: -// friend bool operator==(const BaseIter&, const DerivedIter&); -// -// C++20 rules for rewriting equality operators create another overload of this function with parameters reversed: -// friend bool operator==(const DerivedIter&, const BaseIter&); -// -// This creates an ambiguity in overload resolution. -// -// Clang treats this ambiguity differently in different contexts. When operator== is actually called in the function -// body, the code is accepted with a warning. When a concept requires operator== to be a valid expression, however, -// it evaluates to false. Thus, the implementation of reverse_iterator::operator== can actually call operator== on its -// base iterators, but the constraints on reverse_iterator::operator== prevent it from being considered during overload -// resolution. This class simply removes the problematic constraints from comparison functions. -template -class __unconstrained_reverse_iterator { - _Iter __iter_; - -public: - static_assert(__has_bidirectional_iterator_category<_Iter>::value || bidirectional_iterator<_Iter>); - - using iterator_type = _Iter; - using iterator_category = - _If<__has_random_access_iterator_category<_Iter>::value, - random_access_iterator_tag, - __iterator_category_type<_Iter>>; - using pointer = __iterator_pointer_type<_Iter>; - using value_type = iter_value_t<_Iter>; - using difference_type = iter_difference_t<_Iter>; - using reference = iter_reference_t<_Iter>; - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator() = default; - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator(const __unconstrained_reverse_iterator&) = default; - _LIBCPP_HIDE_FROM_ABI constexpr explicit __unconstrained_reverse_iterator(_Iter __iter) : __iter_(__iter) {} - - _LIBCPP_HIDE_FROM_ABI constexpr _Iter base() const { return __iter_; } - _LIBCPP_HIDE_FROM_ABI constexpr reference operator*() const { - auto __tmp = __iter_; - return *--__tmp; - } - - _LIBCPP_HIDE_FROM_ABI constexpr pointer operator->() const { - if constexpr (is_pointer_v<_Iter>) { - return std::prev(__iter_); - } else { - return std::prev(__iter_).operator->(); - } - } - - _LIBCPP_HIDE_FROM_ABI friend constexpr iter_rvalue_reference_t<_Iter> - iter_move(const __unconstrained_reverse_iterator& __i) noexcept( - is_nothrow_copy_constructible_v<_Iter>&& noexcept(ranges::iter_move(--std::declval<_Iter&>()))) { - auto __tmp = __i.base(); - return ranges::iter_move(--__tmp); - } - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator& operator++() { - --__iter_; - return *this; - } - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator operator++(int) { - auto __tmp = *this; - --__iter_; - return __tmp; - } - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator& operator--() { - ++__iter_; - return *this; - } - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator operator--(int) { - auto __tmp = *this; - ++__iter_; - return __tmp; - } - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator& operator+=(difference_type __n) { - __iter_ -= __n; - return *this; - } - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator& operator-=(difference_type __n) { - __iter_ += __n; - return *this; - } - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator operator+(difference_type __n) const { - return __unconstrained_reverse_iterator(__iter_ - __n); - } - - _LIBCPP_HIDE_FROM_ABI constexpr __unconstrained_reverse_iterator operator-(difference_type __n) const { - return __unconstrained_reverse_iterator(__iter_ + __n); - } - - _LIBCPP_HIDE_FROM_ABI constexpr difference_type operator-(const __unconstrained_reverse_iterator& __other) const { - return __other.__iter_ - __iter_; - } - - _LIBCPP_HIDE_FROM_ABI constexpr auto operator[](difference_type __n) const { return *(*this + __n); } - - // Deliberately unconstrained unlike the comparison functions in `reverse_iterator` -- see the class comment for the - // rationale. - _LIBCPP_HIDE_FROM_ABI friend constexpr bool - operator==(const __unconstrained_reverse_iterator& __lhs, const __unconstrained_reverse_iterator& __rhs) { - return __lhs.base() == __rhs.base(); - } - - _LIBCPP_HIDE_FROM_ABI friend constexpr bool - operator!=(const __unconstrained_reverse_iterator& __lhs, const __unconstrained_reverse_iterator& __rhs) { - return __lhs.base() != __rhs.base(); - } - - _LIBCPP_HIDE_FROM_ABI friend constexpr bool - operator<(const __unconstrained_reverse_iterator& __lhs, const __unconstrained_reverse_iterator& __rhs) { - return __lhs.base() > __rhs.base(); - } - - _LIBCPP_HIDE_FROM_ABI friend constexpr bool - operator>(const __unconstrained_reverse_iterator& __lhs, const __unconstrained_reverse_iterator& __rhs) { - return __lhs.base() < __rhs.base(); - } - - _LIBCPP_HIDE_FROM_ABI friend constexpr bool - operator<=(const __unconstrained_reverse_iterator& __lhs, const __unconstrained_reverse_iterator& __rhs) { - return __lhs.base() >= __rhs.base(); - } - - _LIBCPP_HIDE_FROM_ABI friend constexpr bool - operator>=(const __unconstrained_reverse_iterator& __lhs, const __unconstrained_reverse_iterator& __rhs) { - return __lhs.base() <= __rhs.base(); - } -}; - -#endif // _LIBCPP_STD_VER <= 17 - -template